diff --git a/backend/api/onboarding_utils/onboarding_completion_service.py b/backend/api/onboarding_utils/onboarding_completion_service.py index edc06386..3b1a237e 100644 --- a/backend/api/onboarding_utils/onboarding_completion_service.py +++ b/backend/api/onboarding_utils/onboarding_completion_service.py @@ -81,6 +81,30 @@ class OnboardingCompletionService: # Non-critical: log but don't fail onboarding completion logger.warning(f"Failed to create OAuth token monitoring tasks for user {user_id}: {e}") + # Create website analysis tasks for user's website and competitors + try: + from services.database import SessionLocal + from services.website_analysis_monitoring_service import create_website_analysis_tasks + db = SessionLocal() + try: + result = create_website_analysis_tasks(user_id=user_id, db=db) + if result.get('success'): + tasks_count = result.get('tasks_created', 0) + logger.info( + f"Created {tasks_count} website analysis tasks for user {user_id} " + f"on onboarding completion" + ) + else: + error = result.get('error', 'Unknown error') + logger.warning( + f"Failed to create website analysis tasks for user {user_id}: {error}" + ) + finally: + db.close() + except Exception as e: + # Non-critical: log but don't fail onboarding completion + logger.warning(f"Failed to create website analysis tasks for user {user_id}: {e}") + return { "message": "Onboarding completed successfully", "completed_at": datetime.now().isoformat(), diff --git a/backend/api/onboarding_utils/step3_research_service.py b/backend/api/onboarding_utils/step3_research_service.py index 6ee8ca45..14405b36 100644 --- a/backend/api/onboarding_utils/step3_research_service.py +++ b/backend/api/onboarding_utils/step3_research_service.py @@ -432,13 +432,13 @@ class Step3ResearchService: logger.error(f"Error storing research data: {str(e)}") return False - async def get_research_data(self, session_id: str) -> Dict[str, Any]: + async def get_research_data(self, session_id: str) -> Dict[str, Any]: """ 
Retrieve research data for a session. - + Args: session_id: Onboarding session ID - + Returns: Dictionary containing research data """ @@ -447,25 +447,76 @@ class Step3ResearchService: session = db.query(OnboardingSession).filter( OnboardingSession.id == session_id ).first() - + if not session: return { "success": False, "error": "Session not found" } - - research_data = session.step_data.get("step3_research_data") if session.step_data else None - + + # Check if step_data attribute exists (it may not be in the model) + # If it doesn't exist, try to get data from CompetitorAnalysis table + research_data = None + if hasattr(session, 'step_data') and session.step_data: + research_data = session.step_data.get("step3_research_data") if isinstance(session.step_data, dict) else None + + # If not found in step_data, try CompetitorAnalysis table + if not research_data: + try: + from models.onboarding import CompetitorAnalysis + competitor_records = db.query(CompetitorAnalysis).filter( + CompetitorAnalysis.session_id == session.id + ).all() + + if competitor_records: + competitors = [] + for record in competitor_records: + analysis_data = record.analysis_data or {} + competitor_info = { + "url": record.competitor_url, + "domain": record.competitor_domain or record.competitor_url, + "title": analysis_data.get("title", record.competitor_domain or ""), + "summary": analysis_data.get("summary", ""), + "relevance_score": analysis_data.get("relevance_score", 0.5), + "highlights": analysis_data.get("highlights", []), + "favicon": analysis_data.get("favicon"), + "image": analysis_data.get("image"), + "published_date": analysis_data.get("published_date"), + "author": analysis_data.get("author"), + "competitive_insights": analysis_data.get("competitive_analysis", {}), + "content_insights": analysis_data.get("content_insights", {}) + } + competitors.append(competitor_info) + + if competitors: + # Map competitor fields to match frontend expectations + mapped_competitors = [] + for comp 
in competitors: + mapped_comp = { + **comp, # Keep all original fields + "name": comp.get("title") or comp.get("name") or comp.get("domain", ""), + "description": comp.get("summary") or comp.get("description", ""), + "similarity_score": comp.get("relevance_score") or comp.get("similarity_score", 0.5) + } + mapped_competitors.append(mapped_comp) + + research_data = { + "competitors": mapped_competitors, + "completed_at": competitor_records[0].created_at.isoformat() if competitor_records[0].created_at else None + } + except Exception as e: + logger.warning(f"Could not retrieve competitors from CompetitorAnalysis table: {e}") + if not research_data: return { "success": False, - "error": "No research data found for this session" + "error": "No research data found for this session" } return { "success": True, - "research_data": research_data, - "session_id": session_id + "step3_research_data": research_data, + "research_data": research_data # Keep for backward compatibility } except Exception as e: diff --git a/backend/api/research_config.py b/backend/api/research_config.py index 214b572a..d6fe922b 100644 --- a/backend/api/research_config.py +++ b/backend/api/research_config.py @@ -4,12 +4,12 @@ Provides provider availability and persona-aware defaults for research. 
""" from fastapi import APIRouter, Depends, HTTPException, Query -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, List from loguru import logger from pydantic import BaseModel from middleware.auth_middleware import get_current_user -from services.user_api_key_context import get_exa_key, get_gemini_key +from services.user_api_key_context import get_exa_key, get_gemini_key, get_tavily_key from services.onboarding.database_service import OnboardingDatabaseService from services.onboarding.progress_service import get_onboarding_progress_service from services.database import get_db @@ -26,8 +26,10 @@ class ProviderAvailability(BaseModel): """Provider availability status.""" google_available: bool exa_available: bool + tavily_available: bool gemini_key_status: str # 'configured' | 'missing' exa_key_status: str # 'configured' | 'missing' + tavily_key_status: str # 'configured' | 'missing' class PersonaDefaults(BaseModel): @@ -47,6 +49,17 @@ class ResearchConfigResponse(BaseModel): persona_scheduled: bool = False +class CompetitorAnalysisResponse(BaseModel): + """Response model for competitor analysis data.""" + success: bool + competitors: Optional[List[Dict[str, Any]]] = None + social_media_accounts: Optional[Dict[str, str]] = None + social_media_citations: Optional[List[Dict[str, Any]]] = None + research_summary: Optional[Dict[str, Any]] = None + analysis_timestamp: Optional[str] = None + error: Optional[str] = None + + @router.get("/provider-availability", response_model=ProviderAvailability) async def get_provider_availability( current_user: Dict = Depends(get_current_user) @@ -57,6 +70,7 @@ async def get_provider_availability( Returns: - google_available: True if Gemini key is configured - exa_available: True if Exa key is configured + - tavily_available: True if Tavily key is configured - Key status for each provider """ try: @@ -65,15 +79,19 @@ async def get_provider_availability( # Check API key availability gemini_key = 
get_gemini_key(user_id) exa_key = get_exa_key(user_id) + tavily_key = get_tavily_key(user_id) google_available = bool(gemini_key and gemini_key.strip()) exa_available = bool(exa_key and exa_key.strip()) + tavily_available = bool(tavily_key and tavily_key.strip()) return ProviderAvailability( google_available=google_available, exa_available=exa_available, + tavily_available=tavily_available, gemini_key_status='configured' if google_available else 'missing', - exa_key_status='configured' if exa_available else 'missing' + exa_key_status='configured' if exa_available else 'missing', + tavily_key_status='configured' if tavily_available else 'missing' ) except Exception as e: logger.error(f"[ResearchConfig] Error checking provider availability for user {user_id if 'user_id' in locals() else 'unknown'}: {e}", exc_info=True) @@ -211,15 +229,19 @@ async def get_research_config( logger.debug(f"[ResearchConfig] Getting provider availability for user {user_id}") gemini_key = get_gemini_key(user_id) exa_key = get_exa_key(user_id) + tavily_key = get_tavily_key(user_id) google_available = bool(gemini_key and gemini_key.strip()) exa_available = bool(exa_key and exa_key.strip()) + tavily_available = bool(tavily_key and tavily_key.strip()) provider_availability = ProviderAvailability( google_available=google_available, exa_available=exa_available, + tavily_available=tavily_available, gemini_key_status='configured' if google_available else 'missing', - exa_key_status='configured' if exa_available else 'missing' + exa_key_status='configured' if exa_available else 'missing', + tavily_key_status='configured' if tavily_available else 'missing' ) # Get persona defaults @@ -355,11 +377,190 @@ async def get_research_config( import traceback logger.error(f"[ResearchConfig] Full traceback:\n{traceback.format_exc()}") raise HTTPException( - status_code=500, + status_code=500, detail=f"Failed to get research config: {str(e)}" ) +@router.get("/competitor-analysis", 
response_model=CompetitorAnalysisResponse) +async def get_competitor_analysis( + current_user: Dict = Depends(get_current_user), + db: Session = Depends(get_db) +): + """ + Get competitor analysis data from onboarding for the current user. + + Returns competitor data including competitors list, social media accounts, + social media citations, and research summary that was collected during onboarding step 3. + """ + user_id = None + try: + user_id = str(current_user.get('id')) + print(f"\n[COMPETITOR_ANALYSIS] ===== START: Getting competitor analysis for user_id={user_id} =====") + print(f"[COMPETITOR_ANALYSIS] Current user dict keys: {list(current_user.keys())}") + logger.info(f"[ResearchConfig] Getting competitor analysis for user {user_id}") + + if not db: + print(f"[COMPETITOR_ANALYSIS] ❌ ERROR: Database session is None for user {user_id}") + logger.error(f"[ResearchConfig] Database session is None for user {user_id}") + raise HTTPException(status_code=500, detail="Database session not available") + + db_service = OnboardingDatabaseService(db=db) + + # Get onboarding session - using same pattern as onboarding completion check + print(f"[COMPETITOR_ANALYSIS] Looking up onboarding session for user_id={user_id} (Clerk ID)") + session = db_service.get_session_by_user(user_id, db) + if not session: + print(f"[COMPETITOR_ANALYSIS] ❌ WARNING: No onboarding session found for user_id={user_id}") + logger.warning(f"[ResearchConfig] No onboarding session found for user {user_id}") + return CompetitorAnalysisResponse( + success=False, + error="No onboarding session found. Please complete onboarding first." 
+ ) + + print(f"[COMPETITOR_ANALYSIS] ✅ Found onboarding session: id={session.id}, user_id={session.user_id}, current_step={session.current_step}") + + # Check if step 3 is completed - same pattern as elsewhere (check current_step >= 3 or research_preferences exists) + research_preferences = db_service.get_research_preferences(user_id, db) + print(f"[COMPETITOR_ANALYSIS] Step check: current_step={session.current_step}, research_preferences exists={research_preferences is not None}") + if not research_preferences and session.current_step < 3: + print(f"[COMPETITOR_ANALYSIS] ❌ Step 3 not completed for user_id={user_id} (current_step={session.current_step})") + logger.info(f"[ResearchConfig] Step 3 not completed for user {user_id} (current_step={session.current_step})") + return CompetitorAnalysisResponse( + success=False, + error="Onboarding step 3 (Competitor Analysis) is not completed. Please complete onboarding step 3 first." + ) + + print(f"[COMPETITOR_ANALYSIS] ✅ Step 3 is completed (current_step={session.current_step} or research_preferences exists)") + + # Try Method 1: Get competitor data from CompetitorAnalysis table using OnboardingDatabaseService + # This follows the same pattern as get_website_analysis() + print(f"[COMPETITOR_ANALYSIS] 🔍 Method 1: Querying CompetitorAnalysis table using OnboardingDatabaseService...") + try: + competitors = db_service.get_competitor_analysis(user_id, db) + + if competitors: + print(f"[COMPETITOR_ANALYSIS] ✅ Found {len(competitors)} competitor records from CompetitorAnalysis table") + logger.info(f"[ResearchConfig] Found {len(competitors)} competitors from CompetitorAnalysis table for user {user_id}") + + # Map competitor fields to match frontend expectations + mapped_competitors = [] + for comp in competitors: + mapped_comp = { + **comp, # Keep all original fields + "name": comp.get("title") or comp.get("name") or comp.get("domain", ""), + "description": comp.get("summary") or comp.get("description", ""), + 
"similarity_score": comp.get("relevance_score") or comp.get("similarity_score", 0.5) + } + mapped_competitors.append(mapped_comp) + + print(f"[COMPETITOR_ANALYSIS] ✅ SUCCESS: Returning {len(mapped_competitors)} competitors for user_id={user_id}") + return CompetitorAnalysisResponse( + success=True, + competitors=mapped_competitors, + social_media_accounts={}, + social_media_citations=[], + research_summary={ + "total_competitors": len(mapped_competitors), + "market_insights": f"Found {len(mapped_competitors)} competitors analyzed during onboarding" + }, + analysis_timestamp=None + ) + else: + print(f"[COMPETITOR_ANALYSIS] ⚠️ No competitor records found in CompetitorAnalysis table for user_id={user_id}") + + except Exception as e: + print(f"[COMPETITOR_ANALYSIS] ❌ EXCEPTION in Method 1: {e}") + import traceback + print(f"[COMPETITOR_ANALYSIS] Traceback:\n{traceback.format_exc()}") + logger.warning(f"[ResearchConfig] Could not retrieve competitor data from CompetitorAnalysis table: {e}", exc_info=True) + + # Try Method 2: Get data from Step3ResearchService (which accesses step_data) + # This is where step3_research_service._store_research_data() saves the data + print(f"[COMPETITOR_ANALYSIS] 🔄 Method 2: Trying Step3ResearchService.get_research_data()...") + try: + from api.onboarding_utils.step3_research_service import Step3ResearchService + + # Step3ResearchService.get_research_data() expects session_id (integer), but we have user_id (string) + # The service uses session.id internally, so we need to pass the session.id + step3_service = Step3ResearchService() + research_data_result = await step3_service.get_research_data(str(session.id)) + + print(f"[COMPETITOR_ANALYSIS] Step3ResearchService.get_research_data() result: success={research_data_result.get('success')}") + + if research_data_result.get('success'): + # Handle both 'research_data' and 'step3_research_data' keys + research_data = research_data_result.get('step3_research_data') or 
research_data_result.get('research_data', {}) + print(f"[COMPETITOR_ANALYSIS] Research data keys: {list(research_data.keys()) if isinstance(research_data, dict) else 'Not a dict'}") + + if isinstance(research_data, dict) and research_data.get('competitors'): + competitors_list = research_data.get('competitors', []) + print(f"[COMPETITOR_ANALYSIS] ✅ Found {len(competitors_list)} competitors in step_data via Step3ResearchService") + + if competitors_list: + analysis_metadata = research_data.get('analysis_metadata', {}) + social_media_data = analysis_metadata.get('social_media_data', {}) + + # Map competitor fields to match frontend expectations + mapped_competitors = [] + for comp in competitors_list: + mapped_comp = { + **comp, # Keep all original fields + "name": comp.get("title") or comp.get("name") or comp.get("domain", ""), + "description": comp.get("summary") or comp.get("description", ""), + "similarity_score": comp.get("relevance_score") or comp.get("similarity_score", 0.5) + } + mapped_competitors.append(mapped_comp) + + print(f"[COMPETITOR_ANALYSIS] ✅ SUCCESS: Returning {len(mapped_competitors)} competitors from step_data for user_id={user_id}") + logger.info(f"[ResearchConfig] Found {len(mapped_competitors)} competitors from step_data via Step3ResearchService for user {user_id}") + return CompetitorAnalysisResponse( + success=True, + competitors=mapped_competitors, + social_media_accounts=social_media_data.get('social_media_accounts', {}), + social_media_citations=social_media_data.get('citations', []), + research_summary=research_data.get('research_summary'), + analysis_timestamp=research_data.get('completed_at') + ) + else: + print(f"[COMPETITOR_ANALYSIS] ⚠️ Step3ResearchService returned competitors list but it's empty") + else: + print(f"[COMPETITOR_ANALYSIS] ⚠️ Step3ResearchService returned success=True but no competitors in data") + else: + error_msg = research_data_result.get('error', 'Unknown error') + print(f"[COMPETITOR_ANALYSIS] ⚠️ 
Step3ResearchService returned success=False, error: {error_msg}") + + except Exception as e: + print(f"[COMPETITOR_ANALYSIS] ❌ EXCEPTION in Method 2: {e}") + import traceback + print(f"[COMPETITOR_ANALYSIS] Traceback:\n{traceback.format_exc()}") + logger.warning(f"[ResearchConfig] Could not retrieve competitor data from Step3ResearchService: {e}", exc_info=True) + + # Fallback: Return empty response with helpful message + print(f"[COMPETITOR_ANALYSIS] ❌ FALLBACK: No competitor analysis data found for user_id={user_id}") + print(f"[COMPETITOR_ANALYSIS] Step 3 is completed (current_step={session.current_step}) but no data found in either source") + logger.info(f"[ResearchConfig] No competitor analysis data found for user {user_id} (step 3 completed but no data found)") + return CompetitorAnalysisResponse( + success=False, + error="Competitor analysis data was not found in the database. Please re-run competitor discovery in Step 3 of onboarding to generate and save competitor data." + ) + + except HTTPException: + print(f"[COMPETITOR_ANALYSIS] ❌ HTTPException raised (will be re-raised)") + raise + except Exception as e: + print(f"[COMPETITOR_ANALYSIS] ❌ CRITICAL ERROR: {e}") + import traceback + print(f"[COMPETITOR_ANALYSIS] Traceback:\n{traceback.format_exc()}") + logger.error(f"[ResearchConfig] Error getting competitor analysis for user {user_id if user_id else 'unknown'}: {e}", exc_info=True) + raise HTTPException( + status_code=500, + detail=f"Failed to get competitor analysis: {str(e)}" + ) + finally: + print(f"[COMPETITOR_ANALYSIS] ===== END: Getting competitor analysis for user_id={user_id} =====\n") + + # Helper functions from RESEARCH_AI_HYPERPERSONALIZATION.md def _get_domain_suggestions(industry: str) -> list[str]: diff --git a/backend/api/scheduler_dashboard.py b/backend/api/scheduler_dashboard.py index c193dc57..967778ce 100644 --- a/backend/api/scheduler_dashboard.py +++ b/backend/api/scheduler_dashboard.py @@ -18,11 +18,68 @@ from 
middleware.auth_middleware import get_current_user from models.monitoring_models import TaskExecutionLog, MonitoringTask from models.scheduler_models import SchedulerEventLog from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask -from sqlalchemy import func +from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog +from models.website_analysis_monitoring_models import WebsiteAnalysisTask, WebsiteAnalysisExecutionLog router = APIRouter(prefix="/api/scheduler", tags=["scheduler-dashboard"]) +def _rebuild_cumulative_stats_from_events(db: Session) -> Dict[str, int]: + """ + Rebuild cumulative stats by aggregating all check_cycle events from event logs. + This is used as a fallback when the cumulative stats table doesn't exist or is invalid. + + Args: + db: Database session + + Returns: + Dictionary with cumulative stats + """ + try: + # Aggregate check cycle events for cumulative totals + result = db.query( + func.count(SchedulerEventLog.id), + func.sum(SchedulerEventLog.tasks_found), + func.sum(SchedulerEventLog.tasks_executed), + func.sum(SchedulerEventLog.tasks_failed) + ).filter( + SchedulerEventLog.event_type == 'check_cycle' + ).first() + + if result: + # SQLAlchemy returns tuple for multi-column queries + # SUM returns NULL when no rows, handle that + total_cycles = result[0] if result[0] is not None else 0 + total_found = result[1] if result[1] is not None else 0 + total_executed = result[2] if result[2] is not None else 0 + total_failed = result[3] if result[3] is not None else 0 + + return { + 'total_check_cycles': int(total_cycles), + 'cumulative_tasks_found': int(total_found), + 'cumulative_tasks_executed': int(total_executed), + 'cumulative_tasks_failed': int(total_failed), + 'cumulative_tasks_skipped': 0 # Not tracked in event logs currently + } + else: + return { + 'total_check_cycles': 0, + 'cumulative_tasks_found': 0, + 'cumulative_tasks_executed': 0, + 'cumulative_tasks_failed': 0, + 
'cumulative_tasks_skipped': 0 + } + except Exception as e: + logger.error(f"[Dashboard] Error rebuilding cumulative stats from events: {e}", exc_info=True) + return { + 'total_check_cycles': 0, + 'cumulative_tasks_found': 0, + 'cumulative_tasks_executed': 0, + 'cumulative_tasks_failed': 0, + 'cumulative_tasks_skipped': 0 + } + + @router.get("/dashboard") async def get_scheduler_dashboard( current_user: Dict[str, Any] = Depends(get_current_user), @@ -139,98 +196,172 @@ async def get_scheduler_dashboard( except Exception as e: logger.error(f"Error loading OAuth token monitoring tasks: {e}", exc_info=True) + # Load website analysis tasks + try: + website_analysis_tasks = db.query(WebsiteAnalysisTask).filter( + WebsiteAnalysisTask.status == 'active' + ).all() + + # Filter by user if user_id_str is provided + if user_id_str: + website_analysis_tasks = [t for t in website_analysis_tasks if t.user_id == user_id_str] + + for task in website_analysis_tasks: + try: + user_job_store = get_user_job_store_name(task.user_id, db) + except Exception as e: + user_job_store = 'default' + logger.debug(f"Could not get job store for user {task.user_id}: {e}") + + # Format as recurring job + job_info = { + 'id': f"website_analysis_{task.task_type}_{task.user_id}_{task.id}", + 'trigger_type': 'CronTrigger', # Recurring based on frequency_days + 'next_run_time': task.next_check.isoformat() if task.next_check else None, + 'user_id': task.user_id, + 'job_store': 'default', + 'user_job_store': user_job_store, + 'function_name': 'website_analysis_executor.execute_task', + 'task_type': task.task_type, # 'user_website' or 'competitor' + 'website_url': task.website_url, + 'competitor_id': task.competitor_id, + 'task_id': task.id, + 'is_database_task': True, + 'frequency': f'Every {task.frequency_days} days', + 'task_category': 'website_analysis' + } + + formatted_jobs.append(job_info) + except Exception as e: + logger.error(f"Error loading website analysis tasks: {e}", exc_info=True) + + # Load 
platform insights tasks (GSC and Bing) + try: + insights_tasks = db.query(PlatformInsightsTask).filter( + PlatformInsightsTask.status == 'active' + ).all() + + # Filter by user if user_id_str is provided + if user_id_str: + insights_tasks = [t for t in insights_tasks if t.user_id == user_id_str] + + for task in insights_tasks: + try: + user_job_store = get_user_job_store_name(task.user_id, db) + except Exception as e: + user_job_store = 'default' + logger.debug(f"Could not get job store for user {task.user_id}: {e}") + + # Format as recurring weekly job + job_info = { + 'id': f"platform_insights_{task.platform}_{task.user_id}", + 'trigger_type': 'CronTrigger', # Weekly recurring + 'next_run_time': task.next_check.isoformat() if task.next_check else None, + 'user_id': task.user_id, + 'job_store': 'default', + 'user_job_store': user_job_store, + 'function_name': f'{task.platform}_insights_executor.execute_task', + 'platform': task.platform, + 'task_id': task.id, + 'is_database_task': True, + 'frequency': 'Weekly', + 'task_category': 'platform_insights' + } + + formatted_jobs.append(job_info) + except Exception as e: + logger.error(f"Error loading platform insights tasks: {e}", exc_info=True) + # Get active strategies count active_strategies = stats.get('active_strategies_count', 0) # Get last_update from stats (added by scheduler for frontend polling) last_update = stats.get('last_update') - # Calculate cumulative/historical values from scheduler_event_logs + # Calculate cumulative/historical values from persistent cumulative stats table + # Fallback to event logs aggregation if cumulative stats table doesn't exist or is invalid cumulative_stats = {} try: - # First, check total events in database for debugging - total_events = db.query(func.count(SchedulerEventLog.id)).scalar() or 0 + from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats - # Check for check_cycle events specifically - check_cycle_count = 
db.query(func.count(SchedulerEventLog.id)).filter( - SchedulerEventLog.event_type == 'check_cycle' - ).scalar() or 0 + # Try to get cumulative stats from dedicated table (persistent across restarts) + cumulative_stats_row = db.query(SchedulerCumulativeStats).filter( + SchedulerCumulativeStats.id == 1 + ).first() - # Also check for other event types that might have task counts - job_failed_count = db.query(func.count(SchedulerEventLog.id)).filter( - SchedulerEventLog.event_type == 'job_failed' - ).scalar() or 0 - job_completed_count = db.query(func.count(SchedulerEventLog.id)).filter( - SchedulerEventLog.event_type == 'job_completed' - ).scalar() or 0 - - logger.warning( - f"[Dashboard] Database stats: {total_events} total events, " - f"{check_cycle_count} check_cycles, {job_failed_count} job_failed, " - f"{job_completed_count} job_completed" - ) - - if check_cycle_count > 0: - logger.warning(f"[Dashboard] Found {check_cycle_count} check cycle events in database") - # Aggregate check cycle events for cumulative totals - result = db.query( - func.count(SchedulerEventLog.id), - func.sum(SchedulerEventLog.tasks_found), - func.sum(SchedulerEventLog.tasks_executed), - func.sum(SchedulerEventLog.tasks_failed) - ).filter( - SchedulerEventLog.event_type == 'check_cycle' - ).first() + if cumulative_stats_row: + # Use persistent cumulative stats + cumulative_stats = { + 'total_check_cycles': int(cumulative_stats_row.total_check_cycles or 0), + 'cumulative_tasks_found': int(cumulative_stats_row.cumulative_tasks_found or 0), + 'cumulative_tasks_executed': int(cumulative_stats_row.cumulative_tasks_executed or 0), + 'cumulative_tasks_failed': int(cumulative_stats_row.cumulative_tasks_failed or 0), + 'cumulative_tasks_skipped': int(cumulative_stats_row.cumulative_tasks_skipped or 0), + 'cumulative_job_completed': int(cumulative_stats_row.cumulative_job_completed or 0), + 'cumulative_job_failed': int(cumulative_stats_row.cumulative_job_failed or 0) + } - if result: - # SQLAlchemy 
returns tuple for multi-column queries - # SUM returns NULL when no rows, handle that - total_cycles = result[0] if result[0] is not None else 0 - total_found = result[1] if result[1] is not None else 0 - total_executed = result[2] if result[2] is not None else 0 - total_failed = result[3] if result[3] is not None else 0 - - cumulative_stats = { - 'total_check_cycles': int(total_cycles), - 'cumulative_tasks_found': int(total_found), - 'cumulative_tasks_executed': int(total_executed), - 'cumulative_tasks_failed': int(total_failed) - } - - logger.warning(f"[Dashboard] Cumulative stats from check_cycles: {cumulative_stats}") - else: - # No results (shouldn't happen with COUNT, but handle it) - cumulative_stats = { - 'total_check_cycles': 0, - 'cumulative_tasks_found': 0, - 'cumulative_tasks_executed': 0, - 'cumulative_tasks_failed': 0 - } - logger.warning("[Dashboard] Query returned None (no check cycle events)") + logger.debug( + f"[Dashboard] Using persistent cumulative stats: " + f"cycles={cumulative_stats['total_check_cycles']}, " + f"found={cumulative_stats['cumulative_tasks_found']}, " + f"executed={cumulative_stats['cumulative_tasks_executed']}, " + f"failed={cumulative_stats['cumulative_tasks_failed']}" + ) + + # Validate cumulative stats by comparing with event logs (for verification) + check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter( + SchedulerEventLog.event_type == 'check_cycle' + ).scalar() or 0 + + if cumulative_stats['total_check_cycles'] != check_cycle_count: + logger.warning( + f"[Dashboard] ⚠️ Cumulative stats validation mismatch: " + f"cumulative_stats.total_check_cycles={cumulative_stats['total_check_cycles']} " + f"vs event_logs.count={check_cycle_count}. " + f"Rebuilding cumulative stats from event logs..." 
+ ) + # Rebuild cumulative stats from event logs + cumulative_stats = _rebuild_cumulative_stats_from_events(db) + # Update the persistent table + if cumulative_stats_row: + cumulative_stats_row.total_check_cycles = cumulative_stats['total_check_cycles'] + cumulative_stats_row.cumulative_tasks_found = cumulative_stats['cumulative_tasks_found'] + cumulative_stats_row.cumulative_tasks_executed = cumulative_stats['cumulative_tasks_executed'] + cumulative_stats_row.cumulative_tasks_failed = cumulative_stats['cumulative_tasks_failed'] + cumulative_stats_row.cumulative_tasks_skipped = cumulative_stats.get('cumulative_tasks_skipped', 0) + db.commit() + logger.warning(f"[Dashboard] ✅ Rebuilt cumulative stats: {cumulative_stats}") else: - # No check cycles yet, but we can still show job counts - # Log detailed info about why cumulative stats are 0 - if stats.get('total_checks', 0) > 0: - logger.warning( - f"[Dashboard] ⚠️ Scheduler shows {stats.get('total_checks', 0)} checks in memory, " - f"but NO check_cycle events found in database. " - f"This suggests check_cycle events are not being saved properly." - ) - else: - logger.warning( - f"[Dashboard] No check_cycle events yet. " - f"Scheduler interval: {stats.get('check_interval_minutes', 60)}min. " - f"First check cycle will run after interval expires. " - f"One-time jobs: {job_completed_count} completed, {job_failed_count} failed" - ) + # Cumulative stats table doesn't exist or is empty, rebuild from event logs + logger.warning( + "[Dashboard] Cumulative stats table not found or empty. " + "Rebuilding from event logs..." 
+ ) + cumulative_stats = _rebuild_cumulative_stats_from_events(db) + + # Create/update the persistent table + cumulative_stats_row = SchedulerCumulativeStats.get_or_create(db) + cumulative_stats_row.total_check_cycles = cumulative_stats['total_check_cycles'] + cumulative_stats_row.cumulative_tasks_found = cumulative_stats['cumulative_tasks_found'] + cumulative_stats_row.cumulative_tasks_executed = cumulative_stats['cumulative_tasks_executed'] + cumulative_stats_row.cumulative_tasks_failed = cumulative_stats['cumulative_tasks_failed'] + cumulative_stats_row.cumulative_tasks_skipped = cumulative_stats.get('cumulative_tasks_skipped', 0) + db.commit() + logger.warning(f"[Dashboard] ✅ Created/updated cumulative stats: {cumulative_stats}") + + except ImportError: + # Cumulative stats model doesn't exist yet (migration not run) + logger.warning( + "[Dashboard] Cumulative stats model not found. " + "Falling back to event logs aggregation. " + "Run migration: create_scheduler_cumulative_stats.sql" + ) + cumulative_stats = _rebuild_cumulative_stats_from_events(db) except Exception as e: - logger.error(f"Error calculating cumulative stats: {e}", exc_info=True) - cumulative_stats = { - 'total_check_cycles': 0, - 'cumulative_tasks_found': 0, - 'cumulative_tasks_executed': 0, - 'cumulative_tasks_failed': 0 - } + logger.error(f"[Dashboard] Error getting cumulative stats: {e}", exc_info=True) + # Fallback to event logs aggregation + cumulative_stats = _rebuild_cumulative_stats_from_events(db) return { 'stats': { @@ -259,8 +390,9 @@ async def get_scheduler_dashboard( }, 'jobs': formatted_jobs, 'job_count': len(formatted_jobs), - 'recurring_jobs': 1 + len([j for j in formatted_jobs if j.get('is_database_task')]), # check_due_tasks + OAuth tasks + 'recurring_jobs': 1 + len([j for j in formatted_jobs if j.get('is_database_task')]), # check_due_tasks + all DB tasks 'one_time_jobs': len([j for j in formatted_jobs if not j.get('is_database_task') and j.get('trigger_type') == 
def _isoformat_or_none(value):
    """Return ``value.isoformat()`` for a set timestamp, otherwise ``None``."""
    return value.isoformat() if value else None


def _ensure_user_matches(current_user: Dict[str, Any], owner_id: str) -> None:
    """Raise HTTP 403 unless the authenticated user's id equals ``owner_id``."""
    if str(current_user.get('id')) != owner_id:
        raise HTTPException(status_code=403, detail="Access denied")


@router.get("/platform-insights/status/{user_id}")
async def get_platform_insights_status(
    user_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Get platform insights task status for a user.

    Side effect: for every connected insights platform ('gsc', 'bing') that has
    no task yet, a task is created (with ``site_url=None``) before responding.

    Args:
        user_id: User ID from the path; must equal the authenticated user's id.

    Returns:
        Dict with 'success', 'user_id', 'gsc_tasks', 'bing_tasks', 'total_tasks'.

    Raises:
        HTTPException: 403 for another user's data, 500 on any other failure.
    """
    try:
        # Verify user can only access their own data
        _ensure_user_matches(current_user, user_id)

        logger.debug(f"[Platform Insights Status] Getting status for user: {user_id}")

        def load_tasks():
            # Same query is needed before and after auto-creating missing tasks.
            return db.query(PlatformInsightsTask).filter(
                PlatformInsightsTask.user_id == user_id
            ).order_by(PlatformInsightsTask.platform, PlatformInsightsTask.created_at).all()

        tasks = load_tasks()

        # Check if user has connected platforms but missing insights tasks
        # Auto-create missing tasks for connected platforms
        from services.oauth_token_monitoring_service import get_connected_platforms
        from services.platform_insights_monitoring_service import create_platform_insights_task

        connected_platforms = get_connected_platforms(user_id)
        insights_platforms = ['gsc', 'bing']
        connected_insights = [p for p in connected_platforms if p in insights_platforms]

        existing_platforms = {task.platform for task in tasks}
        missing_platforms = [p for p in connected_insights if p not in existing_platforms]

        if missing_platforms:
            logger.info(
                f"[Platform Insights Status] User {user_id} has connected platforms {missing_platforms} "
                f"but missing insights tasks. Creating tasks..."
            )

            for platform in missing_platforms:
                try:
                    # Don't fetch site_url here - it requires API calls.
                    # The executor will fetch it when the task runs.
                    result = create_platform_insights_task(
                        user_id=user_id,
                        platform=platform,
                        site_url=None,  # Will be fetched by executor when task runs
                        db=db
                    )

                    if result.get('success'):
                        logger.info(f"[Platform Insights Status] Created {platform.upper()} insights task for user {user_id}")
                    else:
                        logger.warning(f"[Platform Insights Status] Failed to create {platform} task: {result.get('error')}")
                except Exception as e:
                    # Best effort: one platform failing must not block the status response.
                    logger.warning(f"[Platform Insights Status] Error creating {platform} task: {e}", exc_info=True)

            # Re-query tasks after creation
            tasks = load_tasks()

        # Group tasks by platform
        gsc_tasks = [t for t in tasks if t.platform == 'gsc']
        bing_tasks = [t for t in tasks if t.platform == 'bing']

        logger.debug(
            f"[Platform Insights Status] Found {len(tasks)} total tasks: "
            f"{len(gsc_tasks)} GSC, {len(bing_tasks)} Bing"
        )

        def format_task(task: PlatformInsightsTask) -> Dict[str, Any]:
            return {
                'id': task.id,
                'platform': task.platform,
                'site_url': task.site_url,
                'status': task.status,
                'last_check': _isoformat_or_none(task.last_check),
                'last_success': _isoformat_or_none(task.last_success),
                'last_failure': _isoformat_or_none(task.last_failure),
                'failure_reason': task.failure_reason,
                'next_check': _isoformat_or_none(task.next_check),
                'created_at': _isoformat_or_none(task.created_at),
                'updated_at': _isoformat_or_none(task.updated_at)
            }

        return {
            'success': True,
            'user_id': user_id,
            'gsc_tasks': [format_task(t) for t in gsc_tasks],
            'bing_tasks': [format_task(t) for t in bing_tasks],
            'total_tasks': len(tasks)
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting platform insights status for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get platform insights status: {str(e)}")


@router.get("/website-analysis/status/{user_id}")
async def get_website_analysis_status(
    user_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Get website analysis task status for a user.

    Args:
        user_id: User ID from the path; must equal the authenticated user's id.

    Returns:
        ``{'success': True, 'data': {...}}`` where ``data`` holds the user's
        'user_website_tasks', 'competitor_tasks' and the counters
        'total_tasks', 'active_tasks', 'failed_tasks'.

    Raises:
        HTTPException: 403 for another user's data, 500 on any other failure.
    """
    try:
        # Verify user can only access their own data
        _ensure_user_matches(current_user, user_id)

        logger.debug(f"[Website Analysis Status] Getting status for user: {user_id}")

        # Get all website analysis tasks for user
        tasks = db.query(WebsiteAnalysisTask).filter(
            WebsiteAnalysisTask.user_id == user_id
        ).order_by(WebsiteAnalysisTask.task_type, WebsiteAnalysisTask.created_at).all()

        # Separate user website and competitor tasks
        user_website_tasks = [t for t in tasks if t.task_type == 'user_website']
        competitor_tasks = [t for t in tasks if t.task_type == 'competitor']

        logger.debug(
            f"[Website Analysis Status] Found {len(tasks)} tasks for user {user_id}: "
            f"{len(user_website_tasks)} user website, {len(competitor_tasks)} competitors"
        )

        def format_task(task: WebsiteAnalysisTask) -> Dict[str, Any]:
            return {
                'id': task.id,
                'website_url': task.website_url,
                'task_type': task.task_type,
                'competitor_id': task.competitor_id,
                'status': task.status,
                'last_check': _isoformat_or_none(task.last_check),
                'last_success': _isoformat_or_none(task.last_success),
                'last_failure': _isoformat_or_none(task.last_failure),
                'failure_reason': task.failure_reason,
                'next_check': _isoformat_or_none(task.next_check),
                'frequency_days': task.frequency_days,
                'created_at': _isoformat_or_none(task.created_at),
                'updated_at': _isoformat_or_none(task.updated_at)
            }

        active_tasks = sum(1 for t in tasks if t.status == 'active')
        failed_tasks = sum(1 for t in tasks if t.status == 'failed')

        return {
            'success': True,
            'data': {
                'user_id': user_id,
                'user_website_tasks': [format_task(t) for t in user_website_tasks],
                'competitor_tasks': [format_task(t) for t in competitor_tasks],
                'total_tasks': len(tasks),
                'active_tasks': active_tasks,
                'failed_tasks': failed_tasks
            }
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting website analysis status for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get website analysis status: {str(e)}")


@router.get("/website-analysis/logs/{user_id}")
async def get_website_analysis_logs(
    user_id: str,
    task_id: Optional[int] = Query(None),
    limit: int = Query(10, ge=1, le=100),
    offset: int = Query(0, ge=0),
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Get execution logs for website analysis tasks.

    Args:
        user_id: User ID
        task_id: Optional task ID to filter logs
        limit: Maximum number of logs to return
        offset: Pagination offset

    Returns:
        Dict with 'logs' (newest first), 'total_count' (all matching logs,
        ignoring pagination), 'limit', 'offset' and 'has_more'.

    Raises:
        HTTPException: 403 for another user's data, 500 on any other failure.
    """
    try:
        # Verify user can only access their own data
        _ensure_user_matches(current_user, user_id)

        query = db.query(WebsiteAnalysisExecutionLog).join(
            WebsiteAnalysisTask,
            WebsiteAnalysisExecutionLog.task_id == WebsiteAnalysisTask.id
        ).filter(
            WebsiteAnalysisTask.user_id == user_id
        )

        # `is not None` so an explicit task_id=0 is honoured as a filter
        # instead of silently returning every log.
        if task_id is not None:
            query = query.filter(WebsiteAnalysisExecutionLog.task_id == task_id)

        # Get total count
        total_count = query.count()

        logs = query.order_by(
            desc(WebsiteAnalysisExecutionLog.execution_date)
        ).offset(offset).limit(limit).all()

        # Load every referenced task in one query instead of one query per log.
        task_ids = list({log.task_id for log in logs})
        tasks_by_id = {}
        if task_ids:
            tasks_by_id = {
                task.id: task
                for task in db.query(WebsiteAnalysisTask).filter(
                    WebsiteAnalysisTask.id.in_(task_ids)
                ).all()
            }

        formatted_logs = []
        for log in logs:
            task = tasks_by_id.get(log.task_id)
            formatted_logs.append({
                'id': log.id,
                'task_id': log.task_id,
                'website_url': task.website_url if task else None,
                'task_type': task.task_type if task else None,
                'execution_date': _isoformat_or_none(log.execution_date),
                'status': log.status,
                'result_data': log.result_data,
                'error_message': log.error_message,
                'execution_time_ms': log.execution_time_ms,
                'created_at': _isoformat_or_none(log.created_at)
            })

        return {
            'logs': formatted_logs,
            'total_count': total_count,
            'limit': limit,
            'offset': offset,
            'has_more': (offset + limit) < total_count
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting website analysis logs for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get website analysis logs: {str(e)}")


@router.post("/website-analysis/retry/{task_id}")
async def retry_website_analysis(
    task_id: int,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Manually re-schedule a website analysis task for immediate execution.

    The task is set to 'active', its failure reason is cleared and
    ``next_check`` is set to the current UTC time.

    Args:
        task_id: Task ID to retry

    Returns:
        Dict with 'success', 'message' and the updated 'task' summary.

    Raises:
        HTTPException: 404 when the task does not exist, 403 when it belongs to
            another user, 500 on any other failure.
    """
    try:
        # Get task
        task = db.query(WebsiteAnalysisTask).filter(WebsiteAnalysisTask.id == task_id).first()

        if not task:
            raise HTTPException(status_code=404, detail="Task not found")

        # Verify user can only access their own tasks
        _ensure_user_matches(current_user, task.user_id)

        # One timestamp so next_check and updated_at agree exactly.
        now = datetime.utcnow()
        task.status = 'active'
        task.failure_reason = None
        task.next_check = now  # Schedule immediately
        task.updated_at = now

        db.commit()

        logger.info(f"Manually retried website analysis task {task_id} for user {task.user_id}")

        return {
            'success': True,
            'message': f'Website analysis task {task_id} scheduled for immediate execution',
            'task': {
                'id': task.id,
                'website_url': task.website_url,
                'status': task.status,
                'next_check': _isoformat_or_none(task.next_check)
            }
        }

    except HTTPException:
        raise
    except Exception as e:
        # Discard the half-applied update so the session is not left dirty.
        db.rollback()
        logger.error(f"Error retrying website analysis task {task_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to retry website analysis: {str(e)}")


@router.get("/platform-insights/logs/{user_id}")
async def get_platform_insights_logs(
    user_id: str,
    task_id: Optional[int] = Query(None),
    limit: int = Query(10, ge=1, le=100),
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Get execution logs for platform insights tasks.

    Args:
        user_id: User ID
        task_id: Optional task ID to filter logs
        limit: Maximum number of logs to return

    Returns:
        Dict with 'success', 'logs' (newest first, at most ``limit`` entries)
        and 'total_count' (number of matching logs before ``limit`` is applied).

    Raises:
        HTTPException: 403 for another user's data, 500 on any other failure.
    """
    try:
        # Verify user can only access their own data
        _ensure_user_matches(current_user, user_id)

        query = db.query(PlatformInsightsExecutionLog).join(
            PlatformInsightsTask,
            PlatformInsightsExecutionLog.task_id == PlatformInsightsTask.id
        ).filter(
            PlatformInsightsTask.user_id == user_id
        )

        if task_id is not None:
            query = query.filter(PlatformInsightsExecutionLog.task_id == task_id)

        # Count before limiting; len(logs) would cap the "total" at `limit`.
        total_count = query.count()

        logs = query.order_by(
            desc(PlatformInsightsExecutionLog.execution_date)
        ).limit(limit).all()

        def format_log(log: PlatformInsightsExecutionLog) -> Dict[str, Any]:
            return {
                'id': log.id,
                'task_id': log.task_id,
                'execution_date': _isoformat_or_none(log.execution_date),
                'status': log.status,
                'result_data': log.result_data,
                'error_message': log.error_message,
                'execution_time_ms': log.execution_time_ms,
                'data_source': log.data_source,
                'created_at': _isoformat_or_none(log.created_at)
            }

        return {
            'success': True,
            'logs': [format_log(log) for log in logs],
            'total_count': total_count
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting platform insights logs for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get platform insights logs: {str(e)}")
""" from fastapi import APIRouter, HTTPException, Depends, Request +from fastapi.responses import HTMLResponse from typing import Dict, Any, Optional from loguru import logger from pydantic import BaseModel from services.wix_service import WixService +from services.integrations.wix_oauth import WixOAuthService from middleware.auth_middleware import get_current_user +import os router = APIRouter(prefix="/api/wix", tags=["Wix Integration"]) # Initialize Wix service wix_service = WixService() +# Initialize Wix OAuth service for token storage +wix_oauth_service = WixOAuthService(db_path=os.path.abspath("alwrity.db")) + class WixAuthRequest(BaseModel): """Request model for Wix authentication""" @@ -88,17 +94,41 @@ async def handle_oauth_callback(request: WixAuthRequest, current_user: dict = De Token information and connection status """ try: + user_id = current_user.get('id') + if not user_id: + raise HTTPException(status_code=400, detail="User ID not found") + # Exchange code for tokens tokens = wix_service.exchange_code_for_tokens(request.code) - # Get site information + # Get site information to extract site_id and member_id site_info = wix_service.get_site_info(tokens['access_token']) + site_id = site_info.get('siteId') or site_info.get('site_id') + + # Extract member_id from token if possible + member_id = None + try: + member_id = wix_service.extract_member_id_from_access_token(tokens['access_token']) + except Exception: + pass # Check permissions permissions = wix_service.check_blog_permissions(tokens['access_token']) - # TODO: Store tokens securely in database associated with current_user - # For now, we'll return them (in production, store in encrypted database) + # Store tokens securely in database + stored = wix_oauth_service.store_tokens( + user_id=user_id, + access_token=tokens['access_token'], + refresh_token=tokens.get('refresh_token'), + expires_in=tokens.get('expires_in'), + token_type=tokens.get('token_type', 'Bearer'), + scope=tokens.get('scope'), + 
site_id=site_id, + member_id=member_id + ) + + if not stored: + logger.warning(f"Failed to store Wix tokens for user {user_id}, but OAuth succeeded") return { "success": True, @@ -125,6 +155,29 @@ async def handle_oauth_callback_get(code: str, state: Optional[str] = None, requ tokens = wix_service.exchange_code_for_tokens(code) site_info = wix_service.get_site_info(tokens['access_token']) permissions = wix_service.check_blog_permissions(tokens['access_token']) + + # Store tokens in database if we have user_id + user_id = current_user.get('id') if current_user else None + if user_id: + site_id = site_info.get('siteId') or site_info.get('site_id') + member_id = None + try: + member_id = wix_service.extract_member_id_from_access_token(tokens['access_token']) + except Exception: + pass + + stored = wix_oauth_service.store_tokens( + user_id=user_id, + access_token=tokens['access_token'], + refresh_token=tokens.get('refresh_token'), + expires_in=tokens.get('expires_in'), + token_type=tokens.get('token_type', 'Bearer'), + scope=tokens.get('scope'), + site_id=site_id, + member_id=member_id + ) + if not stored: + logger.warning(f"Failed to store Wix tokens for user {user_id} in GET callback") # Build success payload for postMessage payload = { diff --git a/backend/models/blog_models.py b/backend/models/blog_models.py index 666ddcc1..ec2d7040 100644 --- a/backend/models/blog_models.py +++ b/backend/models/blog_models.py @@ -1,5 +1,5 @@ from pydantic import BaseModel, Field -from typing import List, Optional, Dict, Any +from typing import List, Optional, Dict, Any, Union from enum import Enum @@ -81,6 +81,7 @@ class ResearchProvider(str, Enum): """Research provider options.""" GOOGLE = "google" # Gemini native grounding EXA = "exa" # Exa neural search + TAVILY = "tavily" # Tavily AI-powered search class ResearchConfig(BaseModel): @@ -100,6 +101,23 @@ class ResearchConfig(BaseModel): exa_include_domains: List[str] = [] # Domain whitelist exa_exclude_domains: List[str] = [] # 
Domain blacklist exa_search_type: Optional[str] = "auto" # "auto", "keyword", "neural" + + # Tavily-specific options + tavily_topic: Optional[str] = "general" # general, news, finance + tavily_search_depth: Optional[str] = "basic" # basic (1 credit), advanced (2 credits) + tavily_include_domains: List[str] = [] # Domain whitelist (max 300) + tavily_exclude_domains: List[str] = [] # Domain blacklist (max 150) + tavily_include_answer: Union[bool, str] = False # basic, advanced, true, false + tavily_include_raw_content: Union[bool, str] = False # markdown, text, true, false + tavily_include_images: bool = False + tavily_include_image_descriptions: bool = False + tavily_include_favicon: bool = False + tavily_time_range: Optional[str] = None # day, week, month, year, d, w, m, y + tavily_start_date: Optional[str] = None # YYYY-MM-DD + tavily_end_date: Optional[str] = None # YYYY-MM-DD + tavily_country: Optional[str] = None # Country code (only for general topic) + tavily_chunks_per_source: int = 3 # 1-3 (only for advanced search) + tavily_auto_parameters: bool = False # Auto-configure parameters based on query class BlogResearchRequest(BaseModel): diff --git a/backend/models/onboarding.py b/backend/models/onboarding.py index 99e92b7f..f918d742 100644 --- a/backend/models/onboarding.py +++ b/backend/models/onboarding.py @@ -17,6 +17,7 @@ class OnboardingSession(Base): website_analyses = relationship('WebsiteAnalysis', back_populates='session', cascade="all, delete-orphan") research_preferences = relationship('ResearchPreferences', back_populates='session', cascade="all, delete-orphan", uselist=False) persona_data = relationship('PersonaData', back_populates='session', cascade="all, delete-orphan", uselist=False) + competitor_analyses = relationship('CompetitorAnalysis', back_populates='session', cascade="all, delete-orphan") def __repr__(self): return f"" @@ -188,4 +189,46 @@ class PersonaData(Base): 'research_persona_generated_at': 
class CompetitorAnalysis(Base):
    """Stores competitor website analysis results from scheduled analysis tasks."""
    __tablename__ = 'competitor_analyses'

    id = Column(Integer, primary_key=True, autoincrement=True)
    session_id = Column(Integer, ForeignKey('onboarding_sessions.id', ondelete='CASCADE'), nullable=False)
    competitor_url = Column(String(500), nullable=False)
    competitor_domain = Column(String(255), nullable=True)  # Extracted domain for easier queries
    analysis_date = Column(DateTime, default=func.now())

    # Complete analysis data (same structure as WebsiteAnalysis)
    analysis_data = Column(JSON)  # Contains style_analysis, crawl_result, style_patterns, style_guidelines

    # Metadata
    status = Column(String(50), default='completed')  # completed, failed, in_progress
    error_message = Column(Text, nullable=True)
    warning_message = Column(Text, nullable=True)
    created_at = Column(DateTime, default=func.now())
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now())

    # Relationships
    session = relationship('OnboardingSession', back_populates='competitor_analyses')

    def __repr__(self):
        # A non-empty repr keeps instances identifiable in logs and debuggers.
        return (
            f"<CompetitorAnalysis(id={self.id}, session_id={self.session_id}, "
            f"competitor_url={self.competitor_url!r}, status={self.status!r})>"
        )

    def to_dict(self):
        """Convert to dictionary for API responses (datetimes as ISO strings or None)."""
        return {
            'id': self.id,
            'session_id': self.session_id,
            'competitor_url': self.competitor_url,
            'competitor_domain': self.competitor_domain,
            'analysis_date': self.analysis_date.isoformat() if self.analysis_date else None,
            'analysis_data': self.analysis_data,
            'status': self.status,
            'error_message': self.error_message,
            'warning_message': self.warning_message,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None
        }


# ---------------------------------------------------------------------------
# backend/models/platform_insights_monitoring_models.py (new file)
#
# Platform Insights Monitoring Models
# Database models for tracking platform insights (GSC/Bing) fetch tasks.
# ---------------------------------------------------------------------------

from sqlalchemy import Column, Integer, String, Text, DateTime, JSON, Index, ForeignKey
from sqlalchemy.orm import relationship
from datetime import datetime

# Import the same Base from enhanced_strategy_models
from models.enhanced_strategy_models import Base


class PlatformInsightsTask(Base):
    """
    Model for storing platform insights fetch tasks.

    Tracks per-user, per-platform insights fetching with weekly updates.
    """
    __tablename__ = "platform_insights_tasks"

    id = Column(Integer, primary_key=True, index=True)

    # User and Platform Identification
    user_id = Column(String(255), nullable=False, index=True)  # Clerk user ID (string)
    platform = Column(String(50), nullable=False)  # 'gsc' or 'bing'
    site_url = Column(String(500), nullable=True)  # Optional: specific site URL

    # Task Status
    status = Column(String(50), default='active')  # 'active', 'failed', 'paused'

    # Execution Tracking
    last_check = Column(DateTime, nullable=True)
    last_success = Column(DateTime, nullable=True)
    last_failure = Column(DateTime, nullable=True)
    failure_reason = Column(Text, nullable=True)

    # Scheduling
    next_check = Column(DateTime, nullable=True, index=True)  # Next scheduled check time

    # Metadata
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Execution Logs Relationship (logs are deleted together with their task)
    execution_logs = relationship(
        "PlatformInsightsExecutionLog",
        back_populates="task",
        cascade="all, delete-orphan"
    )

    # Indexes for efficient queries
    __table_args__ = (
        Index('idx_platform_insights_user_platform', 'user_id', 'platform'),
        Index('idx_platform_insights_next_check', 'next_check'),
        Index('idx_platform_insights_status', 'status'),
    )

    def __repr__(self):
        return (
            f"<PlatformInsightsTask(id={self.id}, user_id={self.user_id!r}, "
            f"platform={self.platform!r}, status={self.status!r})>"
        )


class PlatformInsightsExecutionLog(Base):
    """
    Model for storing platform insights fetch execution logs.

    Tracks individual execution attempts with results and error details.
    """
    __tablename__ = "platform_insights_execution_logs"

    id = Column(Integer, primary_key=True, index=True)

    # Task Reference
    task_id = Column(Integer, ForeignKey("platform_insights_tasks.id"), nullable=False, index=True)

    # Execution Details
    execution_date = Column(DateTime, default=datetime.utcnow, nullable=False)
    status = Column(String(50), nullable=False)  # 'success', 'failed', 'skipped'

    # Results
    result_data = Column(JSON, nullable=True)  # Insights data, metrics, etc.
    error_message = Column(Text, nullable=True)
    execution_time_ms = Column(Integer, nullable=True)
    data_source = Column(String(50), nullable=True)  # 'cached', 'api', 'onboarding'

    # Metadata
    created_at = Column(DateTime, default=datetime.utcnow)

    # Relationship to task
    task = relationship("PlatformInsightsTask", back_populates="execution_logs")

    # Indexes for efficient queries
    __table_args__ = (
        Index('idx_platform_insights_log_task_execution_date', 'task_id', 'execution_date'),
        Index('idx_platform_insights_log_status', 'status'),
    )

    def __repr__(self):
        return (
            f"<PlatformInsightsExecutionLog(id={self.id}, task_id={self.task_id}, "
            f"status={self.status!r}, execution_date={self.execution_date})>"
        )
# ---------------------------------------------------------------------------
# backend/models/scheduler_cumulative_stats_model.py (new file)
#
# Scheduler Cumulative Stats Model
# Model for storing persistent cumulative scheduler metrics that survive restarts.
# ---------------------------------------------------------------------------

from sqlalchemy import Column, Integer, DateTime, Index
from datetime import datetime
from models.enhanced_strategy_models import Base


class SchedulerCumulativeStats(Base):
    """Model for storing cumulative scheduler metrics that persist across restarts"""
    __tablename__ = "scheduler_cumulative_stats"

    id = Column(Integer, primary_key=True, index=True, default=1)  # Always use id=1
    total_check_cycles = Column(Integer, default=0, nullable=False)
    cumulative_tasks_found = Column(Integer, default=0, nullable=False)
    cumulative_tasks_executed = Column(Integer, default=0, nullable=False)
    cumulative_tasks_failed = Column(Integer, default=0, nullable=False)
    cumulative_tasks_skipped = Column(Integer, default=0, nullable=False)
    cumulative_job_completed = Column(Integer, default=0, nullable=False)
    cumulative_job_failed = Column(Integer, default=0, nullable=False)

    last_updated = Column(DateTime, default=datetime.utcnow, nullable=False, onupdate=datetime.utcnow)
    last_check_cycle_id = Column(Integer, nullable=True)  # Reference to last check_cycle event log ID

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, nullable=False, onupdate=datetime.utcnow)

    __table_args__ = (
        Index('idx_scheduler_cumulative_stats_single_row', 'id', unique=True),
    )

    @classmethod
    def get_or_create(cls, db_session):
        """
        Get the cumulative stats row (id=1) or create it if it doesn't exist.

        Creating the row commits ``db_session``. If another session inserts the
        row first, the resulting IntegrityError is rolled back and the existing
        row is returned instead.

        Args:
            db_session: SQLAlchemy session used for the lookup and the insert.

        Returns:
            SchedulerCumulativeStats instance

        Raises:
            sqlalchemy.exc.IntegrityError: if the insert fails and the row
                still cannot be found afterwards.
        """
        # Local import: the module-level imports do not include sqlalchemy.exc.
        from sqlalchemy.exc import IntegrityError

        stats = db_session.query(cls).filter(cls.id == 1).first()
        if stats:
            return stats

        stats = cls(id=1)
        db_session.add(stats)
        try:
            db_session.commit()
        except IntegrityError:
            # A concurrent caller created the singleton row between our SELECT
            # and INSERT; reuse theirs rather than failing the request.
            db_session.rollback()
            stats = db_session.query(cls).filter(cls.id == 1).first()
            if stats is None:
                raise
        return stats


# ---------------------------------------------------------------------------
# backend/models/website_analysis_monitoring_models.py (new file)
#
# Website Analysis Monitoring Models
# Database models for tracking website analysis tasks and execution logs.
# ---------------------------------------------------------------------------

from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean, JSON, Index, ForeignKey
from sqlalchemy.orm import relationship
from datetime import datetime

# Import the same Base from enhanced_strategy_models
from models.enhanced_strategy_models import Base


class WebsiteAnalysisTask(Base):
    """
    Model for storing website analysis monitoring tasks.

    Tracks per-user, per-URL website analysis with recurring checks.
    """
    __tablename__ = "website_analysis_tasks"

    id = Column(Integer, primary_key=True, index=True)

    # User and URL Identification
    user_id = Column(String(255), nullable=False, index=True)  # Clerk user ID (string)
    website_url = Column(String(500), nullable=False)  # URL to analyze
    task_type = Column(String(50), nullable=False)  # 'user_website' or 'competitor'
    competitor_id = Column(String(255), nullable=True)  # For competitor tasks (domain or identifier)

    # Task Status
    status = Column(String(50), default='active')  # 'active', 'failed', 'paused'

    # Execution Tracking
    last_check = Column(DateTime, nullable=True)
    last_success = Column(DateTime, nullable=True)
    last_failure = Column(DateTime, nullable=True)
    failure_reason = Column(Text, nullable=True)

    # Scheduling
    next_check = Column(DateTime, nullable=True, index=True)  # Next scheduled check time
    frequency_days = Column(Integer, default=10)  # Recurring frequency in days

    # Metadata
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Execution Logs Relationship (logs are deleted together with their task)
    execution_logs = relationship(
        "WebsiteAnalysisExecutionLog",
        back_populates="task",
        cascade="all, delete-orphan"
    )

    # Indexes for efficient queries
    # Note: Index names match migration script to avoid conflicts
    __table_args__ = (
        Index('idx_website_analysis_tasks_user_url', 'user_id', 'website_url'),
        Index('idx_website_analysis_tasks_user_task_type', 'user_id', 'task_type'),
        Index('idx_website_analysis_tasks_next_check', 'next_check'),
        Index('idx_website_analysis_tasks_status', 'status'),
        Index('idx_website_analysis_tasks_task_type', 'task_type'),
    )

    def __repr__(self):
        # A non-empty repr keeps instances identifiable in logs and debuggers.
        return (
            f"<WebsiteAnalysisTask(id={self.id}, user_id={self.user_id!r}, "
            f"website_url={self.website_url!r}, task_type={self.task_type!r}, "
            f"status={self.status!r})>"
        )


class WebsiteAnalysisExecutionLog(Base):
    """
    Model for storing website analysis execution logs.

    Tracks individual execution attempts with results and error details.
    """
    __tablename__ = "website_analysis_execution_logs"

    id = Column(Integer, primary_key=True, index=True)

    # Task Reference
    task_id = Column(Integer, ForeignKey("website_analysis_tasks.id"), nullable=False, index=True)

    # Execution Details
    execution_date = Column(DateTime, default=datetime.utcnow, nullable=False)
    status = Column(String(50), nullable=False)  # 'success', 'failed', 'skipped', 'running'

    # Results
    result_data = Column(JSON, nullable=True)  # Analysis results (style_analysis, crawl_result, etc.)
    error_message = Column(Text, nullable=True)
    execution_time_ms = Column(Integer, nullable=True)

    # Metadata
    created_at = Column(DateTime, default=datetime.utcnow)

    # Relationship to task
    task = relationship("WebsiteAnalysisTask", back_populates="execution_logs")

    # Indexes for efficient queries
    # Note: Index names match migration script to avoid conflicts
    __table_args__ = (
        Index('idx_website_analysis_execution_logs_task_execution_date', 'task_id', 'execution_date'),
        Index('idx_website_analysis_execution_logs_status', 'status'),
    )

    def __repr__(self):
        return (
            f"<WebsiteAnalysisExecutionLog(id={self.id}, task_id={self.task_id}, "
            f"status={self.status!r}, execution_date={self.execution_date})>"
        )
user_id = result_db[0] + + # Don't fetch site_url here - it requires API calls + # The executor will fetch it when the task runs (weekly) + # Create insights task without site_url to avoid API calls + task_result = create_platform_insights_task( + user_id=user_id, + platform='bing', + site_url=None, # Will be fetched by executor when task runs + db=db + ) + + if task_result.get('success'): + logger.info(f"Created Bing insights task for user {user_id}") + else: + logger.warning(f"Failed to create Bing insights task: {task_result.get('error')}") + finally: + db.close() + except Exception as e: + # Non-critical: log but don't fail OAuth callback + logger.warning(f"Failed to create Bing insights task after OAuth: {e}") + # Return success page with postMessage script html_content = f""" diff --git a/backend/routers/gsc_auth.py b/backend/routers/gsc_auth.py index 052905c8..165c74bd 100644 --- a/backend/routers/gsc_auth.py +++ b/backend/routers/gsc_auth.py @@ -66,6 +66,45 @@ async def handle_gsc_callback( if success: logger.info("GSC OAuth callback handled successfully") + + # Create GSC insights task immediately after successful connection + try: + from services.database import SessionLocal + from services.platform_insights_monitoring_service import create_platform_insights_task + + # Get user_id from state (stored during OAuth flow) + # Note: handle_oauth_callback already deleted state, so we need to get user_id from recent credentials + db = SessionLocal() + try: + # Get user_id from most recent GSC credentials (since state was deleted) + import sqlite3 + with sqlite3.connect(gsc_service.db_path) as conn: + cursor = conn.cursor() + cursor.execute('SELECT user_id FROM gsc_credentials ORDER BY updated_at DESC LIMIT 1') + result = cursor.fetchone() + if result: + user_id = result[0] + + # Don't fetch site_url here - it requires API calls + # The executor will fetch it when the task runs (weekly) + # Create insights task without site_url to avoid API calls + task_result = 
def fix_indexes(db_path=None):
    """Drop the legacy ``idx_status`` indexes that clash on website-analysis tables.

    Args:
        db_path: Path to the SQLite database. When omitted, ``alwrity.db`` in
            the backend directory is used, exactly as before.

    Returns:
        True when the database was inspected and every conflicting index was
        dropped (or none existed). False when the database file is missing,
        when any drop failed, or when another error occurred.
    """
    db_path = Path(db_path) if db_path is not None else backend_dir / "alwrity.db"

    if not db_path.exists():
        logger.error(f"Database not found at {db_path}")
        return False

    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()

    try:
        # Check for old conflicting indexes
        cursor.execute("""
            SELECT name, tbl_name
            FROM sqlite_master
            WHERE type='index'
            AND name = 'idx_status'
            AND tbl_name IN ('website_analysis_tasks', 'website_analysis_execution_logs')
        """)
        conflicting = cursor.fetchall()

        failed = []
        if conflicting:
            logger.warning(f"Found {len(conflicting)} conflicting indexes:")
            for name, tbl_name in conflicting:
                logger.warning(f"  - {name} on {tbl_name}")

            # Drop old indexes
            for name, tbl_name in conflicting:
                # Quote the identifier so an unusual index name cannot break
                # (or alter) the statement.
                quoted = '"' + name.replace('"', '""') + '"'
                try:
                    cursor.execute(f"DROP INDEX IF EXISTS {quoted}")
                    logger.info(f"✅ Dropped old index: {name} on {tbl_name}")
                except Exception as e:
                    failed.append(name)
                    logger.error(f"❌ Error dropping index {name}: {e}")

            # Keep whatever was dropped successfully even if another drop failed.
            conn.commit()
            if failed:
                logger.error(f"❌ Could not drop {len(failed)} conflicting index(es): {failed}")
            else:
                logger.info("✅ Index conflicts resolved")
        else:
            logger.info("✅ No conflicting indexes found")

        # Verify correct indexes exist
        cursor.execute("""
            SELECT name, tbl_name
            FROM sqlite_master
            WHERE type='index'
            AND (name LIKE '%website_analysis%' OR name LIKE '%competitor_analyses%')
            ORDER BY tbl_name, name
        """)
        indexes = cursor.fetchall()
        logger.info(f"\n📋 Current website analysis indexes ({len(indexes)}):")
        for name, tbl_name in indexes:
            logger.info(f"  - {name} on {tbl_name}")

        # A swallowed drop failure must not be reported as success, otherwise
        # the script tells the operator the backend can be restarted.
        return not failed

    except Exception as e:
        logger.error(f"Error fixing indexes: {e}")
        conn.rollback()
        return False
    finally:
        conn.close()
#!/usr/bin/env python3
"""Verify cumulative stats table exists and has data"""

import os
import sqlite3
import sys
from contextlib import closing

script_dir = os.path.dirname(os.path.abspath(__file__))
backend_dir = os.path.dirname(script_dir)
db_path = os.path.join(backend_dir, 'alwrity.db')


def verify_cumulative_stats(path: str = db_path) -> bool:
    """Report whether ``scheduler_cumulative_stats`` exists and holds row id=1.

    Prints the same status lines the script has always printed.

    Args:
        path: SQLite database file to inspect; defaults to the backend's
            ``alwrity.db``.

    Returns:
        True only when the table exists and contains a row with ``id=1``.
    """
    # sqlite3.connect() would silently create an empty database here, which
    # both hides the real problem and leaves a stray file behind.
    if not os.path.exists(path):
        print(f"❌ Database not found at {path}")
        return False

    # closing() guarantees the connection is released even if a query raises.
    with closing(sqlite3.connect(path)) as conn:
        cursor = conn.cursor()

        # Check if table exists
        cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='scheduler_cumulative_stats'"
        )
        table = cursor.fetchone()
        print(f"Table exists: {table is not None}")

        if table is None:
            print("Table does not exist")
            return False

        cursor.execute("SELECT * FROM scheduler_cumulative_stats WHERE id=1")
        row = cursor.fetchone()
        if row is None:
            print("Table exists but no row with id=1")
            return False

        print(f"Row data: {row}")
        return True


if __name__ == "__main__":
    # Exit status mirrors the verification result so the script can be chained.
    sys.exit(0 if verify_cumulative_stats() else 1)
a/backend/services/blog_writer/research/__init__.py +++ b/backend/services/blog_writer/research/__init__.py @@ -16,6 +16,7 @@ from .data_filter import ResearchDataFilter from .base_provider import ResearchProvider as BaseResearchProvider from .google_provider import GoogleResearchProvider from .exa_provider import ExaResearchProvider +from .tavily_provider import TavilyResearchProvider __all__ = [ 'ResearchService', @@ -26,4 +27,5 @@ __all__ = [ 'BaseResearchProvider', 'GoogleResearchProvider', 'ExaResearchProvider', + 'TavilyResearchProvider', ] diff --git a/backend/services/blog_writer/research/research_service.py b/backend/services/blog_writer/research/research_service.py index da25a177..42ccd373 100644 --- a/backend/services/blog_writer/research/research_service.py +++ b/backend/services/blog_writer/research/research_service.py @@ -150,8 +150,94 @@ class ResearchService: raw_result = None else: raise + + elif config.provider == ResearchProvider.TAVILY: + # Tavily research workflow + from .tavily_provider import TavilyResearchProvider + from services.database import get_db + from services.subscription import PricingService + import os + import time - if config.provider != ResearchProvider.EXA: + # Pre-flight validation (similar to Exa) + db_val = next(get_db()) + try: + pricing_service = PricingService(db_val) + # Check Tavily usage limits + limits = pricing_service.get_user_limits(user_id) + tavily_limit = limits.get('limits', {}).get('tavily_calls', 0) if limits else 0 + + # Get current usage + from models.subscription_models import UsageSummary + from datetime import datetime + current_period = pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m") + usage = db_val.query(UsageSummary).filter( + UsageSummary.user_id == user_id, + UsageSummary.billing_period == current_period + ).first() + + current_calls = getattr(usage, 'tavily_calls', 0) or 0 if usage else 0 + + if tavily_limit > 0 and current_calls >= tavily_limit: + raise 
HTTPException( + status_code=429, + detail={ + 'error': 'Tavily API call limit exceeded', + 'message': f'You have reached your Tavily API call limit ({tavily_limit} calls). Please upgrade your plan or wait for the next billing period.', + 'provider': 'tavily', + 'usage_info': { + 'current': current_calls, + 'limit': tavily_limit + } + } + ) + except HTTPException: + raise + except Exception as e: + logger.warning(f"Error checking Tavily limits: {e}") + finally: + db_val.close() + + # Execute Tavily search + api_start_time = time.time() + try: + tavily_provider = TavilyResearchProvider() + raw_result = await tavily_provider.search( + research_prompt, topic, industry, target_audience, config, user_id + ) + api_duration_ms = (time.time() - api_start_time) * 1000 + + # Track usage + cost = raw_result.get('cost', {}).get('total', 0.001) if isinstance(raw_result.get('cost'), dict) else 0.001 + search_depth = config.tavily_search_depth or "basic" + tavily_provider.track_tavily_usage(user_id, cost, search_depth) + + # Log API call performance + blog_writer_logger.log_api_call( + "tavily_search", + "search", + api_duration_ms, + token_usage={}, + content_length=len(raw_result.get('content', '')) + ) + + # Extract content for downstream analysis + content = raw_result.get('content', '') + sources = raw_result.get('sources', []) + search_widget = "" # Tavily doesn't provide search widgets + search_queries = raw_result.get('search_queries', []) + grounding_metadata = None # Tavily doesn't provide grounding metadata + + except RuntimeError as e: + if "TAVILY_API_KEY not configured" in str(e): + logger.warning("Tavily not configured, falling back to Google") + config.provider = ResearchProvider.GOOGLE + # Continue to Google flow below + raw_result = None + else: + raise + + if config.provider not in [ResearchProvider.EXA, ResearchProvider.TAVILY]: # Google research (existing flow) or fallback from Exa from .google_provider import GoogleResearchProvider import time @@ -412,8 
+498,94 @@ class ResearchService: # Continue to Google flow below else: raise + + elif config.provider == ResearchProvider.TAVILY: + # Tavily research workflow + from .tavily_provider import TavilyResearchProvider + from services.database import get_db + from services.subscription import PricingService + import os - if config.provider != ResearchProvider.EXA: + await task_manager.update_progress(task_id, "🌐 Connecting to Tavily AI search...") + + # Pre-flight validation + db_val = next(get_db()) + try: + pricing_service = PricingService(db_val) + # Check Tavily usage limits + limits = pricing_service.get_user_limits(user_id) + tavily_limit = limits.get('limits', {}).get('tavily_calls', 0) if limits else 0 + + # Get current usage + from models.subscription_models import UsageSummary + from datetime import datetime + current_period = pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m") + usage = db_val.query(UsageSummary).filter( + UsageSummary.user_id == user_id, + UsageSummary.billing_period == current_period + ).first() + + current_calls = getattr(usage, 'tavily_calls', 0) or 0 if usage else 0 + + if tavily_limit > 0 and current_calls >= tavily_limit: + await task_manager.update_progress(task_id, f"❌ Tavily API call limit exceeded ({current_calls}/{tavily_limit})") + raise HTTPException( + status_code=429, + detail={ + 'error': 'Tavily API call limit exceeded', + 'message': f'You have reached your Tavily API call limit ({tavily_limit} calls). 
Please upgrade your plan or wait for the next billing period.', + 'provider': 'tavily', + 'usage_info': { + 'current': current_calls, + 'limit': tavily_limit + } + } + ) + except HTTPException: + raise + except Exception as e: + logger.warning(f"Error checking Tavily limits: {e}") + finally: + db_val.close() + + # Execute Tavily search + await task_manager.update_progress(task_id, "🤖 Executing Tavily AI search...") + try: + tavily_provider = TavilyResearchProvider() + raw_result = await tavily_provider.search( + research_prompt, topic, industry, target_audience, config, user_id + ) + + # Track usage + cost = raw_result.get('cost', {}).get('total', 0.001) if isinstance(raw_result.get('cost'), dict) else 0.001 + search_depth = config.tavily_search_depth or "basic" + tavily_provider.track_tavily_usage(user_id, cost, search_depth) + + # Extract content for downstream analysis + if raw_result is None: + logger.error("raw_result is None after Tavily search") + raise ValueError("Tavily research result is None - search operation failed unexpectedly") + + if not isinstance(raw_result, dict): + logger.warning(f"raw_result is not a dict (type: {type(raw_result)}), using defaults") + raw_result = {} + + content = raw_result.get('content', '') + sources = raw_result.get('sources', []) or [] + search_widget = "" # Tavily doesn't provide search widgets + search_queries = raw_result.get('search_queries', []) or [] + grounding_metadata = None # Tavily doesn't provide grounding metadata + + except RuntimeError as e: + if "TAVILY_API_KEY not configured" in str(e): + logger.warning("Tavily not configured, falling back to Google") + await task_manager.update_progress(task_id, "⚠️ Tavily not configured, falling back to Google Search") + config.provider = ResearchProvider.GOOGLE + # Continue to Google flow below + else: + raise + + if config.provider not in [ResearchProvider.EXA, ResearchProvider.TAVILY]: # Google research (existing flow) from .google_provider import 
GoogleResearchProvider diff --git a/backend/services/blog_writer/research/tavily_provider.py b/backend/services/blog_writer/research/tavily_provider.py new file mode 100644 index 00000000..410555b4 --- /dev/null +++ b/backend/services/blog_writer/research/tavily_provider.py @@ -0,0 +1,169 @@ +""" +Tavily Research Provider + +AI-powered search implementation using Tavily API for high-quality research. +""" + +import os +from loguru import logger +from models.subscription_models import APIProvider +from services.research.tavily_service import TavilyService +from .base_provider import ResearchProvider as BaseProvider + + +class TavilyResearchProvider(BaseProvider): + """Tavily AI-powered search provider.""" + + def __init__(self): + self.api_key = os.getenv("TAVILY_API_KEY") + if not self.api_key: + raise RuntimeError("TAVILY_API_KEY not configured") + self.tavily_service = TavilyService() + logger.info("✅ Tavily Research Provider initialized") + + async def search(self, prompt, topic, industry, target_audience, config, user_id): + """Execute Tavily search and return standardized results.""" + # Build Tavily query + query = f"{topic} {industry} {target_audience}" + + # Get Tavily-specific config options + topic = config.tavily_topic or "general" + search_depth = config.tavily_search_depth or "basic" + + logger.info(f"[Tavily Research] Executing search: {query}") + + # Execute Tavily search + result = await self.tavily_service.search( + query=query, + topic=topic, + search_depth=search_depth, + max_results=min(config.max_sources, 20), + include_domains=config.tavily_include_domains or None, + exclude_domains=config.tavily_exclude_domains or None, + include_answer=config.tavily_include_answer or False, + include_raw_content=config.tavily_include_raw_content or False, + include_images=config.tavily_include_images or False, + include_image_descriptions=config.tavily_include_image_descriptions or False, + time_range=config.tavily_time_range, + 
start_date=config.tavily_start_date, + end_date=config.tavily_end_date, + country=config.tavily_country, + chunks_per_source=config.tavily_chunks_per_source or 3, + auto_parameters=config.tavily_auto_parameters or False + ) + + if not result.get("success"): + raise RuntimeError(f"Tavily search failed: {result.get('error', 'Unknown error')}") + + # Transform to standardized format + sources = self._transform_sources(result.get("results", [])) + content = self._aggregate_content(result.get("results", [])) + + # Calculate cost (basic = 1 credit, advanced = 2 credits) + cost = 0.001 if search_depth == "basic" else 0.002 # Estimate cost per search + + logger.info(f"[Tavily Research] Search completed: {len(sources)} sources, depth: {search_depth}") + + return { + 'sources': sources, + 'content': content, + 'search_type': search_depth, + 'provider': 'tavily', + 'search_queries': [query], + 'cost': {'total': cost}, + 'answer': result.get("answer"), # If include_answer was requested + 'images': result.get("images", []) + } + + def get_provider_enum(self): + """Return TAVILY provider enum for subscription tracking.""" + return APIProvider.TAVILY + + def estimate_tokens(self) -> int: + """Estimate token usage for Tavily (not token-based, but we estimate API calls).""" + return 0 # Tavily is per-search, not token-based + + def _transform_sources(self, results): + """Transform Tavily results to ResearchSource format.""" + sources = [] + for idx, result in enumerate(results): + source_type = self._determine_source_type(result.get("url", "")) + + sources.append({ + 'title': result.get("title", ""), + 'url': result.get("url", ""), + 'excerpt': result.get("content", "")[:500], # First 500 chars + 'credibility_score': result.get("relevance_score", 0.5), + 'published_at': result.get("published_date"), + 'index': idx, + 'source_type': source_type, + 'content': result.get("content", ""), + 'raw_content': result.get("raw_content"), # If include_raw_content was requested + 'score': 
result.get("score", result.get("relevance_score", 0.5)), + 'favicon': result.get("favicon") + }) + + return sources + + def _determine_source_type(self, url): + """Determine source type from URL.""" + if not url: + return 'web' + + url_lower = url.lower() + if 'arxiv.org' in url_lower or 'research' in url_lower or '.edu' in url_lower: + return 'academic' + elif any(news in url_lower for news in ['cnn.com', 'bbc.com', 'reuters.com', 'theguardian.com', 'nytimes.com']): + return 'news' + elif 'linkedin.com' in url_lower: + return 'expert' + elif '.gov' in url_lower: + return 'government' + else: + return 'web' + + def _aggregate_content(self, results): + """Aggregate content from Tavily results for LLM analysis.""" + content_parts = [] + + for idx, result in enumerate(results): + content = result.get("content", "") + if content: + content_parts.append(f"Source {idx + 1}: {content}") + + return "\n\n".join(content_parts) + + def track_tavily_usage(self, user_id: str, cost: float, search_depth: str): + """Track Tavily API usage after successful call.""" + from services.database import get_db + from services.subscription import PricingService + from sqlalchemy import text + + db = next(get_db()) + try: + pricing_service = PricingService(db) + current_period = pricing_service.get_current_billing_period(user_id) + + # Update tavily_calls and tavily_cost via SQL UPDATE + update_query = text(""" + UPDATE usage_summaries + SET tavily_calls = COALESCE(tavily_calls, 0) + 1, + tavily_cost = COALESCE(tavily_cost, 0) + :cost, + total_calls = COALESCE(total_calls, 0) + 1, + total_cost = COALESCE(total_cost, 0) + :cost + WHERE user_id = :user_id AND billing_period = :period + """) + db.execute(update_query, { + 'cost': cost, + 'user_id': user_id, + 'period': current_period + }) + db.commit() + + logger.info(f"[Tavily] Tracked usage: user={user_id}, cost=${cost}, depth={search_depth}") + except Exception as e: + logger.error(f"[Tavily] Failed to track usage: {e}", exc_info=True) + 
class WixOAuthService:
    """Manages Wix OAuth2 authentication flow and token storage.

    Tokens are kept in the ``wix_oauth_tokens`` SQLite table. Expiry
    timestamps are written in UTC using SQLite's ``YYYY-MM-DD HH:MM:SS`` text
    format, so they compare correctly against ``datetime('now')`` (also UTC)
    in every query of this class.
    """

    # Columns shared by the token queries; order must match _row_to_token().
    _TOKEN_COLUMNS = (
        "id, access_token, refresh_token, token_type, expires_at, expires_in, "
        "scope, site_id, member_id, created_at"
    )

    def __init__(self, db_path: str = "alwrity.db"):
        self.db_path = db_path
        self._init_db()

    def _execute(self, sql: str, params: tuple = ()):
        """Run one statement in its own transaction.

        Returns:
            ``(rows, rowcount)`` where ``rows`` is the fetched result (empty
            for non-SELECT statements).
        """
        conn = sqlite3.connect(self.db_path)
        try:
            # ``with conn`` only commits or rolls back; it does not close the
            # connection, hence the explicit close below.
            with conn:
                cursor = conn.execute(sql, params)
                return cursor.fetchall(), cursor.rowcount
        finally:
            conn.close()

    @staticmethod
    def _expiry_timestamp(expires_in: Optional[int]) -> Optional[str]:
        """Convert a lifetime in seconds into a UTC timestamp string, or None."""
        if not expires_in:
            return None
        # Local import: the module only imports datetime and timedelta.
        from datetime import timezone

        expires_at = datetime.now(timezone.utc) + timedelta(seconds=expires_in)
        # Same text layout SQLite's datetime('now') produces, so string
        # comparison inside SQL is chronological.
        return expires_at.strftime("%Y-%m-%d %H:%M:%S")

    @staticmethod
    def _row_to_token(row) -> Dict[str, Any]:
        """Map a row selected with _TOKEN_COLUMNS onto the public token dict."""
        return {
            "id": row[0],
            "access_token": row[1],
            "refresh_token": row[2],
            "token_type": row[3],
            "expires_at": row[4],
            "expires_in": row[5],
            "scope": row[6],
            "site_id": row[7],
            "member_id": row[8],
            "created_at": row[9],
        }

    def _init_db(self):
        """Initialize database tables for OAuth tokens."""
        self._execute('''
            CREATE TABLE IF NOT EXISTS wix_oauth_tokens (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id TEXT NOT NULL,
                access_token TEXT NOT NULL,
                refresh_token TEXT,
                token_type TEXT DEFAULT 'bearer',
                expires_at TIMESTAMP,
                expires_in INTEGER,
                scope TEXT,
                site_id TEXT,
                member_id TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                is_active BOOLEAN DEFAULT TRUE
            )
        ''')
        logger.info("Wix OAuth database initialized.")

    def store_tokens(
        self,
        user_id: str,
        access_token: str,
        refresh_token: Optional[str] = None,
        expires_in: Optional[int] = None,
        token_type: str = 'bearer',
        scope: Optional[str] = None,
        site_id: Optional[str] = None,
        member_id: Optional[str] = None
    ) -> bool:
        """
        Store Wix OAuth tokens in the database.

        Args:
            user_id: User ID (Clerk string)
            access_token: Access token from Wix
            refresh_token: Optional refresh token
            expires_in: Optional expiration time in seconds
            token_type: Token type (default: 'bearer')
            scope: Optional OAuth scope
            site_id: Optional Wix site ID
            member_id: Optional Wix member ID

        Returns:
            True if tokens were stored successfully
        """
        try:
            expires_at = self._expiry_timestamp(expires_in)
            self._execute('''
                INSERT INTO wix_oauth_tokens
                (user_id, access_token, refresh_token, token_type, expires_at, expires_in, scope, site_id, member_id)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (user_id, access_token, refresh_token, token_type, expires_at, expires_in, scope, site_id, member_id))
            logger.info(f"Wix OAuth: Token inserted into database for user {user_id}")
            return True

        except Exception as e:
            logger.error(f"Error storing Wix tokens for user {user_id}: {e}")
            return False

    def get_user_tokens(self, user_id: str) -> List[Dict[str, Any]]:
        """Get all active, unexpired Wix tokens for a user, newest first."""
        try:
            rows, _ = self._execute(f'''
                SELECT {self._TOKEN_COLUMNS}
                FROM wix_oauth_tokens
                WHERE user_id = ? AND is_active = TRUE
                  AND (expires_at IS NULL OR expires_at > datetime('now'))
                ORDER BY created_at DESC, id DESC
            ''', (user_id,))
            return [self._row_to_token(row) for row in rows]

        except Exception as e:
            logger.error(f"Error getting Wix tokens for user {user_id}: {e}")
            return []

    def get_user_token_status(self, user_id: str) -> Dict[str, Any]:
        """Get detailed token status for a user including expired tokens.

        A token counts as active when its ``is_active`` flag is set and it has
        no expiry or the expiry lies in the future; every other token is
        reported under ``expired_tokens``.
        """
        try:
            # Expiry is evaluated by SQLite with the same expression that
            # get_user_tokens() uses, so both methods always agree.
            rows, _ = self._execute(f'''
                SELECT {self._TOKEN_COLUMNS}, is_active,
                       (expires_at IS NULL OR expires_at > datetime('now'))
                FROM wix_oauth_tokens
                WHERE user_id = ?
                ORDER BY created_at DESC, id DESC
            ''', (user_id,))

            all_tokens = []
            active_tokens = []
            expired_tokens = []

            for row in rows:
                token_data = self._row_to_token(row)
                token_data["is_active"] = bool(row[10])
                all_tokens.append(token_data)

                if token_data["is_active"] and bool(row[11]):
                    active_tokens.append(token_data)
                else:
                    expired_tokens.append(token_data)

            return {
                "has_tokens": len(all_tokens) > 0,
                "has_active_tokens": len(active_tokens) > 0,
                "has_expired_tokens": len(expired_tokens) > 0,
                "active_tokens": active_tokens,
                "expired_tokens": expired_tokens,
                "total_tokens": len(all_tokens),
                "last_token_date": all_tokens[0]["created_at"] if all_tokens else None
            }

        except Exception as e:
            logger.error(f"Error getting Wix token status for user {user_id}: {e}")
            return {
                "has_tokens": False,
                "has_active_tokens": False,
                "has_expired_tokens": False,
                "active_tokens": [],
                "expired_tokens": [],
                "total_tokens": 0,
                "last_token_date": None,
                "error": str(e)
            }

    def update_tokens(
        self,
        user_id: str,
        access_token: str,
        refresh_token: Optional[str] = None,
        expires_in: Optional[int] = None
    ) -> bool:
        """Update the user's most recent token row (e.g., after refresh).

        Args:
            user_id: User ID (Clerk string)
            access_token: New access token
            refresh_token: New refresh token; when omitted the stored one is kept
            expires_in: Optional lifetime of the new access token in seconds

        Returns:
            True if a token row was updated, False if the user has no stored
            token or the update failed.
        """
        try:
            expires_at = self._expiry_timestamp(expires_in)
            # The row is located by user, not by refresh token: after a
            # rotation the stored refresh token differs from the new one, so
            # filtering on the new value would never match.
            _, updated = self._execute('''
                UPDATE wix_oauth_tokens
                SET access_token = ?, refresh_token = COALESCE(?, refresh_token),
                    expires_at = ?, expires_in = ?,
                    is_active = TRUE, updated_at = datetime('now')
                WHERE id = (
                    SELECT id FROM wix_oauth_tokens
                    WHERE user_id = ?
                    ORDER BY created_at DESC, id DESC LIMIT 1
                )
            ''', (access_token, refresh_token or None, expires_at, expires_in, user_id))

            if updated > 0:
                logger.info(f"Wix OAuth: Tokens updated for user {user_id}")
                return True

            logger.warning(f"Wix OAuth: No stored token to update for user {user_id}")
            return False

        except Exception as e:
            logger.error(f"Error updating Wix tokens for user {user_id}: {e}")
            return False

    def revoke_token(self, user_id: str, token_id: int) -> bool:
        """Revoke a Wix OAuth token.

        Returns:
            True if the token belonged to the user and was deactivated.
        """
        try:
            _, updated = self._execute('''
                UPDATE wix_oauth_tokens
                SET is_active = FALSE, updated_at = datetime('now')
                WHERE user_id = ? AND id = ?
            ''', (user_id, token_id))

            if updated > 0:
                logger.info(f"Wix token {token_id} revoked for user {user_id}")
                return True
            return False

        except Exception as e:
            logger.error(f"Error revoking Wix token: {e}")
            return False
+ ORDER BY created_at DESC + ''', (user_id,)) + + all_tokens = [] + active_tokens = [] + expired_tokens = [] + + for row in cursor.fetchall(): + token_data = { + "id": row[0], + "access_token": row[1], + "refresh_token": row[2], + "token_type": row[3], + "expires_at": row[4], + "scope": row[5], + "blog_id": row[6], + "blog_url": row[7], + "created_at": row[8], + "is_active": bool(row[9]) + } + all_tokens.append(token_data) + + # Determine expiry using robust parsing and is_active flag + is_active_flag = bool(row[9]) + not_expired = False + try: + expires_at_val = row[4] + if expires_at_val: + # First try Python parsing + try: + dt = datetime.fromisoformat(expires_at_val) if isinstance(expires_at_val, str) else expires_at_val + not_expired = dt > datetime.now() + except Exception: + # Fallback to SQLite comparison + cursor.execute("SELECT datetime('now') < ?", (expires_at_val,)) + not_expired = cursor.fetchone()[0] == 1 + else: + # No expiry stored => consider not expired + not_expired = True + except Exception: + not_expired = False + + if is_active_flag and not_expired: + active_tokens.append(token_data) + else: + expired_tokens.append(token_data) + + return { + "has_tokens": len(all_tokens) > 0, + "has_active_tokens": len(active_tokens) > 0, + "has_expired_tokens": len(expired_tokens) > 0, + "active_tokens": active_tokens, + "expired_tokens": expired_tokens, + "total_tokens": len(all_tokens), + "last_token_date": all_tokens[0]["created_at"] if all_tokens else None + } + + except Exception as e: + logger.error(f"Error getting WordPress token status for user {user_id}: {e}") + return { + "has_tokens": False, + "has_active_tokens": False, + "has_expired_tokens": False, + "active_tokens": [], + "expired_tokens": [], + "total_tokens": 0, + "last_token_date": None, + "error": str(e) + } + def test_token(self, access_token: str) -> bool: """Test if a WordPress access token is valid.""" try: diff --git a/backend/services/oauth_token_monitoring_service.py 
def _has_usable_tokens(token_status) -> bool:
    """Return True for active tokens, or expired tokens that carry a refresh token."""
    if token_status.get('has_active_tokens', False):
        return True
    if not token_status.get('has_expired_tokens', False):
        return False
    # Expired tokens still count when at least one of them can be refreshed.
    return any(token.get('refresh_token') for token in token_status.get('expired_tokens', []))


def _is_gsc_connected(user_id: str, db_path: str) -> bool:
    """Return True when GSC credentials can be loaded for the user."""
    return bool(GSCService(db_path=db_path).load_user_credentials(user_id))


def _is_bing_connected(user_id: str, db_path: str) -> bool:
    """Return True when the user has active or refreshable Bing tokens."""
    return _has_usable_tokens(BingOAuthService(db_path=db_path).get_user_token_status(user_id))


def _is_wordpress_connected(user_id: str, db_path: str) -> bool:
    """Return True when the user has any stored WordPress token.

    WordPress tokens may not have refresh tokens, so an expired token still
    means the user was connected (and may need to re-authenticate).
    """
    token_status = WordPressOAuthService(db_path=db_path).get_user_token_status(user_id)
    return bool(token_status.get('has_tokens', False))


def _is_wix_connected(user_id: str, db_path: str) -> bool:
    """Return True when the user has active or refreshable Wix tokens."""
    return _has_usable_tokens(WixOAuthService(db_path=db_path).get_user_token_status(user_id))


def get_connected_platforms(user_id: str) -> List[str]:
    """
    Return the platforms for which the user has stored OAuth credentials.

    Checks:
    - GSC: gsc_credentials table
    - Bing: bing_oauth_tokens table
    - WordPress: wordpress_oauth_tokens table
    - Wix: wix_oauth_tokens table

    Each check is best-effort: a failure is logged as a warning and the
    remaining platforms are still checked.

    Args:
        user_id: User ID (Clerk string)

    Returns:
        Platform identifiers, in check order, drawn from
        'gsc', 'bing', 'wordpress' and 'wix'.
    """
    connected = []

    # Use DEBUG level for routine checks (called frequently by dashboard)
    logger.debug(f"[OAuth Monitoring] Checking connected platforms for user: {user_id}")

    checks = (
        ('gsc', 'GSC', _is_gsc_connected),
        ('bing', 'Bing', _is_bing_connected),
        ('wordpress', 'WordPress', _is_wordpress_connected),
        ('wix', 'Wix', _is_wix_connected),
    )

    for platform, label, is_connected in checks:
        try:
            # Use absolute database path; resolved inside the try so a failure
            # here is reported per platform like any other check error.
            db_path = os.path.abspath("alwrity.db")
            if is_connected(user_id, db_path):
                connected.append(platform)
                logger.debug(f"[OAuth Monitoring] ✅ {label} connected for user {user_id}")
            else:
                logger.debug(f"[OAuth Monitoring] ❌ {label} not connected for user {user_id}")
        except Exception as e:
            logger.warning(f"[OAuth Monitoring] ⚠️ {label} check failed for user {user_id}: {e}", exc_info=True)

    # Don't log here - let the caller log a formatted summary if needed
    # This function is called frequently and should be silent
    return connected
self.db_service.save_research_preferences(self.user_id, step.data, db) diff --git a/backend/services/onboarding/database_service.py b/backend/services/onboarding/database_service.py index 9dbd37c5..ac9c0ea1 100644 --- a/backend/services/onboarding/database_service.py +++ b/backend/services/onboarding/database_service.py @@ -336,8 +336,13 @@ class OnboardingDatabaseService: ).first() if existing: - # Update existing - existing.website_url = normalized.get('website_url', existing.website_url) + # Update existing - only update website_url if normalized value is not empty + # This prevents overwriting a valid URL with an empty string when step.data + # doesn't include the website field + normalized_url = normalized.get('website_url', '').strip() if normalized.get('website_url') else '' + if normalized_url: + existing.website_url = normalized_url + # If normalized_url is empty, keep existing.website_url unchanged existing.writing_style = normalized.get('writing_style') existing.content_characteristics = normalized.get('content_characteristics') existing.target_audience = normalized.get('target_audience') @@ -522,6 +527,52 @@ class OnboardingDatabaseService: logger.error(f"Error getting research preferences: {e}") return None + def get_competitor_analysis(self, user_id: str, db: Session = None) -> Optional[List[Dict[str, Any]]]: + """Get competitor analysis data for user from onboarding.""" + session_db = db or self.db + if not session_db: + raise ValueError("Database session required") + + try: + from models.onboarding import CompetitorAnalysis + + session = self.get_session_by_user(user_id, session_db) + if not session: + return None + + # Query CompetitorAnalysis table + competitor_records = session_db.query(CompetitorAnalysis).filter( + CompetitorAnalysis.session_id == session.id + ).all() + + if not competitor_records: + return None + + # Convert to list of dicts + competitors = [] + for record in competitor_records: + analysis_data = record.analysis_data or {} + 
def create_platform_insights_task(
    user_id: str,
    platform: str,  # 'gsc' or 'bing'
    site_url: Optional[str] = None,
    db: Session = None
) -> Dict[str, Any]:
    """
    Create a platform insights fetch task for a user.

    This should be called when user connects GSC or Bing in Step 5.
    The call is idempotent per (user_id, platform): when a task already
    exists it is returned instead of creating a duplicate.

    Args:
        user_id: Clerk user ID (string)
        platform: Platform name ('gsc' or 'bing')
        site_url: Optional site URL (for GSC/Bing specific site)
        db: Database session (required; the ``None`` default is kept only
            for backward compatibility of the signature)

    Returns:
        Dictionary with success status and task details. On failure the
        dictionary is ``{'success': False, 'error': <message>}``; this
        function does not raise.
    """
    # Without a session neither the query nor the rollback in the error path
    # can run; report the problem through the normal failure dict instead of
    # letting an AttributeError escape from the except handler.
    if db is None:
        return {
            'success': False,
            'error': 'Database session required'
        }

    try:
        logger.info(
            f"[Platform Insights] Creating {platform} insights task for user: {user_id}"
        )

        # Check if task already exists
        existing = db.query(PlatformInsightsTask).filter(
            PlatformInsightsTask.user_id == user_id,
            PlatformInsightsTask.platform == platform
        ).first()

        if existing:
            logger.info(
                f"[Platform Insights] Task already exists for user {user_id}, platform {platform}"
            )
            return {
                'success': True,
                'task_id': existing.id,
                'message': 'Task already exists',
                'existing': True
            }

        # Take one timestamp so created_at, updated_at and the next_check
        # base all agree.
        now = datetime.utcnow()

        # Calculate next check (7 days from now, weekly schedule)
        next_check = now + timedelta(days=7)

        # Create new task
        task = PlatformInsightsTask(
            user_id=user_id,
            platform=platform,
            site_url=site_url,
            status='active',
            next_check=next_check,
            created_at=now,
            updated_at=now
        )

        db.add(task)
        db.commit()
        # Refresh so the database-generated primary key is available below.
        db.refresh(task)

        logger.info(
            f"[Platform Insights] Created {platform} insights task {task.id} for user {user_id}, "
            f"next_check: {next_check}"
        )

        return {
            'success': True,
            'task_id': task.id,
            'platform': platform,
            'next_check': next_check.isoformat(),
            'message': f'{platform.upper()} insights task created successfully'
        }

    except Exception as e:
        logger.error(
            f"Error creating {platform} insights task for user {user_id}: {e}",
            exc_info=True
        )
        # Discard the failed transaction so the session stays usable.
        db.rollback()
        return {
            'success': False,
            'error': str(e)
        }


def get_user_insights_tasks(
    user_id: str,
    platform: Optional[str] = None,
    db: Session = None
) -> List[PlatformInsightsTask]:
    """
    Get all platform insights tasks for a user.

    Args:
        user_id: Clerk user ID (string)
        platform: Optional platform filter ('gsc' or 'bing'); when omitted,
            tasks for every platform are returned
        db: Database session

    Returns:
        List of PlatformInsightsTask instances. Any error (including a
        missing session) is logged and results in an empty list.
    """
    try:
        query = db.query(PlatformInsightsTask).filter(
            PlatformInsightsTask.user_id == user_id
        )

        if platform:
            query = query.filter(PlatformInsightsTask.platform == platform)

        tasks = query.all()

        logger.debug(
            f"[Platform Insights] Found {len(tasks)} insights tasks for user {user_id}"
        )

        return tasks

    except Exception as e:
        logger.error(f"Error getting insights tasks for user {user_id}: {e}", exc_info=True)
        return []
class TavilyService:
    """
    Service for web search and research using the Tavily API.

    This service provides AI-powered search capabilities to find relevant
    content and information for research purposes. Public coroutines never
    raise for API/network problems; they return a dictionary whose
    ``success`` key is ``False`` together with ``error`` and ``details``.
    """

    def __init__(self):
        """Initialize the Tavily Service with API credentials."""
        self.api_key = os.getenv("TAVILY_API_KEY")
        self.base_url = "https://api.tavily.com"
        self.enabled = False

        # Don't assume key is available at import time in production.
        # Keys may be injected per-request via middleware, so defer init.
        self._try_initialize()

    def _try_initialize(self) -> None:
        """Attempt to (re)initialize the Tavily service from current environment."""
        # NOTE(review): once enabled, a key that changes in the environment is
        # not picked up again - confirm this is intended for per-request keys.
        if self.enabled and self.api_key:
            return
        try:
            self.api_key = os.getenv("TAVILY_API_KEY")
            if not self.api_key:
                # Leave disabled; caller may try again after middleware injection
                logger.warning("TAVILY_API_KEY not configured; Tavily service will be disabled")
                self.enabled = False
                return
            self.enabled = True
            logger.info("Tavily Service initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize Tavily service: {e}")
            self.enabled = False

    async def search(
        self,
        query: str,
        topic: str = "general",
        search_depth: str = "basic",
        max_results: int = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        include_answer: Union[bool, str] = False,
        include_raw_content: Union[bool, str] = False,
        include_images: bool = False,
        include_image_descriptions: bool = False,
        include_favicon: bool = False,
        time_range: Optional[str] = None,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        country: Optional[str] = None,
        chunks_per_source: int = 3,
        auto_parameters: bool = False
    ) -> Dict[str, Any]:
        """
        Execute a search query using Tavily API.

        Args:
            query: The search query to execute
            topic: Category of search (general, news, finance)
            search_depth: Depth of search (basic, advanced) - basic costs 1 credit, advanced costs 2
            max_results: Maximum number of results to return (clamped to 0-20)
            include_domains: List of domains to specifically include
            exclude_domains: List of domains to specifically exclude
            include_answer: Include LLM-generated answer (basic/advanced/true/false)
            include_raw_content: Include raw HTML content (markdown/text/true/false)
            include_images: Include image search results
            include_image_descriptions: Include image descriptions
                (only sent when include_images is set)
            include_favicon: Include favicon URLs
            time_range: Time range filter (day, week, month, year, d, w, m, y)
            start_date: Start date filter (YYYY-MM-DD)
            end_date: End date filter (YYYY-MM-DD)
            country: Country filter (only sent when topic is "general")
            chunks_per_source: Maximum chunks per source (1-3, only for advanced search)
            auto_parameters: Auto-configure parameters based on query

        Returns:
            Dictionary containing search results. On failure (missing key,
            timeout, non-200 response, network error) the dictionary has
            ``success: False`` plus ``error`` and ``details``.
        """
        # Imported locally because this block cannot extend the module's
        # import section; asyncio is standard library.
        import asyncio

        try:
            # Ensure we pick up any per-request injected key
            self._try_initialize()
            if not self.enabled:
                raise ValueError("Tavily Service is not enabled - API key missing")

            logger.info(f"Starting Tavily search for: {query}")

            # Build request payload
            payload = {
                "api_key": self.api_key,
                "query": query,
                "topic": topic,
                "search_depth": search_depth,
                # Tavily accepts 0-20; clamp both ends so a negative value
                # is never sent.
                "max_results": max(0, min(max_results, 20)),
                "include_favicon": include_favicon
            }

            # Add optional parameters
            if include_domains:
                payload["include_domains"] = include_domains[:300]  # Tavily limit

            if exclude_domains:
                payload["exclude_domains"] = exclude_domains[:150]  # Tavily limit

            if include_answer:
                payload["include_answer"] = include_answer

            if include_raw_content:
                payload["include_raw_content"] = include_raw_content

            if include_images:
                payload["include_images"] = include_images
                if include_image_descriptions:
                    payload["include_image_descriptions"] = include_image_descriptions

            if time_range:
                payload["time_range"] = time_range

            if start_date:
                payload["start_date"] = start_date

            if end_date:
                payload["end_date"] = end_date

            if country and topic == "general":
                payload["country"] = country

            if search_depth == "advanced" and 1 <= chunks_per_source <= 3:
                payload["chunks_per_source"] = chunks_per_source

            if auto_parameters:
                payload["auto_parameters"] = True

            # Make API request
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{self.base_url}/search",
                    json=payload,
                    headers={"Content-Type": "application/json"},
                    timeout=aiohttp.ClientTimeout(total=60)
                ) as response:
                    if response.status == 200:
                        result = await response.json()
                        logger.info(f"Tavily search completed successfully. Found {len(result.get('results', []))} results.")

                        # Process and structure results
                        processed_results = self._process_search_results(result, query)

                        return {
                            "success": True,
                            "query": result.get("query", query),
                            "answer": result.get("answer"),  # If include_answer was requested
                            "results": processed_results,
                            "images": result.get("images", []),
                            "response_time": result.get("response_time"),
                            "request_id": result.get("request_id"),
                            "auto_parameters": result.get("auto_parameters"),
                            "total_results": len(processed_results),
                            "timestamp": datetime.utcnow().isoformat()
                        }
                    else:
                        error_text = await response.text()
                        logger.error(f"Tavily API error: {response.status} - {error_text}")
                        raise RuntimeError(f"Tavily API error: {response.status} - {error_text}")

        # aiohttp.ClientTimeout is a configuration object, not an exception;
        # naming it in an except clause makes Python raise TypeError as soon
        # as any error reaches this handler. Timeouts surface as
        # asyncio.TimeoutError.
        except asyncio.TimeoutError:
            logger.error("Tavily API request timed out")
            return {
                "success": False,
                "error": "Request timed out",
                "details": "The search request took too long to complete"
            }
        except Exception as e:
            logger.error(f"Error in Tavily search: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "details": "An unexpected error occurred during search"
            }

    def _process_search_results(self, api_response: Dict[str, Any], query: str) -> List[Dict[str, Any]]:
        """
        Process and structure Tavily API response into standardized format.

        Args:
            api_response: Raw response from Tavily API
            query: Original search query (currently unused; kept for
                interface stability)

        Returns:
            List of processed search results, sorted by relevance score
            (highest first). Results that cannot be processed are skipped
            with a warning.
        """
        results = []
        raw_results = api_response.get("results", [])

        for result in raw_results:
            try:
                # Extract domain from URL
                url = result.get("url", "")
                domain = urlparse(url).netloc if url else ""

                # Tavily provides a score field; fall back to a neutral 0.5
                # when it is missing or explicitly null so the sort below
                # never compares None with a number.
                score = result.get("score")
                relevance_score = 0.5 if score is None else score

                processed_result = {
                    "url": url,
                    "domain": domain,
                    "title": result.get("title", ""),
                    "content": result.get("content", ""),
                    "raw_content": result.get("raw_content"),  # If include_raw_content was requested
                    "score": relevance_score,
                    "relevance_score": relevance_score,  # Alias for compatibility
                    "favicon": result.get("favicon"),
                    "published_date": result.get("published_date"),
                }

                results.append(processed_result)

            except Exception as e:
                logger.warning(f"Error processing Tavily result: {str(e)}")
                continue

        # Sort by relevance score (highest first)
        results.sort(key=lambda item: item["relevance_score"], reverse=True)

        return results

    async def search_industry_trends(
        self,
        topic: str,
        industry: str,
        max_results: int = 10,
        search_depth: str = "basic"
    ) -> Dict[str, Any]:
        """
        Search for current industry trends and insights.

        Args:
            topic: The specific topic to research
            industry: The industry context for the search
            max_results: Maximum number of search results to return
            search_depth: Depth of search (basic or advanced)

        Returns:
            Dictionary containing search results with industry context
            (same shape as the result of ``search``)
        """
        # Build industry-specific query
        search_query = f"{topic} {industry} trends insights"

        # Basic depth searches the news topic; any other depth uses general.
        return await self.search(
            query=search_query,
            topic="news" if search_depth == "basic" else "general",
            search_depth=search_depth,
            max_results=max_results,
            include_answer="basic",
            include_favicon=True,
            time_range="month"  # Last month for current trends
        )

    async def discover_competitors(
        self,
        user_url: str,
        num_results: int = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        industry_context: Optional[str] = None,
        website_analysis_data: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Discover competitors for a given website using Tavily search.

        Args:
            user_url: The website URL to find competitors for
            num_results: Number of competitor results to return
            include_domains: List of domains to include in search
            exclude_domains: List of domains to exclude from search
                (not modified; the user's own domain is added to a copy)
            industry_context: Industry context for better competitor discovery
            website_analysis_data: Optional website analysis; when its
                ``analysis.target_audience.primary_audience`` value is
                present it is appended to the search query

        Returns:
            Dictionary containing competitor analysis results, or a
            ``success: False`` dictionary on failure
        """
        try:
            # Ensure we pick up any per-request injected key
            self._try_initialize()
            if not self.enabled:
                raise ValueError("Tavily Service is not enabled - API key missing")

            logger.info(f"Starting competitor discovery for: {user_url}")

            # Extract user domain for exclusion. Work on a copy so the
            # caller's list is never mutated, and skip the empty netloc that
            # urlparse yields for URLs without a scheme.
            user_domain = urlparse(user_url).netloc
            exclude_domains_list = list(exclude_domains or [])
            if user_domain and user_domain not in exclude_domains_list:
                exclude_domains_list.append(user_domain)

            # Build search query
            query_parts = ["similar websites", "competitors"]
            if industry_context:
                query_parts.append(f"in {industry_context}")

            # Extract insights from website analysis if available
            if website_analysis_data:
                analysis = website_analysis_data.get('analysis', {})
                if 'target_audience' in analysis:
                    audience = analysis['target_audience']
                    if isinstance(audience, dict) and 'primary_audience' in audience:
                        query_parts.append(audience['primary_audience'])

            search_query = " ".join(query_parts)

            # Perform search
            search_result = await self.search(
                query=search_query,
                topic="general",
                search_depth="advanced",  # Use advanced for better competitor discovery
                max_results=num_results,
                include_domains=include_domains,
                exclude_domains=exclude_domains_list,
                include_favicon=True,
                chunks_per_source=3
            )

            # Propagate the search failure dictionary unchanged.
            if not search_result.get("success"):
                return search_result

            # Process results into competitor format
            competitors = [
                {
                    "url": result.get("url"),
                    "domain": result.get("domain"),
                    "title": result.get("title"),
                    "summary": result.get("content", ""),
                    "relevance_score": result.get("relevance_score", 0.5),
                    "favicon": result.get("favicon"),
                    "published_date": result.get("published_date"),
                    "highlights": self._extract_highlights(result.get("content", "")),
                    "competitive_insights": self._extract_competitive_insights(result),
                    "content_insights": self._analyze_content_quality(result)
                }
                for result in search_result.get("results", [])
            ]

            logger.info(f"Successfully discovered {len(competitors)} competitors for {user_url}")

            return {
                "success": True,
                "user_url": user_url,
                "competitors": competitors,
                "total_competitors": len(competitors),
                "analysis_timestamp": datetime.utcnow().isoformat(),
                "industry_context": industry_context,
                "request_id": search_result.get("request_id")
            }

        except Exception as e:
            logger.error(f"Error in competitor discovery: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "details": "An unexpected error occurred during competitor discovery"
            }

    def _extract_highlights(self, content: str, num_sentences: int = 3) -> List[str]:
        """Extract key highlights from content.

        Splits on '.' and returns the first ``num_sentences`` non-empty,
        stripped fragments; returns an empty list for empty content.
        """
        if not content:
            return []

        # Simple sentence extraction (can be enhanced with NLP)
        sentences = [s.strip() for s in content.split('.') if s.strip()]
        return sentences[:num_sentences]

    def _extract_competitive_insights(self, result: Dict[str, Any]) -> Dict[str, Any]:
        """Extract competitive insights from search result.

        Placeholder: the result is not inspected yet and fixed default
        values are returned.
        """
        return {
            "business_model": "unknown",
            "target_audience": "unknown",
            "key_differentiators": []
        }

    def _analyze_content_quality(self, result: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze content quality metrics.

        Placeholder: the result is not inspected yet and fixed default
        values are returned.
        """
        return {
            "content_focus": "general",
            "content_quality": "medium",
            "publishing_frequency": "unknown"
        }
a/backend/services/scheduler/__init__.py +++ b/backend/services/scheduler/__init__.py @@ -3,6 +3,8 @@ Task Scheduler Package Modular, pluggable scheduler for ALwrity tasks. """ +from sqlalchemy.orm import Session + from .core.scheduler import TaskScheduler from .core.executor_interface import TaskExecutor, TaskExecutionResult from .core.exception_handler import ( @@ -11,8 +13,13 @@ from .core.exception_handler import ( ) from .executors.monitoring_task_executor import MonitoringTaskExecutor from .executors.oauth_token_monitoring_executor import OAuthTokenMonitoringExecutor +from .executors.website_analysis_executor import WebsiteAnalysisExecutor +from .executors.gsc_insights_executor import GSCInsightsExecutor +from .executors.bing_insights_executor import BingInsightsExecutor from .utils.task_loader import load_due_monitoring_tasks from .utils.oauth_token_task_loader import load_due_oauth_token_monitoring_tasks +from .utils.website_analysis_task_loader import load_due_website_analysis_tasks +from .utils.platform_insights_task_loader import load_due_platform_insights_tasks # Global scheduler instance (initialized on first access) _scheduler_instance: TaskScheduler = None @@ -47,6 +54,37 @@ def get_scheduler() -> TaskScheduler: oauth_token_executor, load_due_oauth_token_monitoring_tasks ) + + # Register website analysis executor + website_analysis_executor = WebsiteAnalysisExecutor() + _scheduler_instance.register_executor( + 'website_analysis', + website_analysis_executor, + load_due_website_analysis_tasks + ) + + # Register platform insights executors + # GSC insights executor + def load_due_gsc_insights_tasks(db: Session, user_id=None): + return load_due_platform_insights_tasks(db, user_id, platform='gsc') + + gsc_insights_executor = GSCInsightsExecutor() + _scheduler_instance.register_executor( + 'gsc_insights', + gsc_insights_executor, + load_due_gsc_insights_tasks + ) + + # Bing insights executor + def load_due_bing_insights_tasks(db: Session, user_id=None): + 
return load_due_platform_insights_tasks(db, user_id, platform='bing') + + bing_insights_executor = BingInsightsExecutor() + _scheduler_instance.register_executor( + 'bing_insights', + bing_insights_executor, + load_due_bing_insights_tasks + ) return _scheduler_instance @@ -57,6 +95,9 @@ __all__ = [ 'TaskExecutionResult', 'MonitoringTaskExecutor', 'OAuthTokenMonitoringExecutor', + 'WebsiteAnalysisExecutor', + 'GSCInsightsExecutor', + 'BingInsightsExecutor', 'get_scheduler', # Exception handling 'SchedulerExceptionHandler', diff --git a/backend/services/scheduler/core/check_cycle_handler.py b/backend/services/scheduler/core/check_cycle_handler.py index 0d42d8f8..80ab3544 100644 --- a/backend/services/scheduler/core/check_cycle_handler.py +++ b/backend/services/scheduler/core/check_cycle_handler.py @@ -10,6 +10,7 @@ from sqlalchemy.orm import Session from services.database import get_db_session from utils.logger_utils import get_service_logger from models.scheduler_models import SchedulerEventLog +from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats from .exception_handler import DatabaseError from .interval_manager import adjust_check_interval_if_needed @@ -100,6 +101,7 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'): logger.warning("\n".join(check_lines)) # Save check cycle event to database for historical tracking + event_log_id = None try: event_log = SchedulerEventLog( event_type='check_cycle', @@ -119,11 +121,63 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'): } ) db.add(event_log) + db.flush() # Flush to get the ID without committing + event_log_id = event_log.id db.commit() + logger.debug(f"[Check Cycle] Saved event log with ID: {event_log_id}") except Exception as e: - logger.warning(f"Failed to save check cycle event log: {e}") + logger.error(f"[Check Cycle] ❌ Failed to save check cycle event log: {e}", exc_info=True) if db: db.rollback() + # Continue execution even if event log save fails + + 
# Update cumulative stats table (persistent across restarts) + try: + cumulative_stats = SchedulerCumulativeStats.get_or_create(db) + + # Update cumulative metrics by adding this cycle's values + # Get current cycle values (incremental, not total) + cycle_tasks_found = cycle_summary.get('total_found', 0) + cycle_tasks_executed = cycle_summary.get('total_executed', 0) + cycle_tasks_failed = cycle_summary.get('total_failed', 0) + + # Update cumulative totals (additive) + cumulative_stats.total_check_cycles += 1 + cumulative_stats.cumulative_tasks_found += cycle_tasks_found + cumulative_stats.cumulative_tasks_executed += cycle_tasks_executed + cumulative_stats.cumulative_tasks_failed += cycle_tasks_failed + # Note: tasks_skipped in scheduler.stats is a running total, not per-cycle + # We track it as-is from scheduler.stats (it's already cumulative) + # This ensures we don't double-count skipped tasks + if cumulative_stats.cumulative_tasks_skipped is None: + cumulative_stats.cumulative_tasks_skipped = 0 + # Update to current total from scheduler (which is already cumulative) + current_skipped = scheduler.stats.get('tasks_skipped', 0) + if current_skipped > cumulative_stats.cumulative_tasks_skipped: + cumulative_stats.cumulative_tasks_skipped = current_skipped + cumulative_stats.last_check_cycle_id = event_log_id + cumulative_stats.last_updated = datetime.utcnow() + cumulative_stats.updated_at = datetime.utcnow() + + db.commit() + # Log at DEBUG level to avoid noise during normal operation + # This is expected behavior, not a warning + logger.debug( + f"[Check Cycle] Updated cumulative stats: " + f"cycles={cumulative_stats.total_check_cycles}, " + f"found={cumulative_stats.cumulative_tasks_found}, " + f"executed={cumulative_stats.cumulative_tasks_executed}, " + f"failed={cumulative_stats.cumulative_tasks_failed}" + ) + except Exception as e: + logger.error(f"[Check Cycle] ❌ Failed to update cumulative stats: {e}", exc_info=True) + if db: + db.rollback() + # Log warning 
but continue - cumulative stats can be rebuilt from event logs + logger.warning( + "[Check Cycle] ⚠️ Cumulative stats update failed. " + "Stats can be rebuilt from event logs on next dashboard load." + ) # Update last_update timestamp for frontend polling scheduler.stats['last_update'] = datetime.utcnow().isoformat() diff --git a/backend/services/scheduler/core/oauth_task_restoration.py b/backend/services/scheduler/core/oauth_task_restoration.py index e6d92410..4c4cf8ca 100644 --- a/backend/services/scheduler/core/oauth_task_restoration.py +++ b/backend/services/scheduler/core/oauth_task_restoration.py @@ -104,19 +104,16 @@ async def restore_oauth_monitoring_tasks(scheduler): # Fallback to users with existing tasks only total_created = 0 + restoration_summary = [] # Collect summary for single log + for user_id in users_to_check: try: - # Get connected platforms for this user + # Get connected platforms for this user (silent - no logging) connected_platforms = get_connected_platforms(user_id) - logger.warning( - f"[OAuth Task Restoration] User {user_id}: " - f"Connected platforms: {connected_platforms}" - ) - if not connected_platforms: logger.debug( - f"[OAuth Task Restoration] No connected platforms for user {user_id}, skipping" + f"[OAuth Task Restoration] No connected platforms for user {user_id[:20]}..., skipping" ) continue @@ -134,11 +131,6 @@ async def restore_oauth_monitoring_tasks(scheduler): ] if missing_platforms: - logger.warning( - f"[OAuth Task Restoration] ⚠️ User {user_id} has connected platforms " - f"{connected_platforms} but missing tasks for: {missing_platforms}" - ) - # Create missing tasks created = create_oauth_monitoring_tasks( user_id=user_id, @@ -147,15 +139,10 @@ async def restore_oauth_monitoring_tasks(scheduler): ) total_created += len(created) - - logger.warning( - f"[OAuth Task Restoration] ✅ Created {len(created)} missing OAuth tasks " - f"for user {user_id}, platforms: {missing_platforms}" - ) - else: - logger.warning( - f"[OAuth 
Task Restoration] ✅ User {user_id} has all required tasks " - f"for connected platforms: {connected_platforms}" + # Collect summary info instead of logging immediately + platforms_str = ", ".join([p.upper() for p in missing_platforms]) + restoration_summary.append( + f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})" ) except Exception as e: @@ -173,16 +160,23 @@ async def restore_oauth_monitoring_tasks(scheduler): final_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(final_by_platform.items())]) + # Single formatted summary log (similar to scheduler startup) if total_created > 0: + summary_lines = "\n".join(restoration_summary[:5]) # Show first 5 users + if len(restoration_summary) > 5: + summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users" + logger.warning( - f"[OAuth Task Restoration] ✅ Created {total_created} missing OAuth monitoring tasks. " - f"Final platform breakdown: {final_platform_summary}" + f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n" + f" ├─ Tasks Created: {total_created}\n" + f" ├─ Users Processed: {len(users_to_check)}\n" + f" ├─ Platform Breakdown: {final_platform_summary}\n" + + summary_lines ) else: logger.warning( f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. " - f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks. " - f"Platform breakdown: {final_platform_summary}" + f"Checked {len(users_to_check)} users. 
async def restore_platform_insights_tasks(scheduler):
    """
    Create missing platform insights tasks for users with GSC/Bing connections.

    Candidate users are those owning an OAuth token monitoring task for GSC or
    Bing. For each candidate the currently connected platforms are looked up
    with ``get_connected_platforms`` and a ``PlatformInsightsTask`` is created
    for every connected insights platform that does not have one yet.

    ``site_url`` is deliberately passed as ``None``: resolving it needs API
    calls, so it is left to the executor when the task actually runs.

    A failure for one user or platform is logged and never aborts the run;
    any unexpected error is logged and swallowed so scheduler startup continues.

    Args:
        scheduler: TaskScheduler instance. Not read by this function; accepted
            so it can be awaited the same way as the other restoration hooks.
    """
    # Platforms that support insights (GSC and Bing only)
    insights_platforms = ['gsc', 'bing']

    try:
        logger.warning("[Platform Insights Restoration] Starting platform insights task restoration...")
        db = get_db_session()
        if not db:
            logger.warning("[Platform Insights Restoration] Could not get database session")
            return

        try:
            # Index existing insights tasks once: user_id -> {platform, ...}
            existing_tasks = db.query(PlatformInsightsTask).all()
            existing_platforms_by_user = {}
            for task in existing_tasks:
                existing_platforms_by_user.setdefault(task.user_id, set()).add(task.platform)

            # Users who have OAuth tasks for GSC or Bing are the candidates
            oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
            users_to_check = {
                task.user_id
                for task in oauth_tasks
                if task.platform in insights_platforms
            }

            logger.warning(
                f"[Platform Insights Restoration] Found {len(existing_tasks)} existing insights tasks "
                f"for {len(existing_platforms_by_user)} users. Checking {len(users_to_check)} users "
                f"with GSC/Bing OAuth connections."
            )

            if not users_to_check:
                logger.warning("[Platform Insights Restoration] No users with GSC/Bing connections found")
                return

            total_created = 0
            restoration_summary = []

            for user_id in users_to_check:
                try:
                    # Treat a None result the same as "nothing connected"
                    connected_platforms = get_connected_platforms(user_id) or []

                    # Filter to only GSC and Bing
                    insights_connected = [
                        platform
                        for platform in connected_platforms
                        if platform in insights_platforms
                    ]

                    if not insights_connected:
                        logger.debug(
                            f"[Platform Insights Restoration] No GSC/Bing connections for user {user_id[:20]}..., skipping"
                        )
                        continue

                    existing_platforms = existing_platforms_by_user.get(user_id, set())
                    missing_platforms = [
                        platform
                        for platform in insights_connected
                        if platform not in existing_platforms
                    ]

                    for platform in missing_platforms:
                        try:
                            result = create_platform_insights_task(
                                user_id=user_id,
                                platform=platform,
                                site_url=None,  # Will be fetched by executor when task runs
                                db=db
                            )

                            if result.get('success'):
                                total_created += 1
                                restoration_summary.append(
                                    f"   ├─ User {user_id[:20]}...: {platform.upper()} task created"
                                )
                            else:
                                # WARNING (not DEBUG) so failed restorations are visible in production
                                logger.warning(
                                    f"[Platform Insights Restoration] Failed to create {platform} task "
                                    f"for user {user_id}: {result.get('error')}"
                                )
                        except Exception as e:
                            logger.warning(
                                f"[Platform Insights Restoration] Error creating {platform} task "
                                f"for user {user_id}: {e}"
                            )

                except Exception as e:
                    logger.warning(
                        f"[Platform Insights Restoration] Error processing user {user_id}: {e}"
                    )

            # Log summary
            if total_created > 0:
                logger.warning(
                    f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n"
                    + "\n".join(restoration_summary)
                )
            else:
                logger.warning(
                    f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
                    f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks."
                )

        finally:
            db.close()

    except Exception as e:
        logger.error(f"[Platform Insights Restoration] Error during restoration: {e}", exc_info=True)
Error type: {type(e).__name__}" ) + # Get website analysis tasks count + website_analysis_tasks_count = 0 + try: + from models.website_analysis_monitoring_models import WebsiteAnalysisTask + website_analysis_tasks_count = db.query(WebsiteAnalysisTask).filter( + WebsiteAnalysisTask.status == 'active' + ).count() + except Exception as e: + logger.debug(f"Could not get website analysis tasks count: {e}") + + # Get platform insights tasks count + platform_insights_tasks_count = 0 + try: + from models.platform_insights_monitoring_models import PlatformInsightsTask + platform_insights_tasks_count = db.query(PlatformInsightsTask).filter( + PlatformInsightsTask.status == 'active' + ).count() + except Exception as e: + logger.debug(f"Could not get platform insights tasks count: {e}") + # Calculate job counts apscheduler_recurring = 1 # check_due_tasks apscheduler_one_time = len(all_jobs) - 1 - total_recurring = apscheduler_recurring + oauth_tasks_count - total_jobs = len(all_jobs) + oauth_tasks_count + total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count + total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count # Build comprehensive startup log message + recurring_breakdown = f"check_due_tasks: {apscheduler_recurring}" + if oauth_tasks_count > 0: + recurring_breakdown += f", OAuth monitoring: {oauth_tasks_count}" + if website_analysis_tasks_count > 0: + recurring_breakdown += f", Website analysis: {website_analysis_tasks_count}" + if platform_insights_tasks_count > 0: + recurring_breakdown += f", Platform insights: {platform_insights_tasks_count}" + startup_lines = [ f"[Scheduler] ✅ Task Scheduler Started", f" ├─ Check Interval: {initial_interval} minutes", f" ├─ Registered Task Types: {len(registered_types)} ({', '.join(registered_types) if registered_types else 'none'})", f" ├─ Active Strategies: {active_strategies}", f" ├─ Total Scheduled Jobs: 
{total_jobs}", - f" ├─ Recurring Jobs: {total_recurring} (check_due_tasks: {apscheduler_recurring}, OAuth monitoring: {oauth_tasks_count})", + f" ├─ Recurring Jobs: {total_recurring} ({recurring_breakdown})", f" └─ One-Time Jobs: {apscheduler_one_time}" ] # Add APScheduler job details if all_jobs: for idx, job in enumerate(all_jobs): - is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0 + is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 prefix = " └─" if is_last else " ├─" next_run = job.next_run_time trigger_type = type(job.trigger).__name__ @@ -338,7 +377,7 @@ class TaskScheduler: oauth_tasks = db.query(OAuthTokenMonitoringTask).all() for idx, task in enumerate(oauth_tasks): - is_last = idx == len(oauth_tasks) - 1 and len(all_jobs) == 0 + is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 prefix = " └─" if is_last else " ├─" try: @@ -367,6 +406,71 @@ class TaskScheduler: except Exception as e: logger.debug(f"Could not get OAuth token monitoring task details: {e}") + # Add website analysis tasks details + if website_analysis_tasks_count > 0: + try: + db = get_db_session() + if db: + from models.website_analysis_monitoring_models import WebsiteAnalysisTask + website_analysis_tasks = db.query(WebsiteAnalysisTask).all() + + for idx, task in enumerate(website_analysis_tasks): + is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0 + prefix = " └─" if is_last else " ├─" + + try: + user_job_store = get_user_job_store_name(task.user_id, db) + except Exception as e: + logger.debug(f"Could not extract job store name for user {task.user_id}: {e}") + user_job_store = 'default' + + next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled' + frequency = f"Every {task.frequency_days} days" 
+ task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor" + status_indicator = "✅" if task.status == 'active' else f"[{task.status}]" + website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A') + + startup_lines.append( + f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | " + f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | " + f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}" + ) + db.close() + except Exception as e: + logger.debug(f"Could not get website analysis task details: {e}") + + # Add platform insights tasks details + if platform_insights_tasks_count > 0: + try: + db = get_db_session() + if db: + from models.platform_insights_monitoring_models import PlatformInsightsTask + platform_insights_tasks = db.query(PlatformInsightsTask).all() + + for idx, task in enumerate(platform_insights_tasks): + is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0 + prefix = " └─" if is_last else " ├─" + + try: + user_job_store = get_user_job_store_name(task.user_id, db) + except Exception as e: + logger.debug(f"Could not extract job store name for user {task.user_id}: {e}") + user_job_store = 'default' + + next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled' + platform_label = task.platform.upper() if task.platform else 'Unknown' + site_display = task.site_url[:50] + "..." 
async def _validate_and_rebuild_cumulative_stats(self):
    """
    Validate cumulative stats on scheduler startup and rebuild if needed.

    The persisted ``SchedulerCumulativeStats`` row (id == 1) is compared with
    the number of ``check_cycle`` rows in ``SchedulerEventLog``. When the row
    is missing, or its ``total_check_cycles`` disagrees with the event log,
    the cycle/found/executed/failed totals are recomputed from the event log
    and committed. ``cumulative_tasks_skipped`` is not touched here.

    Never raises: a missing model (ImportError) produces a warning pointing at
    the migration script, any other error is logged, and the session is always
    closed.
    """
    db = None
    try:
        db = get_db_session()
        if not db:
            logger.warning("[Scheduler] Could not get database session for cumulative stats validation")
            return

        try:
            from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
            from models.scheduler_models import SchedulerEventLog
            from sqlalchemy import func

            def _totals_from_event_logs():
                """Return (cycles, found, executed, failed) summed over all check_cycle events."""
                row = db.query(
                    func.count(SchedulerEventLog.id),
                    func.sum(SchedulerEventLog.tasks_found),
                    func.sum(SchedulerEventLog.tasks_executed),
                    func.sum(SchedulerEventLog.tasks_failed)
                ).filter(
                    SchedulerEventLog.event_type == 'check_cycle'
                ).first()
                # An aggregate query without GROUP BY yields one row; SUM() is
                # NULL when there are no matching rows, so map None to 0.
                row = row or (0, 0, 0, 0)
                return tuple(int(value) if value is not None else 0 for value in row)

            def _apply_totals(stats, totals):
                """Copy the aggregated totals onto the stats row and stamp it."""
                now = datetime.utcnow()
                (
                    stats.total_check_cycles,
                    stats.cumulative_tasks_found,
                    stats.cumulative_tasks_executed,
                    stats.cumulative_tasks_failed,
                ) = totals
                stats.last_updated = now
                stats.updated_at = now

            # Get cumulative stats from persistent table
            cumulative_stats = db.query(SchedulerCumulativeStats).filter(
                SchedulerCumulativeStats.id == 1
            ).first()

            # Count check_cycle events in database
            check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
                SchedulerEventLog.event_type == 'check_cycle'
            ).scalar() or 0

            if cumulative_stats and cumulative_stats.total_check_cycles == check_cycle_count:
                logger.warning(
                    f"[Scheduler] ✅ Cumulative stats validated: "
                    f"{cumulative_stats.total_check_cycles} check cycles match event logs"
                )
                return

            if cumulative_stats:
                logger.warning(
                    f"[Scheduler] ⚠️ Cumulative stats validation failed on startup: "
                    f"cumulative_stats.total_check_cycles={cumulative_stats.total_check_cycles} "
                    f"vs event_logs.count={check_cycle_count}. "
                    f"Rebuilding cumulative stats from event logs..."
                )
                success_label = "Rebuilt cumulative stats on startup"
            else:
                logger.warning(
                    "[Scheduler] Cumulative stats table not found. "
                    "Creating from event logs..."
                )
                success_label = "Created cumulative stats from event logs"

            totals = _totals_from_event_logs()
            if cumulative_stats is None:
                cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
            _apply_totals(cumulative_stats, totals)
            db.commit()

            total_cycles, total_found, total_executed, total_failed = totals
            logger.warning(
                f"[Scheduler] ✅ {success_label}: "
                f"cycles={total_cycles}, found={total_found}, "
                f"executed={total_executed}, failed={total_failed}"
            )
        except ImportError:
            logger.warning(
                "[Scheduler] Cumulative stats model not available. "
                "Migration may not have been run yet. "
                "Run: python backend/scripts/run_cumulative_stats_migration.py"
            )
    except Exception as e:
        logger.error(f"[Scheduler] Error validating cumulative stats: {e}", exc_info=True)
    finally:
        if db:
            db.close()
async def restore_website_analysis_tasks(scheduler):
    """
    Create missing website analysis tasks for users who completed onboarding.

    Users are collected from two sources: everyone who already owns a
    ``WebsiteAnalysisTask`` and everyone whose onboarding session is complete
    (``current_step >= 6`` or ``progress >= 100.0``, confirmed through
    ``get_onboarding_status``). Users that already have at least one task are
    skipped; for the others ``create_website_analysis_tasks`` is called.

    If the onboarding lookup fails, only users with existing tasks are checked.
    Per-user failures are logged and do not stop the run; any unexpected error
    is logged and swallowed so scheduler startup continues.

    Args:
        scheduler: TaskScheduler instance. Not read by this function; accepted
            so it can be awaited the same way as the other restoration hooks.
    """
    try:
        logger.warning("[Website Analysis Restoration] Starting website analysis task restoration...")
        db = get_db_session()
        if not db:
            logger.warning("[Website Analysis Restoration] Could not get database session")
            return

        try:
            # Check if table exists (may not exist if migration hasn't run)
            try:
                existing_tasks = db.query(WebsiteAnalysisTask).all()
            except Exception as table_error:
                logger.error(
                    f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist: {table_error}. "
                    f"Please run database migration: create_website_analysis_monitoring_tables.sql"
                )
                return

            # Single pass over existing tasks: per-user counts and per-type counts
            task_count_by_user = {}
            existing_by_type = {}
            for task in existing_tasks:
                task_count_by_user[task.user_id] = task_count_by_user.get(task.user_id, 0) + 1
                existing_by_type[task.task_type] = existing_by_type.get(task.task_type, 0) + 1
            user_ids_with_tasks = set(task_count_by_user)

            type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(existing_by_type.items())])
            logger.warning(
                f"[Website Analysis Restoration] Found {len(existing_tasks)} existing website analysis tasks "
                f"for {len(user_ids_with_tasks)} users. Types: {type_summary}"
            )

            # Default: only users who already have at least one task
            users_to_check = list(user_ids_with_tasks)

            # Also pick up users who completed onboarding but never got tasks.
            # Completion mirrors OnboardingProgressService.get_onboarding_status():
            # current_step >= 6 OR progress >= 100.0
            try:
                from services.onboarding.progress_service import get_onboarding_progress_service
                from models.onboarding import OnboardingSession
                from sqlalchemy import or_

                progress_service = get_onboarding_progress_service()

                completed_sessions = db.query(OnboardingSession).filter(
                    or_(
                        OnboardingSession.current_step >= 6,
                        OnboardingSession.progress >= 100.0
                    )
                ).all()

                # Confirm through the service, once per user rather than once per session
                onboarding_user_ids = set()
                for candidate_id in {session.user_id for session in completed_sessions}:
                    status = progress_service.get_onboarding_status(candidate_id)
                    if status.get('is_completed', False):
                        onboarding_user_ids.add(candidate_id)

                # Set difference gives the real number of users without tasks;
                # subtracting the two sizes could be wrong or even negative.
                new_user_ids = onboarding_user_ids - user_ids_with_tasks
                users_to_check = list(user_ids_with_tasks) + list(new_user_ids)

                logger.warning(
                    f"[Website Analysis Restoration] Checking {len(users_to_check)} users "
                    f"({len(user_ids_with_tasks)} with existing tasks, "
                    f"{len(onboarding_user_ids)} from onboarding sessions, "
                    f"{len(new_user_ids)} new users to check)"
                )
            except Exception as e:
                logger.warning(f"[Website Analysis Restoration] Could not query onboarding users: {e}")
                # Fallback to users with existing tasks only
                users_to_check = list(user_ids_with_tasks)

            total_created = 0
            users_processed = 0

            for user_id in users_to_check:
                try:
                    users_processed += 1

                    existing_count = task_count_by_user.get(user_id, 0)
                    if existing_count:
                        logger.debug(
                            f"[Website Analysis Restoration] User {user_id} already has "
                            f"{existing_count} website analysis tasks, skipping"
                        )
                        continue

                    logger.warning(
                        f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
                        f"but has no website analysis tasks. Creating tasks..."
                    )

                    # Create missing tasks
                    result = create_website_analysis_tasks(user_id=user_id, db=db)

                    if result.get('success'):
                        tasks_count = result.get('tasks_created', 0)
                        total_created += tasks_count
                        logger.warning(
                            f"[Website Analysis Restoration] ✅ Created {tasks_count} website analysis tasks "
                            f"for user {user_id}"
                        )
                    else:
                        error = result.get('error', 'Unknown error')
                        logger.warning(
                            f"[Website Analysis Restoration] ⚠️ Could not create tasks for user {user_id}: {error}"
                        )

                except Exception as e:
                    logger.warning(
                        f"[Website Analysis Restoration] Error checking/creating tasks for user {user_id}: {e}",
                        exc_info=True
                    )
                    continue

            # Final summary log (re-query so newly created tasks are included)
            final_existing_tasks = db.query(WebsiteAnalysisTask).all()
            final_by_type = {}
            for task in final_existing_tasks:
                final_by_type[task.task_type] = final_by_type.get(task.task_type, 0) + 1

            final_type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(final_by_type.items())])

            if total_created > 0:
                logger.warning(
                    f"[Website Analysis Restoration] ✅ Created {total_created} missing website analysis tasks. "
                    f"Processed {users_processed} users. Final type breakdown: {final_type_summary}"
                )
            else:
                logger.warning(
                    f"[Website Analysis Restoration] ✅ All users have required website analysis tasks. "
                    f"Checked {users_processed} users, found {len(existing_tasks)} existing tasks. "
                    f"Type breakdown: {final_type_summary}"
                )

        finally:
            db.close()

    except Exception as e:
        logger.error(
            f"[Website Analysis Restoration] Error restoring website analysis tasks: {e}",
            exc_info=True
        )
class BingInsightsExecutor(TaskExecutor):
    """
    Executor for Bing insights fetch tasks.

    Handles:
    - Fetching Bing insights data weekly
    - On first run: Loads existing cached data
    - On subsequent runs: Reads the latest stored analytics (a live Bing API
      call is not implemented yet; see ``_fetch_fresh_data``)
    - Logging results and updating task status
    """

    # Days until the next run for each supported frequency label
    _FREQUENCY_DAYS = {'Weekly': 7, 'Daily': 1}

    def __init__(self):
        self.logger = logger
        self.exception_handler = SchedulerExceptionHandler()
        database_url = os.getenv('DATABASE_URL', 'sqlite:///alwrity.db')
        self.storage_service = BingAnalyticsStorageService(database_url)
        self.bing_oauth = BingOAuthService()

    async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
        """
        Execute a Bing insights fetch task.

        Writes a ``PlatformInsightsExecutionLog`` row, fetches insights, then
        updates the task's status and ``next_check`` (7 days after success,
        1 day after failure) and commits.

        Args:
            task: PlatformInsightsTask instance
            db: Database session

        Returns:
            TaskExecutionResult describing the fetch outcome. Unexpected errors
            are converted into an error result rather than raised.
        """
        start_time = time.time()
        user_id = task.user_id
        site_url = task.site_url

        try:
            self.logger.info(
                f"Executing Bing insights fetch: task_id={task.id} | "
                f"user_id={user_id} | site_url={site_url}"
            )

            # Create execution log
            execution_log = PlatformInsightsExecutionLog(
                task_id=task.id,
                execution_date=datetime.utcnow(),
                status='running'
            )
            db.add(execution_log)
            db.flush()

            # Fetch insights
            result = await self._fetch_insights(task, db)

            # Update execution log
            execution_time_ms = int((time.time() - start_time) * 1000)
            execution_log.status = 'success' if result.success else 'failed'
            execution_log.result_data = result.result_data
            execution_log.error_message = result.error_message
            execution_log.execution_time_ms = execution_time_ms
            # result_data may be None; guard before reading the data source
            execution_log.data_source = (
                (result.result_data or {}).get('data_source') if result.success else None
            )

            # Update task based on result
            now = datetime.utcnow()
            task.last_check = now

            if result.success:
                task.last_success = now
                task.status = 'active'
                task.failure_reason = None
                # Schedule next check (7 days from now)
                task.next_check = self.calculate_next_execution(
                    task=task,
                    frequency='Weekly',
                    last_execution=task.last_check
                )
            else:
                task.last_failure = now
                task.failure_reason = result.error_message
                task.status = 'failed'
                # Schedule retry in 1 day
                task.next_check = now + timedelta(days=1)

            task.updated_at = now
            db.commit()

            return result

        except Exception as e:
            execution_time_ms = int((time.time() - start_time) * 1000)

            # If the failure came from flush/commit the session is in a failed
            # transaction; it must be rolled back before it can be used again.
            self._safe_rollback(db)

            # Set database session for exception handler
            self.exception_handler.db = db

            error_result = self.exception_handler.handle_task_execution_error(
                task=task,
                error=e,
                execution_time_ms=execution_time_ms,
                context="Bing insights fetch"
            )

            # Record the failure on the task; never let this bookkeeping mask
            # the error result we are about to return.
            try:
                now = datetime.utcnow()
                task.last_check = now
                task.last_failure = now
                task.failure_reason = str(e)
                task.status = 'failed'
                task.next_check = now + timedelta(days=1)
                task.updated_at = now
                db.commit()
            except Exception as persist_error:
                self.logger.error(
                    f"Could not persist failure state for Bing insights task: {persist_error}",
                    exc_info=True
                )
                self._safe_rollback(db)

            return error_result

    def _safe_rollback(self, db: Session) -> None:
        """Roll back the session, logging instead of raising if that fails too."""
        try:
            db.rollback()
        except Exception as rollback_error:
            self.logger.warning(f"Rollback failed in Bing insights executor: {rollback_error}")

    async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
        """
        Fetch Bing insights data.

        On first run (no last_success), loads cached data and falls back to
        ``_fetch_fresh_data`` when nothing is cached.
        On subsequent runs, always goes through ``_fetch_fresh_data``.
        Any exception is turned into a failed TaskExecutionResult.
        """
        user_id = task.user_id
        site_url = task.site_url

        try:
            # Check if this is first run (no previous success)
            is_first_run = task.last_success is None

            if not is_first_run:
                # Subsequent run: Always fetch fresh data
                self.logger.info(f"Subsequent run for Bing insights task {task.id} - fetching fresh data")
                return await self._fetch_fresh_data(user_id, site_url)

            # First run: Try to load from cache
            self.logger.info(f"First run for Bing insights task {task.id} - loading cached data")
            cached_data = self._load_cached_data(user_id, site_url)

            if cached_data:
                self.logger.info(f"Loaded cached Bing data for user {user_id}")
                return TaskExecutionResult(
                    success=True,
                    result_data={
                        'data_source': 'cached',
                        'insights': cached_data,
                        'message': 'Loaded from cached data (first run)'
                    }
                )

            # No cached data - try to fetch from API
            self.logger.info(f"No cached data found, fetching from Bing API")
            return await self._fetch_fresh_data(user_id, site_url)

        except Exception as e:
            self.logger.error(f"Error fetching Bing insights for user {user_id}: {e}", exc_info=True)
            return TaskExecutionResult(
                success=False,
                error_message=f"Failed to fetch Bing insights: {str(e)}",
                result_data={'error': str(e)}
            )

    def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
        """
        Load the stored 30-day Bing analytics summary for the user.

        Returns the summary dict, or None when nothing usable is stored or the
        storage lookup raises (the error is logged as a warning).
        """
        try:
            # Get analytics summary from storage service
            summary = self.storage_service.get_analytics_summary(
                user_id=user_id,
                site_url=site_url or '',
                days=30
            )

            if summary and isinstance(summary, dict):
                self.logger.info(f"Found cached Bing data for user {user_id}")
                return summary

            return None

        except Exception as e:
            self.logger.warning(f"Error loading cached Bing data: {e}")
            return None

    async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
        """
        Build a Bing insights result for the user.

        Verifies the user has active tokens and at least one site, defaults
        ``site_url`` to the first site, then reads the 30-day summary from the
        storage service. No live Bing API call is made yet.
        """
        try:
            # Check if user has active tokens
            token_status = self.bing_oauth.get_user_token_status(user_id)

            if not token_status.get('has_active_tokens'):
                return TaskExecutionResult(
                    success=False,
                    error_message="Bing Webmaster tokens not available or expired",
                    result_data={'error': 'No active tokens'}
                )

            # Get user's sites
            sites = self.bing_oauth.get_user_sites(user_id)

            if not sites:
                return TaskExecutionResult(
                    success=False,
                    error_message="No Bing Webmaster sites found",
                    result_data={'error': 'No sites found'}
                )

            # Use provided site_url or first site
            if not site_url:
                first_site = sites[0]
                site_url = first_site.get('Url', '') if isinstance(first_site, dict) else first_site

            # Get active token
            active_tokens = token_status.get('active_tokens', [])
            if not active_tokens:
                return TaskExecutionResult(
                    success=False,
                    error_message="No active Bing Webmaster tokens",
                    result_data={'error': 'No tokens'}
                )

            # For now, use stored analytics data (Bing API integration can be added later)
            # This ensures we have data available even if the API class doesn't exist yet
            summary = self.storage_service.get_analytics_summary(user_id, site_url, days=30)

            if not (summary and isinstance(summary, dict)):
                # No stored data available
                return TaskExecutionResult(
                    success=False,
                    error_message="No Bing analytics data available. Data will be collected during next onboarding refresh.",
                    result_data={'error': 'No stored data available'}
                )

            # Format insights data from stored analytics
            # NOTE(review): date_range uses local time while fetched_at uses UTC - confirm intended
            insights_data = {
                'site_url': site_url,
                'date_range': {
                    'start': (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'),
                    'end': datetime.now().strftime('%Y-%m-%d')
                },
                'summary': summary.get('summary', {}),
                'fetched_at': datetime.utcnow().isoformat()
            }

            self.logger.info(
                f"Successfully loaded Bing insights from storage for user {user_id}, site {site_url}"
            )

            return TaskExecutionResult(
                success=True,
                result_data={
                    'data_source': 'storage',
                    'insights': insights_data,
                    'message': 'Loaded from stored analytics data'
                }
            )

        except Exception as e:
            self.logger.error(f"Error fetching fresh Bing data: {e}", exc_info=True)
            return TaskExecutionResult(
                success=False,
                error_message=f"API fetch failed: {str(e)}",
                result_data={'error': str(e)}
            )

    def calculate_next_execution(
        self,
        task: PlatformInsightsTask,
        frequency: str,
        last_execution: Optional[datetime] = None
    ) -> datetime:
        """
        Calculate next execution time based on frequency.

        'Weekly' adds 7 days and 'Daily' adds 1 day to ``last_execution``
        (defaulting to the current UTC time); any other value falls back to
        weekly.
        """
        if last_execution is None:
            last_execution = datetime.utcnow()

        # Unknown frequencies default to weekly
        return last_execution + timedelta(days=self._FREQUENCY_DAYS.get(frequency, 7))
class GSCInsightsExecutor(TaskExecutor):
    """
    Executor for GSC insights fetch tasks.

    Handles:
    - Fetching GSC insights data weekly
    - On first run: Loads existing cached data
    - On subsequent runs: Fetches fresh data from GSC API
    - Logging results and updating task status
    """

    def __init__(self):
        """Set up the module logger, the scheduler exception handler and the GSC service."""
        self.logger = logger
        self.exception_handler = SchedulerExceptionHandler()
        self.gsc_service = GSCService()

    async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
        """
        Execute a GSC insights fetch task.

        Adds a PlatformInsightsExecutionLog row, fetches the insights, then
        records the outcome on both the log and the task and commits. A
        successful run schedules the next check through
        calculate_next_execution ('Weekly'); a failed run schedules a retry
        one day later.

        Args:
            task: PlatformInsightsTask instance
            db: Database session

        Returns:
            TaskExecutionResult from the fetch, or the result produced by
            the exception handler when an unexpected error occurs.
        """
        start_time = time.time()
        user_id = task.user_id
        site_url = task.site_url
        # Declared before the try so the failure path can tell whether a
        # log row was already created and must be taken out of 'running'.
        execution_log = None

        try:
            self.logger.info(
                f"Executing GSC insights fetch: task_id={task.id} | "
                f"user_id={user_id} | site_url={site_url}"
            )

            # Create execution log
            execution_log = PlatformInsightsExecutionLog(
                task_id=task.id,
                execution_date=datetime.utcnow(),
                status='running'
            )
            db.add(execution_log)
            db.flush()

            # Fetch insights
            result = await self._fetch_insights(task, db)

            # Update execution log
            execution_time_ms = int((time.time() - start_time) * 1000)
            execution_log.status = 'success' if result.success else 'failed'
            execution_log.result_data = result.result_data
            execution_log.error_message = result.error_message
            execution_log.execution_time_ms = execution_time_ms
            execution_log.data_source = result.result_data.get('data_source') if result.success else None

            # Update task based on result
            task.last_check = datetime.utcnow()

            if result.success:
                task.last_success = datetime.utcnow()
                task.status = 'active'
                task.failure_reason = None
                # Schedule next check (7 days from now)
                task.next_check = self.calculate_next_execution(
                    task=task,
                    frequency='Weekly',
                    last_execution=task.last_check
                )
            else:
                task.last_failure = datetime.utcnow()
                task.failure_reason = result.error_message
                task.status = 'failed'
                # Schedule retry in 1 day
                task.next_check = datetime.utcnow() + timedelta(days=1)

            task.updated_at = datetime.utcnow()
            db.commit()

            return result

        except Exception as e:
            execution_time_ms = int((time.time() - start_time) * 1000)

            # Set database session for exception handler
            self.exception_handler.db = db

            error_result = self.exception_handler.handle_task_execution_error(
                task=task,
                error=e,
                execution_time_ms=execution_time_ms,
                context="GSC insights fetch"
            )

            # Persisting the failure is best effort: the session may already
            # be unusable (e.g. the error came from flush/commit), so a
            # second failure here must not escape the executor.
            try:
                failed_at = datetime.utcnow()

                if execution_log is not None:
                    # Do not leave the row created above stuck in 'running'
                    execution_log.status = 'failed'
                    execution_log.error_message = str(e)
                    execution_log.execution_time_ms = execution_time_ms

                task.last_check = failed_at
                task.last_failure = failed_at
                task.failure_reason = str(e)
                task.status = 'failed'
                task.next_check = failed_at + timedelta(days=1)
                task.updated_at = failed_at
                db.commit()
            except Exception as commit_error:
                db_error = DatabaseError(
                    message=f"Error saving execution log: {str(commit_error)}",
                    user_id=user_id,
                    task_id=task.id,
                    original_error=commit_error
                )
                self.exception_handler.handle_exception(db_error)
                db.rollback()

            return error_result

    async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
        """
        Fetch GSC insights data.

        On first run (no last_success), loads cached data and falls back to
        a fresh fetch when no cache exists. On subsequent runs, always
        fetches fresh data from the API.

        Args:
            task: Task providing user_id, site_url and last_success.
            db: Database session (not used by this method).

        Returns:
            TaskExecutionResult; exceptions are logged and converted into a
            failed result.
        """
        user_id = task.user_id
        site_url = task.site_url

        try:
            # Check if this is first run (no previous success)
            is_first_run = task.last_success is None

            if not is_first_run:
                # Subsequent run: Always fetch fresh data
                self.logger.info(f"Subsequent run for GSC insights task {task.id} - fetching fresh data")
                return await self._fetch_fresh_data(user_id, site_url)

            # First run: Try to load from cache
            self.logger.info(f"First run for GSC insights task {task.id} - loading cached data")
            cached_data = self._load_cached_data(user_id, site_url)

            if cached_data:
                self.logger.info(f"Loaded cached GSC data for user {user_id}")
                return TaskExecutionResult(
                    success=True,
                    result_data={
                        'data_source': 'cached',
                        'insights': cached_data,
                        'message': 'Loaded from cached data (first run)'
                    }
                )

            # No cached data - try to fetch from API
            self.logger.info("No cached data found, fetching from GSC API")
            return await self._fetch_fresh_data(user_id, site_url)

        except Exception as e:
            self.logger.error(f"Error fetching GSC insights for user {user_id}: {e}", exc_info=True)
            return TaskExecutionResult(
                success=False,
                error_message=f"Failed to fetch GSC insights: {str(e)}",
                result_data={'error': str(e)}
            )

    def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
        """
        Load the most recent cached GSC analytics row from the GSC SQLite database.

        Args:
            user_id: User whose cache is read.
            site_url: Restricts the lookup to one site when given; otherwise
                the newest 'analytics' row for the user across sites is used.

        Returns:
            The decoded cache payload, or None when no row exists or the
            lookup fails (failures are logged as warnings).
        """
        # Local import keeps this fix self-contained within the class
        from contextlib import closing

        try:
            db_path = self.gsc_service.db_path

            # sqlite3's own context manager only commits/rolls back; it does
            # NOT close the connection. closing() releases the handle so a
            # recurring task does not accumulate open connections.
            with closing(sqlite3.connect(db_path)) as conn:
                cursor = conn.cursor()

                # Find most recent cached data
                if site_url:
                    cursor.execute('''
                        SELECT data_json, created_at
                        FROM gsc_data_cache
                        WHERE user_id = ? AND site_url = ? AND data_type = 'analytics'
                        ORDER BY created_at DESC
                        LIMIT 1
                    ''', (user_id, site_url))
                else:
                    cursor.execute('''
                        SELECT data_json, created_at
                        FROM gsc_data_cache
                        WHERE user_id = ? AND data_type = 'analytics'
                        ORDER BY created_at DESC
                        LIMIT 1
                    ''', (user_id,))

                row = cursor.fetchone()

            if row is None:
                return None

            data_json, created_at = row
            insights_data = json.loads(data_json) if isinstance(data_json, str) else data_json

            self.logger.info(
                f"Found cached GSC data from {created_at} for user {user_id}"
            )

            return insights_data

        except Exception as e:
            self.logger.warning(f"Error loading cached GSC data: {e}")
            return None

    async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
        """
        Fetch fresh GSC insights for the last 30 days through the GSC service.

        Args:
            user_id: User whose data is fetched.
            site_url: Site to query; when empty, the first site from the
                user's GSC site list is used.

        Returns:
            A successful TaskExecutionResult with data_source 'api', or a
            failed one when no site exists, the service reports an error, or
            an exception is raised.
        """
        try:
            # If no site_url, get first site
            if not site_url:
                sites = self.gsc_service.get_site_list(user_id)
                if not sites:
                    return TaskExecutionResult(
                        success=False,
                        error_message="No GSC sites found for user",
                        result_data={'error': 'No sites found'}
                    )
                site_url = sites[0]['siteUrl']

            # Get analytics for last 30 days. One clock reading keeps both
            # ends of the window on the same reference instant.
            today = datetime.now()
            end_date = today.strftime('%Y-%m-%d')
            start_date = (today - timedelta(days=30)).strftime('%Y-%m-%d')

            # Fetch search analytics
            search_analytics = self.gsc_service.get_search_analytics(
                user_id=user_id,
                site_url=site_url,
                start_date=start_date,
                end_date=end_date
            )

            if 'error' in search_analytics:
                return TaskExecutionResult(
                    success=False,
                    error_message=search_analytics.get('error', 'Unknown error'),
                    result_data=search_analytics
                )

            # Format insights data
            insights_data = {
                'site_url': site_url,
                'date_range': {
                    'start': start_date,
                    'end': end_date
                },
                'overall_metrics': search_analytics.get('overall_metrics', {}),
                'query_data': search_analytics.get('query_data', {}),
                'fetched_at': datetime.utcnow().isoformat()
            }

            self.logger.info(
                f"Successfully fetched GSC insights for user {user_id}, site {site_url}"
            )

            return TaskExecutionResult(
                success=True,
                result_data={
                    'data_source': 'api',
                    'insights': insights_data,
                    'message': 'Fetched fresh data from GSC API'
                }
            )

        except Exception as e:
            self.logger.error(f"Error fetching fresh GSC data: {e}", exc_info=True)
            return TaskExecutionResult(
                success=False,
                error_message=f"API fetch failed: {str(e)}",
                result_data={'error': str(e)}
            )

    def calculate_next_execution(
        self,
        task: PlatformInsightsTask,
        frequency: str,
        last_execution: Optional[datetime] = None
    ) -> datetime:
        """
        Calculate next execution time based on frequency.

        Args:
            task: The task being scheduled (not consulted by this method).
            frequency: 'Daily' schedules one day ahead; 'Weekly' and any
                other value schedule seven days ahead.
            last_execution: Reference time; defaults to the current UTC time.

        Returns:
            The reference time plus the interval for the given frequency.
        """
        if last_execution is None:
            last_execution = datetime.utcnow()

        if frequency == 'Daily':
            return last_execution + timedelta(days=1)

        # 'Weekly' and unknown labels both resolve to the weekly default
        return last_execution + timedelta(days=7)
class WebsiteAnalysisExecutor(TaskExecutor):
    """
    Executor for website analysis tasks.

    Handles:
    - Analyzing user's website (updates existing WebsiteAnalysis record)
    - Analyzing competitor websites (stores in CompetitorAnalysis table)
    - Logging results and updating task status
    - Scheduling next execution based on frequency_days
    """

    def __init__(self):
        """Set up the module logger, exception handler, crawler and style-detection logic."""
        self.logger = logger
        self.exception_handler = SchedulerExceptionHandler()
        self.crawler_logic = WebCrawlerLogic()
        self.style_logic = StyleDetectionLogic()

    async def execute_task(
        self,
        task: WebsiteAnalysisTask,
        db: Session
    ) -> TaskExecutionResult:
        """
        Execute a website analysis task.

        This performs complete website analysis using the same logic as
        /api/onboarding/style-detection/complete endpoint.

        A WebsiteAnalysisExecutionLog row is written for the run. On success
        the next check is scheduled from task.frequency_days; on failure
        next_check is left untouched so the task waits for a manual retry.

        Args:
            task: WebsiteAnalysisTask instance
            db: Database session

        Returns:
            TaskExecutionResult
        """
        start_time = time.time()
        user_id = task.user_id
        website_url = task.website_url
        task_type = task.task_type
        # Declared before the try so the failure path can reuse the row
        # created below instead of leaving it stuck in 'running'.
        execution_log = None

        try:
            self.logger.info(
                f"Executing website analysis: task_id={task.id} | "
                f"user_id={user_id} | url={website_url} | type={task_type}"
            )

            # Create execution log
            execution_log = WebsiteAnalysisExecutionLog(
                task_id=task.id,
                execution_date=datetime.utcnow(),
                status='running'
            )
            db.add(execution_log)
            db.flush()

            # Perform website analysis
            result = await self._perform_website_analysis(
                website_url=website_url,
                user_id=user_id,
                task_type=task_type,
                task=task,
                db=db
            )

            # Update execution log
            execution_time_ms = int((time.time() - start_time) * 1000)
            execution_log.status = 'success' if result.success else 'failed'
            execution_log.result_data = result.result_data
            execution_log.error_message = result.error_message
            execution_log.execution_time_ms = execution_time_ms

            # Update task based on result
            task.last_check = datetime.utcnow()
            task.updated_at = datetime.utcnow()

            if result.success:
                task.last_success = datetime.utcnow()
                task.status = 'active'
                task.failure_reason = None
                # Schedule next check based on frequency_days
                task.next_check = self.calculate_next_execution(
                    task=task,
                    frequency='Custom',
                    last_execution=task.last_check,
                    custom_days=task.frequency_days
                )

                # Commit all changes to database
                db.commit()

                self.logger.info(
                    f"Website analysis completed successfully for task {task.id}. "
                    f"Next check scheduled for {task.next_check}"
                )
                return result

            task.last_failure = datetime.utcnow()
            task.failure_reason = result.error_message
            task.status = 'failed'
            # Do NOT update next_check - wait for manual retry

            # Commit all changes to database
            db.commit()

            self.logger.warning(
                f"Website analysis failed for task {task.id}. "
                f"Error: {result.error_message}. Waiting for manual retry."
            )
            return result

        except Exception as e:
            execution_time_ms = int((time.time() - start_time) * 1000)

            # Set database session for exception handler
            self.exception_handler.db = db

            # Create structured error
            error = TaskExecutionError(
                message=f"Error executing website analysis task {task.id}: {str(e)}",
                user_id=user_id,
                task_id=task.id,
                task_type="website_analysis",
                execution_time_ms=execution_time_ms,
                context={
                    "website_url": website_url,
                    "task_type": task_type,
                    "user_id": user_id
                },
                original_error=e
            )

            # Handle exception with structured logging
            self.exception_handler.handle_exception(error)

            # Update execution log with error
            try:
                if execution_log is None:
                    # The failure happened before a log row existed
                    execution_log = WebsiteAnalysisExecutionLog(
                        task_id=task.id,
                        execution_date=datetime.utcnow()
                    )

                # Reuse the row from the try block when there is one, so a
                # single run never leaves a stale 'running' row next to a
                # separate 'failed' one.
                execution_log.status = 'failed'
                execution_log.error_message = str(e)
                execution_log.execution_time_ms = execution_time_ms
                execution_log.result_data = {
                    "error_type": error.error_type.value,
                    "severity": error.severity.value,
                    "context": error.context
                }
                # Adding an object the session already tracks is harmless;
                # this also covers a row that is not yet attached.
                db.add(execution_log)

                task.last_failure = datetime.utcnow()
                task.failure_reason = str(e)
                task.status = 'failed'
                task.last_check = datetime.utcnow()
                task.updated_at = datetime.utcnow()
                # Do NOT update next_check - wait for manual retry

                db.commit()
            except Exception as commit_error:
                db_error = DatabaseError(
                    message=f"Error saving execution log: {str(commit_error)}",
                    user_id=user_id,
                    task_id=task.id,
                    original_error=commit_error
                )
                self.exception_handler.handle_exception(db_error)
                db.rollback()

            return TaskExecutionResult(
                success=False,
                error_message=str(e),
                execution_time_ms=execution_time_ms,
                retryable=True
            )

    async def _perform_website_analysis(
        self,
        website_url: str,
        user_id: str,
        task_type: str,
        task: WebsiteAnalysisTask,
        db: Session
    ) -> TaskExecutionResult:
        """
        Perform website analysis using existing service logic.

        Reuses the same logic as /api/onboarding/style-detection/complete:
        crawl the site, run style and pattern analysis in parallel worker
        threads, derive style guidelines, then store the outcome according
        to task_type ('user_website' or 'competitor').

        Args:
            website_url: URL to crawl and analyze.
            user_id: Owner of the task.
            task_type: Decides where the results are stored.
            task: Task instance (supplies competitor_id for competitor tasks).
            db: Database session used for storing results.

        Returns:
            TaskExecutionResult carrying the analysis data on success; a
            failed, retryable result when crawling, style analysis or
            storage fails.
        """
        try:
            # Step 1: Crawl website content
            self.logger.info(f"Crawling website: {website_url}")
            crawl_result = await self.crawler_logic.crawl_website(website_url)

            if not crawl_result.get('success'):
                error_msg = crawl_result.get('error', 'Crawling failed')
                self.logger.error(f"Crawling failed for {website_url}: {error_msg}")
                return TaskExecutionResult(
                    success=False,
                    error_message=f"Crawling failed: {error_msg}",
                    result_data={'crawl_result': crawl_result},
                    retryable=True
                )

            # Step 2: Run style analysis and patterns analysis in parallel
            self.logger.info(f"Running style analysis for {website_url}")

            async def run_style_analysis():
                """Run style analysis in executor"""
                # get_running_loop() is the documented way to obtain the
                # loop from inside a coroutine
                loop = asyncio.get_running_loop()
                return await loop.run_in_executor(
                    None,
                    partial(self.style_logic.analyze_content_style, crawl_result['content'])
                )

            async def run_patterns_analysis():
                """Run patterns analysis in executor"""
                loop = asyncio.get_running_loop()
                return await loop.run_in_executor(
                    None,
                    partial(self.style_logic.analyze_style_patterns, crawl_result['content'])
                )

            # Execute style and patterns analysis in parallel; exceptions
            # come back as values so each can be handled separately
            style_analysis, patterns_result = await asyncio.gather(
                run_style_analysis(),
                run_patterns_analysis(),
                return_exceptions=True
            )

            # Style analysis is mandatory: its failure fails the task
            if isinstance(style_analysis, Exception):
                self.logger.error(f"Style analysis exception: {style_analysis}")
                return TaskExecutionResult(
                    success=False,
                    error_message=f"Style analysis failed: {str(style_analysis)}",
                    retryable=True
                )

            # Patterns analysis is optional: its failure is only logged
            if isinstance(patterns_result, Exception):
                self.logger.warning(f"Patterns analysis exception: {patterns_result}")
                patterns_result = None

            style_succeeded = bool(style_analysis and style_analysis.get('success'))

            # Step 3: Generate style guidelines
            style_guidelines = None
            if style_succeeded:
                loop = asyncio.get_running_loop()
                guidelines_result = await loop.run_in_executor(
                    None,
                    partial(self.style_logic.generate_style_guidelines, style_analysis.get('analysis', {}))
                )
                if guidelines_result and guidelines_result.get('success'):
                    style_guidelines = guidelines_result.get('guidelines')

            # Prepare analysis data
            analysis_data = {
                'crawl_result': crawl_result,
                'style_analysis': style_analysis.get('analysis') if style_succeeded else None,
                'style_patterns': patterns_result or None,
                'style_guidelines': style_guidelines,
            }

            # Step 4: Store results based on task type
            if task_type == 'user_website':
                # Update existing WebsiteAnalysis record
                await self._update_user_website_analysis(
                    user_id=user_id,
                    website_url=website_url,
                    analysis_data=analysis_data,
                    db=db
                )
            elif task_type == 'competitor':
                # Store in CompetitorAnalysis table
                await self._store_competitor_analysis(
                    user_id=user_id,
                    competitor_url=website_url,
                    competitor_id=task.competitor_id,
                    analysis_data=analysis_data,
                    db=db
                )
            else:
                # The result is still returned to the caller, but nothing
                # is persisted; make that visible in the logs.
                self.logger.warning(
                    f"Unrecognized task_type '{task_type}' for {website_url}; analysis was not stored"
                )

            self.logger.info(f"Website analysis completed successfully for {website_url}")

            return TaskExecutionResult(
                success=True,
                result_data=analysis_data,
                retryable=False
            )

        except Exception as e:
            self.logger.error(f"Error performing website analysis: {e}", exc_info=True)
            return TaskExecutionResult(
                success=False,
                error_message=str(e),
                retryable=True
            )

    async def _update_user_website_analysis(
        self,
        user_id: str,
        website_url: str,
        analysis_data: Dict[str, Any],
        db: Session
    ):
        """
        Update existing WebsiteAnalysis record for user's website.

        Args:
            user_id: Clerk user ID (string).
            website_url: The analyzed website.
            analysis_data: Dict with crawl_result, style_analysis,
                style_patterns and style_guidelines.
            db: Database session handed to WebsiteAnalysisService.

        Raises:
            Exception: Any error is logged and re-raised to the caller.
        """
        try:
            # Convert Clerk user ID to the integer session_id. This must
            # yield exactly the same value as clerk_user_id_to_int() in
            # services/website_analysis_monitoring_service.py (first 8 hex
            # digits of the SHA-256, reduced modulo 2147483647); otherwise
            # the record saved here is stored under a session_id that the
            # monitoring service never queries.
            import hashlib
            user_id_hash = hashlib.sha256(user_id.encode()).hexdigest()
            user_id_int = int(user_id_hash[:8], 16) % 2147483647

            # Use WebsiteAnalysisService to update
            analysis_service = WebsiteAnalysisService(db)

            # Prepare data in format expected by save_analysis
            response_data = {
                'crawl_result': analysis_data.get('crawl_result'),
                'style_analysis': analysis_data.get('style_analysis'),
                'style_patterns': analysis_data.get('style_patterns'),
                'style_guidelines': analysis_data.get('style_guidelines'),
            }

            # Save/update analysis
            analysis_id = analysis_service.save_analysis(
                session_id=user_id_int,
                website_url=website_url,
                analysis_data=response_data
            )

            if analysis_id:
                self.logger.info(f"Updated user website analysis for {website_url} (analysis_id: {analysis_id})")
            else:
                self.logger.warning(f"Failed to update user website analysis for {website_url}")

        except Exception as e:
            self.logger.error(f"Error updating user website analysis: {e}", exc_info=True)
            raise

    async def _store_competitor_analysis(
        self,
        user_id: str,
        competitor_url: str,
        competitor_id: Optional[str],
        analysis_data: Dict[str, Any],
        db: Session
    ):
        """
        Store competitor analysis in CompetitorAnalysis table.

        Updates the existing row for (session, competitor_url) when there is
        one, otherwise inserts a new row, then commits.

        Args:
            user_id: User whose onboarding session owns the competitor.
            competitor_url: URL that was analyzed.
            competitor_id: Used as the domain when the URL has no netloc.
            analysis_data: Analysis payload to store.
            db: Database session.

        Raises:
            ValueError: When the user has no onboarding session.
            Exception: Any other error, after the session is rolled back.
        """
        try:
            # Get onboarding session for user
            session = db.query(OnboardingSession).filter(
                OnboardingSession.user_id == user_id
            ).first()

            if not session:
                raise ValueError(f"No onboarding session found for user {user_id}")

            # Extract domain from URL
            parsed_url = urlparse(competitor_url)
            competitor_domain = parsed_url.netloc or competitor_id

            # Check if analysis already exists for this competitor
            existing = db.query(CompetitorAnalysis).filter(
                CompetitorAnalysis.session_id == session.id,
                CompetitorAnalysis.competitor_url == competitor_url
            ).first()

            if existing:
                # Update existing analysis
                existing.analysis_data = analysis_data
                existing.analysis_date = datetime.utcnow()
                existing.status = 'completed'
                existing.error_message = None
                existing.warning_message = None
                existing.updated_at = datetime.utcnow()
                self.logger.info(f"Updated competitor analysis for {competitor_url}")
            else:
                # Create new analysis
                competitor_analysis = CompetitorAnalysis(
                    session_id=session.id,
                    competitor_url=competitor_url,
                    competitor_domain=competitor_domain,
                    analysis_data=analysis_data,
                    status='completed',
                    analysis_date=datetime.utcnow()
                )
                db.add(competitor_analysis)
                self.logger.info(f"Created new competitor analysis for {competitor_url}")

            db.commit()

        except Exception as e:
            db.rollback()
            self.logger.error(f"Error storing competitor analysis: {e}", exc_info=True)
            raise

    def calculate_next_execution(
        self,
        task: WebsiteAnalysisTask,
        frequency: str,
        last_execution: Optional[datetime] = None,
        custom_days: Optional[int] = None
    ) -> datetime:
        """
        Calculate next execution time based on frequency or custom days.

        Args:
            task: WebsiteAnalysisTask instance
            frequency: Frequency string ('Custom' for website analysis)
            last_execution: Last execution datetime (defaults to task.last_check or now)
            custom_days: Custom number of days (from task.frequency_days)

        Returns:
            Next execution datetime
        """
        if last_execution is None:
            last_execution = task.last_check if task.last_check else datetime.utcnow()

        # Use custom_days if provided, otherwise use task.frequency_days
        days = custom_days if custom_days is not None else task.frequency_days

        if frequency == 'Custom' and days:
            return last_execution + timedelta(days=days)

        # Reached for a non-'Custom' label, or when no positive day count is
        # available. NOTE(review): this still relies on task.frequency_days
        # being set; confirm the model guarantees a value.
        self.logger.warning(
            f"Unknown frequency '{frequency}' for website analysis task {task.id}. "
            f"Using frequency_days={task.frequency_days}."
        )
        return last_execution + timedelta(days=task.frequency_days)
def load_due_website_analysis_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None
) -> List[WebsiteAnalysisTask]:
    """
    Load all website analysis tasks that are due for execution.

    A task is due when its status is 'active' and its next_check is either
    unset (never executed) or not later than the current UTC time.

    Args:
        db: Database session
        user_id: Optional user ID; when given, it is converted to a string
            and only that user's tasks are returned. When None, due tasks
            for all users are returned.

    Returns:
        List of due WebsiteAnalysisTask instances
    """
    current_time = datetime.utcnow()

    # Name the two criteria separately so the final filter reads plainly
    is_active = WebsiteAnalysisTask.status == 'active'
    is_due = or_(
        WebsiteAnalysisTask.next_check <= current_time,
        WebsiteAnalysisTask.next_check.is_(None)
    )
    due_tasks = db.query(WebsiteAnalysisTask).filter(and_(is_active, is_due))

    if user_id is None:
        return due_tasks.all()

    # Restrict to a single user
    return due_tasks.filter(WebsiteAnalysisTask.user_id == str(user_id)).all()
def clerk_user_id_to_int(user_id: str) -> int:
    """
    Map a Clerk user ID to a deterministic integer for use as session_id.

    The ID is hashed with SHA-256; the first eight hex digits of the digest
    are read as an integer and reduced modulo 2147483647.
    This MUST match the pattern used in component_logic.py for onboarding.

    Args:
        user_id: Clerk user ID (e.g., 'user_33Gz1FPI86VDXhRY8QN4ragRFGN')

    Returns:
        int: Deterministic integer in the range 0..2147483646
    """
    digest_hex = hashlib.sha256(user_id.encode()).hexdigest()
    leading_bits = int(digest_hex[:8], 16)
    return leading_bits % 2147483647
+ + Args: + user_id: Clerk user ID (string) + db: Database session + + Returns: + Dictionary with success status and task details + """ + try: + logger.info(f"[Website Analysis Tasks] Creating tasks for user: {user_id}") + + # Get user's website URL from onboarding + onboarding_service = OnboardingDatabaseService(db=db) + website_analysis = onboarding_service.get_website_analysis(user_id, db) + + if not website_analysis: + logger.warning(f"[Website Analysis Tasks] No website analysis found for user {user_id}") + # Try direct query using hash-based session_id (must match onboarding pattern) + try: + from models.onboarding import WebsiteAnalysis + session_id_int = clerk_user_id_to_int(user_id) + + logger.info( + f"[Website Analysis Tasks] Querying WebsiteAnalysis with hash-based session_id: {session_id_int}" + ) + + analysis = db.query(WebsiteAnalysis).filter( + WebsiteAnalysis.session_id == session_id_int + ).order_by(WebsiteAnalysis.created_at.desc()).first() + + if analysis: + logger.info(f"[Website Analysis Tasks] ✅ Found analysis via hash-based query: {analysis.website_url}") + website_analysis = analysis.to_dict() + except Exception as e: + logger.debug(f"[Website Analysis Tasks] Direct query fallback failed: {e}") + + if not website_analysis: + return { + 'success': False, + 'error': 'No website analysis found. Complete onboarding first.' + } + + website_url = website_analysis.get('website_url') + + # Log the actual value for debugging (always log, not just debug level) + logger.info( + f"[Website Analysis Tasks] website_url from dict: {repr(website_url)} " + f"(type: {type(website_url).__name__}, truthy: {bool(website_url)})" + ) + + # Check if website_url is None, empty string, or whitespace + if not website_url or (isinstance(website_url, str) and not website_url.strip()): + # Log what we actually got for debugging + logger.warning( + f"[Website Analysis Tasks] No website URL found for user {user_id}. 
" + f"Analysis keys: {list(website_analysis.keys()) if website_analysis else 'None'}, " + f"website_url value: {repr(website_url)}" + ) + + # Try direct access to the model using hash-based session_id + # This MUST use the same hash function as onboarding (clerk_user_id_to_int) + try: + from models.onboarding import WebsiteAnalysis + session_id_int = clerk_user_id_to_int(user_id) + + logger.info( + f"[Website Analysis Tasks] Querying WebsiteAnalysis with hash-based session_id: {session_id_int} " + f"for user {user_id}" + ) + + analysis = db.query(WebsiteAnalysis).filter( + WebsiteAnalysis.session_id == session_id_int + ).order_by(WebsiteAnalysis.created_at.desc()).first() + + if analysis: + logger.info( + f"[Website Analysis Tasks] Direct model access - " + f"website_url: {repr(analysis.website_url)}, " + f"type: {type(analysis.website_url).__name__ if analysis.website_url else 'None'}, " + f"id: {analysis.id}, session_id: {analysis.session_id}" + ) + + if analysis.website_url: + website_url = analysis.website_url + logger.info(f"[Website Analysis Tasks] ✅ Retrieved website_url via hash-based query: {website_url}") + else: + # Try to extract URL from crawl_result if website_url is NULL + if analysis.crawl_result and isinstance(analysis.crawl_result, dict): + # Check multiple possible locations for URL + crawl_url = ( + analysis.crawl_result.get('url') or + analysis.crawl_result.get('website_url') or + (analysis.crawl_result.get('content', {}).get('domain_info', {}).get('domain') if isinstance(analysis.crawl_result.get('content'), dict) else None) + ) + + # If still not found, check if crawl_result has nested structure + if not crawl_url and 'content' in analysis.crawl_result: + content = analysis.crawl_result.get('content', {}) + if isinstance(content, dict): + # Check domain_info for domain + domain_info = content.get('domain_info', {}) + if isinstance(domain_info, dict): + crawl_url = domain_info.get('domain') or domain_info.get('url') + + if crawl_url: + # 
Ensure it's a full URL (add https:// if missing) + if crawl_url and not crawl_url.startswith(('http://', 'https://')): + crawl_url = f"https://{crawl_url}" + logger.info(f"[Website Analysis Tasks] ✅ Extracted website_url from crawl_result: {crawl_url}") + website_url = crawl_url + else: + logger.warning( + f"[Website Analysis Tasks] Cannot extract URL from crawl_result. " + f"crawl_result keys: {list(analysis.crawl_result.keys()) if isinstance(analysis.crawl_result, dict) else 'not a dict'}, " + f"Analysis ID: {analysis.id}" + ) + else: + logger.warning( + f"[Website Analysis Tasks] website_url is NULL and crawl_result is empty or invalid. " + f"Analysis ID: {analysis.id}, Status: {analysis.status}, " + f"crawl_result type: {type(analysis.crawl_result).__name__ if analysis.crawl_result else 'None'}" + ) + else: + logger.warning( + f"[Website Analysis Tasks] No WebsiteAnalysis record found for " + f"hash-based session_id {session_id_int} (user {user_id})" + ) + except Exception as e: + logger.warning(f"[Website Analysis Tasks] Hash-based query fallback failed: {e}", exc_info=True) + + if not website_url: + return { + 'success': False, + 'error': 'No website URL found in onboarding data. Please complete step 2 (Website Analysis) in onboarding.' + } + + logger.info(f"[Website Analysis Tasks] User website URL: {website_url}") + + tasks_created = [] + + # 1. Create task for user's website (optional recurring every 30 days) + user_task = _create_or_update_task( + db=db, + user_id=user_id, + website_url=website_url, + task_type='user_website', + frequency_days=30 # Optional: recurring every 30 days + ) + if user_task: + tasks_created.append(user_task) + logger.info(f"Created user website analysis task for {website_url}") + + # 2. Get competitors from onboarding + competitors = _get_competitors_from_onboarding(user_id, db) + logger.info( + f"[Website Analysis Tasks] Found {len(competitors)} competitors for user {user_id}. 
" + f"Competitors: {[c.get('url') or c.get('website_url') or c.get('domain') for c in competitors]}" + ) + + # 3. Create task for each competitor + for competitor in competitors: + competitor_url = competitor.get('url') or competitor.get('website_url') + if not competitor_url: + continue + + # Extract competitor identifier + competitor_id = competitor.get('domain') or competitor.get('id') or _extract_domain(competitor_url) + + competitor_task = _create_or_update_task( + db=db, + user_id=user_id, + website_url=competitor_url, + task_type='competitor', + competitor_id=competitor_id, + frequency_days=10 # Recurring every 10 days + ) + if competitor_task: + tasks_created.append(competitor_task) + logger.info(f"Created competitor analysis task for {competitor_url}") + + db.commit() + + logger.info(f"Created {len(tasks_created)} website analysis tasks for user {user_id}") + + return { + 'success': True, + 'tasks_created': len(tasks_created), + 'tasks': [{ + 'id': t.id, + 'url': t.website_url, + 'type': t.task_type, + 'next_check': t.next_check.isoformat() if t.next_check else None + } for t in tasks_created] + } + + except Exception as e: + logger.error(f"Error creating website analysis tasks for user {user_id}: {e}", exc_info=True) + db.rollback() + return { + 'success': False, + 'error': str(e) + } + + +def _create_or_update_task( + db: Session, + user_id: str, + website_url: str, + task_type: str, + competitor_id: Optional[str] = None, + frequency_days: int = 10 +) -> Optional[WebsiteAnalysisTask]: + """Create or update a website analysis task.""" + try: + # Check if task already exists + existing = db.query(WebsiteAnalysisTask).filter( + WebsiteAnalysisTask.user_id == user_id, + WebsiteAnalysisTask.website_url == website_url, + WebsiteAnalysisTask.task_type == task_type + ).first() + + if existing: + # Update existing task + existing.status = 'active' + existing.frequency_days = frequency_days + existing.next_check = datetime.utcnow() + timedelta(days=frequency_days) 
+ existing.updated_at = datetime.utcnow() + if competitor_id: + existing.competitor_id = competitor_id + logger.info(f"Updated existing website analysis task {existing.id}") + return existing + + # Create new task + task = WebsiteAnalysisTask( + user_id=user_id, + website_url=website_url, + task_type=task_type, + competitor_id=competitor_id, + status='active', + frequency_days=frequency_days, + next_check=datetime.utcnow() + timedelta(days=frequency_days) + ) + db.add(task) + db.flush() + logger.info(f"Created new website analysis task {task.id} for {website_url}") + return task + + except Exception as e: + logger.error(f"Error creating/updating task: {e}", exc_info=True) + return None + + +def _get_competitors_from_onboarding(user_id: str, db: Session) -> List[Dict[str, Any]]: + """ + Get competitors from onboarding database. + + Competitors are stored in onboarding_sessions.step_data['step3_research_data']['competitors'] + or via Step3ResearchService. + """ + try: + # Get onboarding session + onboarding_service = OnboardingDatabaseService(db=db) + session = onboarding_service.get_session_by_user(user_id, db) + + if not session: + logger.warning(f"No onboarding session found for user {user_id}") + return [] + + # Try to get from step_data JSON column + competitors = [] + + # Method 1: Check if step_data column exists and has competitors + if hasattr(session, 'step_data') and session.step_data: + step_data = session.step_data if isinstance(session.step_data, dict) else {} + research_data = step_data.get('step3_research_data', {}) + competitors = research_data.get('competitors', []) + logger.info(f"[Competitor Retrieval] Method 1 (step_data): found {len(competitors)} competitors") + + # Method 2: If not found, try Step3ResearchService + if not competitors: + logger.info(f"[Competitor Retrieval] Attempting Step3ResearchService for user {user_id}, session_id: {session.id}") + try: + from api.onboarding_utils.step3_research_service import Step3ResearchService + import 
asyncio + step3_service = Step3ResearchService() + + # Run async function - handle both new and existing event loops + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + research_data_result = loop.run_until_complete( + step3_service.get_research_data(str(session.id)) + ) + + logger.info(f"[Competitor Retrieval] Step3ResearchService result: {research_data_result.get('success')}") + + if research_data_result.get('success'): + research_data = research_data_result.get('research_data', {}) + step3_data = research_data.get('step3_research_data', {}) + competitors = step3_data.get('competitors', []) + logger.info(f"[Competitor Retrieval] Retrieved {len(competitors)} competitors from Step3ResearchService") + else: + logger.warning(f"[Competitor Retrieval] Step3ResearchService returned error: {research_data_result.get('error')}") + except Exception as e: + logger.warning(f"[Competitor Retrieval] Could not fetch competitors from Step3ResearchService: {e}", exc_info=True) + + # Ensure competitors is a list + if not isinstance(competitors, list): + competitors = [] + + logger.info(f"Found {len(competitors)} competitors for user {user_id}") + return competitors + + except Exception as e: + logger.error(f"Error getting competitors from onboarding: {e}", exc_info=True) + return [] + + +def _extract_domain(url: str) -> str: + """Extract domain from URL.""" + try: + parsed = urlparse(url) + return parsed.netloc or url + except Exception: + return url + diff --git a/frontend/src/api/platformInsightsMonitoring.ts b/frontend/src/api/platformInsightsMonitoring.ts new file mode 100644 index 00000000..ce9487ee --- /dev/null +++ b/frontend/src/api/platformInsightsMonitoring.ts @@ -0,0 +1,86 @@ +/** + * Platform Insights Monitoring API Client + * Provides typed functions for fetching platform insights (GSC/Bing) monitoring data. 
// TypeScript interfaces

/** One platform insights (GSC/Bing) monitoring task as returned by the scheduler API. */
export interface PlatformInsightsTask {
  id: number;
  platform: 'gsc' | 'bing';
  site_url: string | null;
  status: 'active' | 'failed' | 'paused';
  last_check: string | null;
  last_success: string | null;
  last_failure: string | null;
  failure_reason: string | null;
  next_check: string | null;
  created_at: string;
  updated_at: string;
}

/** Response of the platform insights status endpoint. */
export interface PlatformInsightsStatusResponse {
  success: boolean;
  user_id: string;
  gsc_tasks: PlatformInsightsTask[];
  bing_tasks: PlatformInsightsTask[];
  total_tasks: number;
}

/** One execution log entry of a platform insights task. */
export interface PlatformInsightsExecutionLog {
  id: number;
  task_id: number;
  execution_date: string;
  status: 'success' | 'failed' | 'running' | 'skipped';
  result_data: any;
  error_message: string | null;
  execution_time_ms: number | null;
  data_source: 'cached' | 'api' | 'onboarding' | 'storage' | null;
  created_at: string;
}

/** Response of the platform insights logs endpoint. */
export interface PlatformInsightsLogsResponse {
  success: boolean;
  logs: PlatformInsightsExecutionLog[];
  total_count: number;
}

/**
 * Get platform insights status for a user.
 *
 * @param userId - user whose tasks are requested (URL-encoded into the path)
 * @throws Error carrying the backend `detail` message when available
 */
export const getPlatformInsightsStatus = async (
  userId: string
): Promise<PlatformInsightsStatusResponse> => {
  try {
    const response = await apiClient.get(
      `/api/scheduler/platform-insights/status/${encodeURIComponent(userId)}`
    );
    return response.data;
  } catch (error: any) {
    console.error('Error fetching platform insights status:', error);
    throw new Error(error?.response?.data?.detail || 'Failed to fetch platform insights status');
  }
};

/**
 * Get execution logs for platform insights tasks.
 *
 * @param userId - user whose logs are requested
 * @param limit - maximum number of log entries to request (default: 10)
 * @param taskId - optional task filter; sent as `task_id` whenever it is provided
 * @throws Error carrying the backend `detail` message when available
 */
export const getPlatformInsightsLogs = async (
  userId: string,
  limit: number = 10,
  taskId?: number
): Promise<PlatformInsightsLogsResponse> => {
  try {
    const params: Record<string, number> = { limit };
    // Compare against undefined so a task id of 0 is not silently dropped.
    if (taskId !== undefined) {
      params.task_id = taskId;
    }
    const response = await apiClient.get(
      `/api/scheduler/platform-insights/logs/${encodeURIComponent(userId)}`,
      { params }
    );
    return response.data;
  } catch (error: any) {
    console.error('Error fetching platform insights logs:', error);
    throw new Error(error?.response?.data?.detail || 'Failed to fetch platform insights logs');
  }
};
/**
 * Competitor Analysis Response Interface
 */
export interface CompetitorAnalysisResponse {
  success: boolean;
  competitors?: Array<{
    name?: string;
    url?: string;
    domain?: string;
    description?: string;
    similarity_score?: number;
    [key: string]: any;
  }>;
  // platform name -> account URL
  social_media_accounts?: Record<string, string>;
  social_media_citations?: Array<{
    platform?: string;
    account?: string;
    url?: string;
    [key: string]: any;
  }>;
  research_summary?: {
    total_competitors?: number;
    industry_insights?: string;
    [key: string]: any;
  };
  analysis_timestamp?: string;
  error?: string;
}

/**
 * Get competitor analysis data from onboarding.
 *
 * Never throws: request failures are converted into
 * `{ success: false, error }` so callers can render the message directly.
 */
export const getCompetitorAnalysis = async (): Promise<CompetitorAnalysisResponse> => {
  try {
    const response = await apiClient.get('/api/research/competitor-analysis');
    // Log a summary only; the full payload is not dumped to the console.
    console.log('[getCompetitorAnalysis] ✅ Response received:', {
      success: response.data?.success,
      competitorsCount: response.data?.competitors?.length || 0,
      error: response.data?.error
    });
    return response.data;
  } catch (error: any) {
    const statusCode = error?.response?.status;
    const errorMessage: string =
      error?.response?.data?.detail ||
      error?.response?.data?.error ||
      error?.message ||
      'Unknown error';

    console.error('[getCompetitorAnalysis] ❌ ERROR:', {
      status: statusCode,
      message: errorMessage
    });

    // Return error response instead of throwing
    const errorResponse: CompetitorAnalysisResponse = {
      success: false,
      error: errorMessage
    };
    return errorResponse;
  }
};
+208,19 @@ export const getSchedulerJobs = async (): Promise => { /** * Get scheduler event history from database. * - * @param limit - Number of events to return (1-1000, default: 100) + * @param limit - Number of events to return (1-500, default: 5 for initial load, expand to 50 on hover) * @param offset - Pagination offset (default: 0) * @param eventType - Filter by event type (check_cycle, interval_adjustment, start, stop, etc.) + * @param days - Number of days to look back (1-90, default: 7 days) */ export const getSchedulerEventHistory = async ( - limit: number = 100, + limit: number = 5, offset: number = 0, - eventType?: 'check_cycle' | 'interval_adjustment' | 'start' | 'stop' | 'job_scheduled' | 'job_cancelled' | 'job_completed' | 'job_failed' + eventType?: 'check_cycle' | 'interval_adjustment' | 'start' | 'stop' | 'job_scheduled' | 'job_cancelled' | 'job_completed' | 'job_failed', + days: number = 7 ): Promise => { try { - const params: any = { limit, offset }; + const params: any = { limit, offset, days }; if (eventType) { params.event_type = eventType; } diff --git a/frontend/src/api/websiteAnalysisMonitoring.ts b/frontend/src/api/websiteAnalysisMonitoring.ts new file mode 100644 index 00000000..27122a9d --- /dev/null +++ b/frontend/src/api/websiteAnalysisMonitoring.ts @@ -0,0 +1,122 @@ +/** + * Website Analysis Monitoring API Client + * Provides typed functions for fetching website analysis monitoring data. 
// TypeScript interfaces

/** One website analysis monitoring task (the user's own site or a competitor). */
export interface WebsiteAnalysisTask {
  id: number;
  website_url: string;
  task_type: 'user_website' | 'competitor';
  competitor_id: string | null;
  status: 'active' | 'failed' | 'paused';
  last_check: string | null;
  last_success: string | null;
  last_failure: string | null;
  failure_reason: string | null;
  next_check: string | null;
  frequency_days: number;
  created_at: string;
  updated_at: string;
}

/** Response of the website analysis status endpoint. */
export interface WebsiteAnalysisStatusResponse {
  success: boolean;
  data: {
    user_id: string;
    user_website_tasks: WebsiteAnalysisTask[];
    competitor_tasks: WebsiteAnalysisTask[];
    total_tasks: number;
    active_tasks: number;
    failed_tasks: number;
  };
}

/** One execution log entry of a website analysis task. */
export interface WebsiteAnalysisExecutionLog {
  id: number;
  task_id: number;
  website_url: string;
  task_type: 'user_website' | 'competitor';
  execution_date: string;
  status: 'success' | 'failed' | 'running' | 'skipped';
  result_data: any;
  error_message: string | null;
  execution_time_ms: number | null;
  created_at: string;
}

/** Paginated response of the website analysis logs endpoint. */
export interface WebsiteAnalysisLogsResponse {
  logs: WebsiteAnalysisExecutionLog[];
  total_count: number;
  limit: number;
  offset: number;
  has_more: boolean;
}

/** Response of the manual retry endpoint. */
export interface RetryWebsiteAnalysisResponse {
  success: boolean;
  message: string;
  task: {
    id: number;
    website_url: string;
    status: string;
    next_check: string | null;
  };
}

/**
 * Get website analysis status for a user.
 *
 * @param userId - user whose tasks are requested (URL-encoded into the path)
 * @throws Error carrying the backend `detail` message when available
 */
export const getWebsiteAnalysisStatus = async (
  userId: string
): Promise<WebsiteAnalysisStatusResponse> => {
  try {
    const response = await apiClient.get(
      `/api/scheduler/website-analysis/status/${encodeURIComponent(userId)}`
    );
    return response.data;
  } catch (error: any) {
    console.error('Error fetching website analysis status:', error);
    throw new Error(error?.response?.data?.detail || 'Failed to fetch website analysis status');
  }
};

/**
 * Get execution logs for website analysis tasks.
 *
 * @param userId - user whose logs are requested
 * @param limit - page size (default: 10)
 * @param offset - pagination offset (default: 0)
 * @param taskId - optional task filter; sent as `task_id` whenever it is provided
 * @throws Error carrying the backend `detail` message when available
 */
export const getWebsiteAnalysisLogs = async (
  userId: string,
  limit: number = 10,
  offset: number = 0,
  taskId?: number
): Promise<WebsiteAnalysisLogsResponse> => {
  try {
    const params: Record<string, number> = { limit, offset };
    // Compare against undefined so a task id of 0 is not silently dropped.
    if (taskId !== undefined) {
      params.task_id = taskId;
    }
    const response = await apiClient.get(
      `/api/scheduler/website-analysis/logs/${encodeURIComponent(userId)}`,
      { params }
    );
    return response.data;
  } catch (error: any) {
    console.error('Error fetching website analysis logs:', error);
    throw new Error(error?.response?.data?.detail || 'Failed to fetch website analysis logs');
  }
};

/**
 * Manually retry a failed website analysis task.
 *
 * @param taskId - id of the task to retry
 * @throws Error carrying the backend `detail` message when available
 */
export const retryWebsiteAnalysis = async (
  taskId: number
): Promise<RetryWebsiteAnalysisResponse> => {
  try {
    const response = await apiClient.post(`/api/scheduler/website-analysis/retry/${taskId}`);
    return response.data;
  } catch (error: any) {
    console.error('Error retrying website analysis:', error);
    throw new Error(error?.response?.data?.detail || 'Failed to retry website analysis');
  }
};
handleOutlineError, onSectionsUpdate: setSections, + onContentConfirmed: () => { + debug.log('[BlogWriter] Content generation completed - auto-confirming content'); + setContentConfirmed(true); + }, + navigateToPhase: (phase) => { + debug.log('[BlogWriter] Navigating to phase after content generation', { phase }); + // Use ref to access navigateToPhase (defined later in component) + if (navigateToPhaseRef.current) { + setTimeout(() => { + navigateToPhaseRef.current?.(phase); + }, 0); + } + }, }); // Modal visibility management - extracted to useModalVisibility diff --git a/frontend/src/components/BlogWriter/BlogWriterUtils/useBlogWriterPolling.ts b/frontend/src/components/BlogWriter/BlogWriterUtils/useBlogWriterPolling.ts index d49de00c..265a254c 100644 --- a/frontend/src/components/BlogWriter/BlogWriterUtils/useBlogWriterPolling.ts +++ b/frontend/src/components/BlogWriter/BlogWriterUtils/useBlogWriterPolling.ts @@ -12,6 +12,8 @@ interface UseBlogWriterPollingProps { onOutlineComplete: (outline: any) => void; onOutlineError: (error: any) => void; onSectionsUpdate: (sections: Record) => void; + onContentConfirmed?: () => void; // Callback when content generation completes + navigateToPhase?: (phase: string) => void; // Phase navigation function } export const useBlogWriterPolling = ({ @@ -19,6 +21,8 @@ export const useBlogWriterPolling = ({ onOutlineComplete, onOutlineError, onSectionsUpdate, + onContentConfirmed, + navigateToPhase, }: UseBlogWriterPollingProps) => { // Research polling hook (for context awareness) const researchPolling = useResearchPolling({ @@ -47,6 +51,15 @@ export const useBlogWriterPolling = ({ if (Object.keys(newSections).length > 0) { const sectionIds = Object.keys(newSections); blogWriterCache.cacheContent(newSections, sectionIds); + + // Auto-confirm content and navigate to SEO phase when content generation completes + // This happens when user clicks "Next:Confirm and generate content" + if (onContentConfirmed) { + onContentConfirmed(); + 
} + if (navigateToPhase) { + navigateToPhase('seo'); + } } } } catch (e) { diff --git a/frontend/src/components/BlogWriter/ResearchAction.tsx b/frontend/src/components/BlogWriter/ResearchAction.tsx index d472bfb6..e15124a1 100644 --- a/frontend/src/components/BlogWriter/ResearchAction.tsx +++ b/frontend/src/components/BlogWriter/ResearchAction.tsx @@ -1,4 +1,4 @@ -import React, { useState, useRef } from 'react'; +import React, { useState, useRef, useEffect } from 'react'; import { useCopilotAction } from '@copilotkit/react-core'; import { blogWriterApi, BlogResearchRequest, BlogResearchResponse } from '../../services/blogWriterApi'; import { useResearchPolling } from '../../hooks/usePolling'; @@ -60,6 +60,27 @@ export const ResearchAction: React.FC = ({ onResearchComple } }); + // Close modal when research completes (status becomes 'completed' or polling stops with result) + useEffect(() => { + if (showProgressModal && ( + polling.currentStatus === 'completed' || + (!polling.isPolling && polling.result && polling.currentStatus !== 'failed') + )) { + console.info('[ResearchAction] Closing modal - research completed', { + status: polling.currentStatus, + isPolling: polling.isPolling, + hasResult: !!polling.result + }); + // Small delay to show completion message before closing + const timer = setTimeout(() => { + setShowProgressModal(false); + setCurrentTaskId(null); + setCurrentMessage(''); + }, 500); + return () => clearTimeout(timer); + } + }, [polling.currentStatus, polling.isPolling, polling.result, showProgressModal]); + useCopilotActionTyped({ name: 'showResearchForm', description: 'Show keyword input form for blog research', @@ -235,12 +256,16 @@ export const ResearchAction: React.FC = ({ onResearchComple <> {showProgressModal && ( setShowProgressModal(false)} + onClose={() => { + console.info('[ResearchAction] Modal closed manually'); + setShowProgressModal(false); + setCurrentTaskId(null); + }} /> )} diff --git 
a/frontend/src/components/BlogWriter/SuggestionsGenerator.tsx b/frontend/src/components/BlogWriter/SuggestionsGenerator.tsx index 873e3a62..e011fd05 100644 --- a/frontend/src/components/BlogWriter/SuggestionsGenerator.tsx +++ b/frontend/src/components/BlogWriter/SuggestionsGenerator.tsx @@ -190,8 +190,21 @@ export const useSuggestions = ({ } } } else { - // No content yet, show generation option - items.push({ title: '📝 Generate all sections', message: 'Generate all sections of my blog post' }); + // No content yet, but outline is confirmed - show content generation options + if (hasContent) { + // Content exists but not confirmed - show confirmation and SEO options + items.push({ + title: 'Next: Run SEO Analysis', + message: 'Please analyze the blog content for SEO. Run the analyzeSEO action right away and do not ask for confirmation.' + }); + items.push({ + title: '📊 Content Analysis', + message: 'Analyze the flow and quality of my blog content to get improvement suggestions' + }); + } else { + // No content at all - show generation option (only if no content exists) + items.push({ title: '📝 Generate all sections', message: 'Generate all sections of my blog post' }); + } } } diff --git a/frontend/src/components/Research/OnboardingCompetitorModal.tsx b/frontend/src/components/Research/OnboardingCompetitorModal.tsx new file mode 100644 index 00000000..32d83d8c --- /dev/null +++ b/frontend/src/components/Research/OnboardingCompetitorModal.tsx @@ -0,0 +1,317 @@ +import React from 'react'; +import { + Dialog, + DialogTitle, + DialogContent, + DialogActions, + Button, + Typography, + Box, + Grid, + Card, + CardContent, + Chip, + Avatar, + Divider, + Alert, + CircularProgress +} from '@mui/material'; +import { + Close as CloseIcon, + Business as BusinessIcon, + Assessment as AssessmentIcon, + OpenInNew as OpenInNewIcon, + Link as LinkIcon +} from '@mui/icons-material'; +import { CompetitorAnalysisResponse } from '../../api/researchConfig'; + +interface 
OnboardingCompetitorModalProps { + open: boolean; + onClose: () => void; + data: CompetitorAnalysisResponse | null; + loading?: boolean; + error?: string | null; +} + +export const OnboardingCompetitorModal: React.FC = ({ + open, + onClose, + data, + loading = false, + error = null +}) => { + if (!data && !loading && !error) { + return null; + } + + const competitors = data?.competitors || []; + const socialMediaAccounts = data?.social_media_accounts || {}; + const researchSummary = data?.research_summary || {}; + + const avgScore = competitors.length > 0 + ? competitors.reduce((sum, c) => sum + (c.similarity_score || 0), 0) / competitors.length + : 0; + + return ( + + + + + + + Competitive Analysis from Onboarding + + + {loading ? 'Loading...' : `${competitors.length} competitors analyzed`} + + + + + + + + {loading && ( + + + + Loading competitor data... + + + )} + + {error && ( + + {error} + + )} + + {!loading && !error && data && ( + <> + {researchSummary.industry_insights && ( + } + sx={{ mb: 3, bgcolor: '#e0f2fe', borderLeft: '4px solid #0ea5e9' }} + > + + Market Insights + + + {researchSummary.industry_insights} + + + )} + + + + + + + Total Competitors + + + {competitors.length} + + + + + + + + + Avg Similarity + + + {Math.round(avgScore * 100)}% + + + + + + + + + Social Accounts Found + + + {Object.keys(socialMediaAccounts).length} + + + + + + + {Object.keys(socialMediaAccounts).length > 0 && ( + <> + + Social Media Accounts + + + {Object.entries(socialMediaAccounts).map(([platform, url]) => ( + } + label={`${platform}: ${url}`} + clickable + onClick={() => window.open(url, '_blank')} + sx={{ + bgcolor: '#f8fafc', + border: '1px solid #e2e8f0', + '&:hover': { + bgcolor: '#f1f5f9', + borderColor: '#cbd5e1' + } + }} + /> + ))} + + + + )} + + {competitors.length > 0 ? 
( + <> + + Competitors ({competitors.length}) + + + + {competitors.map((competitor, index) => ( + + + + + + + + + + {competitor.name || competitor.domain || 'Unknown Competitor'} + + + {competitor.similarity_score !== undefined && ( + 0.7 + ? '#dcfce7' + : competitor.similarity_score > 0.5 + ? '#fef3c7' + : '#fee2e2', + color: competitor.similarity_score > 0.7 + ? '#166534' + : competitor.similarity_score > 0.5 + ? '#92400e' + : '#991b1b', + fontWeight: 600 + }} + /> + )} + {competitor.url && ( + + )} + + + + + {competitor.description && ( + + {competitor.description} + + )} + + {competitor.domain && ( + + {competitor.domain} + + )} + + + + ))} + + + ) : ( + !loading && ( + + + No competitor data available. Please complete onboarding step 3 to analyze competitors. + + + ) + )} + + )} + + + + + + + ); +}; diff --git a/frontend/src/components/Research/ResearchWizard.tsx b/frontend/src/components/Research/ResearchWizard.tsx index 74d9db06..d9f80747 100644 --- a/frontend/src/components/Research/ResearchWizard.tsx +++ b/frontend/src/components/Research/ResearchWizard.tsx @@ -1,4 +1,4 @@ -import React, { useEffect } from 'react'; +import React, { useEffect, useState } from 'react'; import { useResearchWizard } from './hooks/useResearchWizard'; import { useResearchExecution } from './hooks/useResearchExecution'; import { ResearchInput } from './steps/ResearchInput'; @@ -6,6 +6,9 @@ import { StepProgress } from './steps/StepProgress'; import { StepResults } from './steps/StepResults'; import { ResearchWizardProps } from './types/research.types'; import { addResearchHistory } from '../../utils/researchHistory'; +import { getResearchConfig, ProviderAvailability } from '../../api/researchConfig'; +import { ProviderChips } from './steps/components/ProviderChips'; +import { AdvancedChip } from './steps/components/AdvancedChip'; export const ResearchWizard: React.FC = ({ onComplete, @@ -24,6 +27,30 @@ export const ResearchWizard: React.FC = ({ initialConfig ); const execution = 
useResearchExecution(); + const [providerAvailability, setProviderAvailability] = useState(null); + const [advanced, setAdvanced] = useState(false); + + // Load provider availability on mount + useEffect(() => { + const loadProviderAvailability = async () => { + try { + const config = await getResearchConfig(); + setProviderAvailability(config?.provider_availability || null); + } catch (error) { + console.error('[ResearchWizard] Failed to load provider availability:', error); + // Set default availability on error + setProviderAvailability({ + google_available: true, + exa_available: false, + tavily_available: false, + gemini_key_status: 'missing', + exa_key_status: 'missing', + tavily_key_status: 'missing', + }); + } + }; + loadProviderAvailability(); + }, []); // Handle results from execution useEffect(() => { @@ -73,13 +100,13 @@ export const ResearchWizard: React.FC = ({ switch (wizard.state.currentStep) { case 1: - return ; + return ; case 2: return ; case 3: return ; default: - return ; + return ; } }; @@ -96,31 +123,124 @@ export const ResearchWizard: React.FC = ({ boxShadow: '0 4px 16px rgba(14, 165, 233, 0.1)', overflow: 'hidden', }}> - {/* Header */} + {/* Header with Compact Steps */}
-
-
-

+ {/* Title Section */} +
+
+

+ Research Wizard +

+ + {/* Provider Status Chips */} + + + {/* Advanced Chip */} + +
+ + {/* Compact Step Indicators */} +
- Research Wizard -

-

- Phase {wizard.state.currentStep} of {wizard.maxSteps} • AI-Powered Intelligence -

+ {[1, 2, 3].map((step, index) => { + const isActive = step === wizard.state.currentStep; + const isCompleted = step < wizard.state.currentStep; + const isClickable = step <= wizard.state.currentStep; + + return ( + + {index > 0 && ( +
+ )} +
{ + if (isClickable) { + wizard.updateState({ currentStep: step }); + } + }} + onMouseEnter={(e) => { + if (isClickable) { + e.currentTarget.style.transform = 'translateY(-1px)'; + } + }} + onMouseLeave={(e) => { + e.currentTarget.style.transform = 'translateY(0)'; + }} + > +
+ {isCompleted ? '✓' : step} +
+ + {step === 1 && 'Configure'} + {step === 2 && 'Execute'} + {step === 3 && 'Analyze'} + +
+ + ); + })} +
+ + {/* Cancel Button */} {onCancel && ( -
-
- {researchHistory.map((entry) => ( - - ))} -
+ gap: '5px', + transition: 'all 0.25s cubic-bezier(0.4, 0, 0.2, 1)', + boxShadow: '0 1px 2px rgba(14, 165, 233, 0.12)', + }} + onMouseEnter={(e) => { + e.currentTarget.style.background = 'linear-gradient(135deg, rgba(14, 165, 233, 0.15) 0%, rgba(59, 130, 246, 0.15) 100%)'; + e.currentTarget.style.borderColor = 'rgba(14, 165, 233, 0.35)'; + e.currentTarget.style.transform = 'translateY(-1px)'; + e.currentTarget.style.boxShadow = '0 2px 4px rgba(14, 165, 233, 0.18)'; + }} + onMouseLeave={(e) => { + e.currentTarget.style.background = 'linear-gradient(135deg, rgba(14, 165, 233, 0.1) 0%, rgba(59, 130, 246, 0.1) 100%)'; + e.currentTarget.style.borderColor = 'rgba(14, 165, 233, 0.25)'; + e.currentTarget.style.transform = 'translateY(0)'; + e.currentTarget.style.boxShadow = '0 1px 2px rgba(14, 165, 233, 0.12)'; + }} + title="Upload Document" + > + 📎 + Upload +
- )} - -
-