diff --git a/backend/api/blog_writer/router.py b/backend/api/blog_writer/router.py index 49c2ee51..6dd5fa7c 100644 --- a/backend/api/blog_writer/router.py +++ b/backend/api/blog_writer/router.py @@ -50,7 +50,9 @@ async def health() -> Dict[str, Any]: async def start_research(request: BlogResearchRequest) -> Dict[str, Any]: """Start a research operation and return a task ID for polling.""" try: - task_id = task_manager.start_research_task(request) + # TODO: Get user_id from authentication context + user_id = "anonymous" # This should come from auth middleware + task_id = await task_manager.start_research_task(request, user_id) return {"task_id": task_id, "status": "started"} except Exception as e: logger.error(f"Failed to start research: {e}") @@ -61,7 +63,7 @@ async def start_research(request: BlogResearchRequest) -> Dict[str, Any]: async def get_research_status(task_id: str) -> Dict[str, Any]: """Get the status of a research operation.""" try: - status = task_manager.get_task_status(task_id) + status = await task_manager.get_task_status(task_id) if status is None: raise HTTPException(status_code=404, detail="Task not found") diff --git a/backend/api/blog_writer/task_manager.py b/backend/api/blog_writer/task_manager.py index da4a8a2c..7906a688 100644 --- a/backend/api/blog_writer/task_manager.py +++ b/backend/api/blog_writer/task_manager.py @@ -3,6 +3,7 @@ Task Management System for Blog Writer API Handles background task execution, status tracking, and progress updates for research and outline generation operations. +Now uses database-backed persistence for reliability and recovery. 
""" import asyncio @@ -18,14 +19,22 @@ from models.blog_models import ( MediumBlogGenerateResult, ) from services.blog_writer.blog_service import BlogWriterService +from services.blog_writer.database_task_manager import DatabaseTaskManager class TaskManager: """Manages background tasks for research and outline generation.""" - def __init__(self): - self.task_storage: Dict[str, Dict[str, Any]] = {} - self.service = BlogWriterService() + def __init__(self, db_connection=None): + # Fallback to in-memory storage if no database connection + if db_connection: + self.db_manager = DatabaseTaskManager(db_connection) + self.use_database = True + else: + self.task_storage: Dict[str, Dict[str, Any]] = {} + self.service = BlogWriterService() + self.use_database = False + logger.warning("No database connection provided, using in-memory task storage") def cleanup_old_tasks(self): """Remove tasks older than 1 hour to prevent memory leaks.""" @@ -54,54 +63,61 @@ class TaskManager: return task_id - def get_task_status(self, task_id: str) -> Dict[str, Any]: + async def get_task_status(self, task_id: str) -> Dict[str, Any]: """Get the status of a task.""" - self.cleanup_old_tasks() - - if task_id not in self.task_storage: - return None - - task = self.task_storage[task_id] - response = { - "task_id": task_id, - "status": task["status"], - "created_at": task["created_at"].isoformat(), - "progress_messages": task.get("progress_messages", []) - } - - if task["status"] == "completed": - response["result"] = task["result"] - elif task["status"] == "failed": - response["error"] = task["error"] - - return response - - async def update_progress(self, task_id: str, message: str): - """Update progress message for a task.""" - if task_id in self.task_storage: - if "progress_messages" not in self.task_storage[task_id]: - self.task_storage[task_id]["progress_messages"] = [] + if self.use_database: + return await self.db_manager.get_task_status(task_id) + else: + self.cleanup_old_tasks() - 
progress_entry = { - "timestamp": datetime.now().isoformat(), - "message": message + if task_id not in self.task_storage: + return None + + task = self.task_storage[task_id] + response = { + "task_id": task_id, + "status": task["status"], + "created_at": task["created_at"].isoformat(), + "progress_messages": task.get("progress_messages", []) } - self.task_storage[task_id]["progress_messages"].append(progress_entry) - # Keep only last 10 progress messages to prevent memory bloat - if len(self.task_storage[task_id]["progress_messages"]) > 10: - self.task_storage[task_id]["progress_messages"] = self.task_storage[task_id]["progress_messages"][-10:] + if task["status"] == "completed": + response["result"] = task["result"] + elif task["status"] == "failed": + response["error"] = task["error"] - logger.info(f"Progress update for task {task_id}: {message}") + return response - def start_research_task(self, request: BlogResearchRequest) -> str: + async def update_progress(self, task_id: str, message: str, percentage: float = None): + """Update progress message for a task.""" + if self.use_database: + await self.db_manager.update_progress(task_id, message, percentage) + else: + if task_id in self.task_storage: + if "progress_messages" not in self.task_storage[task_id]: + self.task_storage[task_id]["progress_messages"] = [] + + progress_entry = { + "timestamp": datetime.now().isoformat(), + "message": message + } + self.task_storage[task_id]["progress_messages"].append(progress_entry) + + # Keep only last 10 progress messages to prevent memory bloat + if len(self.task_storage[task_id]["progress_messages"]) > 10: + self.task_storage[task_id]["progress_messages"] = self.task_storage[task_id]["progress_messages"][-10:] + + logger.info(f"Progress update for task {task_id}: {message}") + + async def start_research_task(self, request: BlogResearchRequest, user_id: str = "anonymous") -> str: """Start a research operation and return a task ID.""" - task_id = 
self.create_task("research") - - # Start the research operation in the background - asyncio.create_task(self._run_research_task(task_id, request)) - - return task_id + if self.use_database: + return await self.db_manager.start_research_task(request, user_id) + else: + task_id = self.create_task("research") + # Start the research operation in the background + asyncio.create_task(self._run_research_task(task_id, request)) + return task_id def start_outline_task(self, request: BlogOutlineRequest) -> str: """Start an outline generation operation and return a task ID.""" diff --git a/backend/api/content_planning/api/routes/monitoring.py b/backend/api/content_planning/api/routes/monitoring.py index 724e7ade..19cb0b7f 100644 --- a/backend/api/content_planning/api/routes/monitoring.py +++ b/backend/api/content_planning/api/routes/monitoring.py @@ -7,7 +7,7 @@ from fastapi import APIRouter, HTTPException from typing import Dict, Any from loguru import logger -from middleware.monitoring_middleware import get_monitoring_stats, get_lightweight_stats +from services.subscription import get_monitoring_stats, get_lightweight_stats from services.comprehensive_user_data_cache_service import ComprehensiveUserDataCacheService from services.database import get_db diff --git a/backend/api/content_planning/docs/ENHANCED_STRATEGY_SERVICE.py b/backend/api/content_planning/docs/ENHANCED_STRATEGY_SERVICE.py index ed0c3b54..ea1a493f 100644 --- a/backend/api/content_planning/docs/ENHANCED_STRATEGY_SERVICE.py +++ b/backend/api/content_planning/docs/ENHANCED_STRATEGY_SERVICE.py @@ -13,7 +13,7 @@ from sqlalchemy.orm import Session from services.content_planning_db import ContentPlanningDBService from services.ai_analysis_db_service import AIAnalysisDBService from services.ai_analytics_service import AIAnalyticsService -from services.onboarding_data_service import OnboardingDataService +from services.onboarding.data_service import OnboardingDataService # Import utilities from 
..utils.error_handlers import ContentPlanningErrorHandler diff --git a/backend/api/content_planning/services/ai_analytics_service.py b/backend/api/content_planning/services/ai_analytics_service.py index b0dc476d..0fb88952 100644 --- a/backend/api/content_planning/services/ai_analytics_service.py +++ b/backend/api/content_planning/services/ai_analytics_service.py @@ -13,7 +13,7 @@ import time from services.content_planning_db import ContentPlanningDBService from services.ai_analysis_db_service import AIAnalysisDBService from services.ai_analytics_service import AIAnalyticsService -from services.onboarding_data_service import OnboardingDataService +from services.onboarding.data_service import OnboardingDataService # Import utilities from ..utils.error_handlers import ContentPlanningErrorHandler diff --git a/backend/api/content_planning/services/calendar_generation_service.py b/backend/api/content_planning/services/calendar_generation_service.py index 7bde576b..173dd456 100644 --- a/backend/api/content_planning/services/calendar_generation_service.py +++ b/backend/api/content_planning/services/calendar_generation_service.py @@ -307,7 +307,7 @@ class CalendarGenerationService: logger.info("πŸ₯ Performing calendar generation health check") # Check AI services - from services.api_key_manager import APIKeyManager + from services.onboarding.api_key_manager import APIKeyManager api_manager = APIKeyManager() api_key_status = check_all_api_keys(api_manager) diff --git a/backend/api/content_planning/services/gap_analysis_service.py b/backend/api/content_planning/services/gap_analysis_service.py index cd583a5c..5e83f617 100644 --- a/backend/api/content_planning/services/gap_analysis_service.py +++ b/backend/api/content_planning/services/gap_analysis_service.py @@ -11,7 +11,7 @@ from sqlalchemy.orm import Session # Import database services from services.content_planning_db import ContentPlanningDBService from services.ai_analysis_db_service import AIAnalysisDBService -from 
services.onboarding_data_service import OnboardingDataService +from services.onboarding.data_service import OnboardingDataService # Import migrated content gap analysis services from services.content_gap_analyzer.content_gap_analyzer import ContentGapAnalyzer diff --git a/backend/api/linkedin_image_generation.py b/backend/api/linkedin_image_generation.py index 5c18af38..3e66f7c7 100644 --- a/backend/api/linkedin_image_generation.py +++ b/backend/api/linkedin_image_generation.py @@ -7,7 +7,7 @@ import logging # Import our LinkedIn image generation services from services.linkedin.image_generation import LinkedInImageGenerator, LinkedInImageStorage from services.linkedin.image_prompts import LinkedInPromptGenerator -from services.api_key_manager import APIKeyManager +from services.onboarding.api_key_manager import APIKeyManager # Set up logging logging.basicConfig(level=logging.INFO) diff --git a/backend/api/onboarding_utils/api_key_management_service.py b/backend/api/onboarding_utils/api_key_management_service.py index c543e92e..42906323 100644 --- a/backend/api/onboarding_utils/api_key_management_service.py +++ b/backend/api/onboarding_utils/api_key_management_service.py @@ -8,7 +8,7 @@ from typing import Dict, Any from fastapi import HTTPException from loguru import logger -from services.api_key_manager import APIKeyManager +from services.onboarding.api_key_manager import APIKeyManager from services.validation import check_all_api_keys class APIKeyManagementService: @@ -21,7 +21,7 @@ class APIKeyManagementService: if not hasattr(self.api_key_manager, 'use_database'): self.api_key_manager.use_database = True try: - from services.onboarding_database_service import OnboardingDatabaseService + from services.onboarding.database_service import OnboardingDatabaseService self.api_key_manager.db_service = OnboardingDatabaseService() logger.info("Database service initialized for APIKeyManager") except Exception as e: diff --git 
a/backend/api/onboarding_utils/endpoint_models.py b/backend/api/onboarding_utils/endpoint_models.py index 4734bb15..bcc66abf 100644 --- a/backend/api/onboarding_utils/endpoint_models.py +++ b/backend/api/onboarding_utils/endpoint_models.py @@ -1,6 +1,6 @@ from typing import Dict, Any, List, Optional from pydantic import BaseModel, Field -from services.api_key_manager import ( +from services.onboarding.api_key_manager import ( OnboardingProgress, get_onboarding_progress, get_onboarding_progress_for_user, diff --git a/backend/api/onboarding_utils/endpoints_core.py b/backend/api/onboarding_utils/endpoints_core.py index e640eb7f..1ae5535d 100644 --- a/backend/api/onboarding_utils/endpoints_core.py +++ b/backend/api/onboarding_utils/endpoints_core.py @@ -5,7 +5,7 @@ from fastapi import HTTPException, Depends from middleware.auth_middleware import get_current_user -from services.onboarding_progress_service import get_onboarding_progress_service +from services.onboarding.progress_service import get_onboarding_progress_service def health_check(): diff --git a/backend/api/onboarding_utils/onboarding_completion_service.py b/backend/api/onboarding_utils/onboarding_completion_service.py index 19ac807f..33e4380a 100644 --- a/backend/api/onboarding_utils/onboarding_completion_service.py +++ b/backend/api/onboarding_utils/onboarding_completion_service.py @@ -8,8 +8,8 @@ from datetime import datetime from fastapi import HTTPException from loguru import logger -from services.onboarding_progress_service import get_onboarding_progress_service -from services.onboarding_database_service import OnboardingDatabaseService +from services.onboarding.progress_service import get_onboarding_progress_service +from services.onboarding.database_service import OnboardingDatabaseService from services.database import get_db from services.persona_analysis_service import PersonaAnalysisService diff --git a/backend/api/onboarding_utils/onboarding_config_service.py 
b/backend/api/onboarding_utils/onboarding_config_service.py index 35d4c5e8..c4224a05 100644 --- a/backend/api/onboarding_utils/onboarding_config_service.py +++ b/backend/api/onboarding_utils/onboarding_config_service.py @@ -7,7 +7,7 @@ from typing import Dict, Any from fastapi import HTTPException from loguru import logger -from services.api_key_manager import get_api_key_manager +from services.onboarding.api_key_manager import get_api_key_manager from services.validation import check_all_api_keys class OnboardingConfigService: diff --git a/backend/api/onboarding_utils/onboarding_control_service.py b/backend/api/onboarding_utils/onboarding_control_service.py index 1b89fdeb..0c8cbcf9 100644 --- a/backend/api/onboarding_utils/onboarding_control_service.py +++ b/backend/api/onboarding_utils/onboarding_control_service.py @@ -7,7 +7,7 @@ from typing import Dict, Any from fastapi import HTTPException from loguru import logger -from services.api_key_manager import get_onboarding_progress, get_onboarding_progress_for_user +from services.onboarding.api_key_manager import get_onboarding_progress, get_onboarding_progress_for_user class OnboardingControlService: """Service for handling onboarding control operations.""" diff --git a/backend/api/onboarding_utils/onboarding_summary_service.py b/backend/api/onboarding_utils/onboarding_summary_service.py index be9b496d..aaa38f36 100644 --- a/backend/api/onboarding_utils/onboarding_summary_service.py +++ b/backend/api/onboarding_utils/onboarding_summary_service.py @@ -7,9 +7,9 @@ from typing import Dict, Any, Optional from fastapi import HTTPException from loguru import logger -from services.api_key_manager import get_api_key_manager +from services.onboarding.api_key_manager import get_api_key_manager from services.database import get_db -from services.onboarding_database_service import OnboardingDatabaseService +from services.onboarding.database_service import OnboardingDatabaseService from services.website_analysis_service import 
WebsiteAnalysisService from services.research_preferences_service import ResearchPreferencesService from services.persona_analysis_service import PersonaAnalysisService diff --git a/backend/api/onboarding_utils/step_management_service.py b/backend/api/onboarding_utils/step_management_service.py index 485fbff1..4b6f30b2 100644 --- a/backend/api/onboarding_utils/step_management_service.py +++ b/backend/api/onboarding_utils/step_management_service.py @@ -7,8 +7,8 @@ from typing import Dict, Any, List, Optional from fastapi import HTTPException from loguru import logger -from services.onboarding_progress_service import get_onboarding_progress_service -from services.onboarding_database_service import OnboardingDatabaseService +from services.onboarding.progress_service import get_onboarding_progress_service +from services.onboarding.database_service import OnboardingDatabaseService from services.database import get_db class StepManagementService: diff --git a/backend/api/persona.py b/backend/api/persona.py index 867d8096..787ef47c 100644 --- a/backend/api/persona.py +++ b/backend/api/persona.py @@ -302,7 +302,7 @@ async def generate_platform_persona(user_id: str, platform: str, db_session): # Import services from services.persona_data_service import PersonaDataService - from services.onboarding_database_service import OnboardingDatabaseService + from services.onboarding.database_service import OnboardingDatabaseService persona_data_service = PersonaDataService(db_session=db_session) onboarding_service = OnboardingDatabaseService(db=db_session) diff --git a/backend/api/seo_dashboard.py b/backend/api/seo_dashboard.py index 428f0fd1..881da4a7 100644 --- a/backend/api/seo_dashboard.py +++ b/backend/api/seo_dashboard.py @@ -10,11 +10,13 @@ from loguru import logger import time # Import existing services -from services.api_key_manager import APIKeyManager +from services.onboarding.api_key_manager import APIKeyManager from services.validation import check_all_api_keys from 
services.seo_analyzer import ComprehensiveSEOAnalyzer, SEOAnalysisResult, SEOAnalysisService from services.user_data_service import UserDataService from services.database import get_db_session +from services.seo import SEODashboardService +from middleware.auth_middleware import get_current_user # Initialize the SEO analyzer seo_analyzer = ComprehensiveSEOAnalyzer() @@ -238,48 +240,126 @@ def generate_ai_insights(metrics: Dict[str, Any], platforms: Dict[str, Any]) -> return insights # API Endpoints -async def get_seo_dashboard_data() -> SEODashboardData: +async def get_seo_dashboard_data(current_user: dict = Depends(get_current_user)) -> SEODashboardData: """Get comprehensive SEO dashboard data.""" try: - # For now, return mock data - # In production, this would fetch real data from database - return get_mock_seo_data() + user_id = str(current_user.get('id')) + db_session = get_db_session() + + if not db_session: + logger.error("No database session available") + return get_mock_seo_data() + + try: + # Use new SEO dashboard service + dashboard_service = SEODashboardService(db_session) + overview_data = await dashboard_service.get_dashboard_overview(user_id) + + # Convert to SEODashboardData format + return SEODashboardData( + health_score=SEOHealthScore(**overview_data.get("health_score", {})), + key_insight=overview_data.get("key_insight", "Connect your analytics accounts for personalized insights"), + priority_alert=overview_data.get("priority_alert", "No alerts at this time"), + metrics=_convert_metrics(overview_data.get("summary", {})), + platforms=_convert_platforms(overview_data.get("platforms", {})), + ai_insights=[AIInsight(**insight) for insight in overview_data.get("ai_insights", [])], + last_updated=overview_data.get("last_updated", datetime.now().isoformat()), + website_url=overview_data.get("website_url") + ) + finally: + db_session.close() + except Exception as e: logger.error(f"Error getting SEO dashboard data: {e}") - raise 
HTTPException(status_code=500, detail="Failed to get SEO dashboard data") + # Fallback to mock data + return get_mock_seo_data() -async def get_seo_health_score() -> SEOHealthScore: +async def get_seo_health_score(current_user: dict = Depends(get_current_user)) -> SEOHealthScore: """Get current SEO health score.""" try: - mock_data = get_mock_seo_data() - return mock_data.health_score + user_id = str(current_user.get('id')) + db_session = get_db_session() + + if not db_session: + raise HTTPException(status_code=500, detail="Database connection unavailable") + + try: + dashboard_service = SEODashboardService(db_session) + overview_data = await dashboard_service.get_dashboard_overview(user_id) + health_score_data = overview_data.get("health_score", {}) + return SEOHealthScore(**health_score_data) + finally: + db_session.close() + except Exception as e: logger.error(f"Error getting SEO health score: {e}") raise HTTPException(status_code=500, detail="Failed to get SEO health score") -async def get_seo_metrics() -> Dict[str, SEOMetric]: +async def get_seo_metrics(current_user: dict = Depends(get_current_user)) -> Dict[str, SEOMetric]: """Get SEO metrics.""" try: - mock_data = get_mock_seo_data() - return mock_data.metrics + user_id = str(current_user.get('id')) + db_session = get_db_session() + + if not db_session: + raise HTTPException(status_code=500, detail="Database connection unavailable") + + try: + dashboard_service = SEODashboardService(db_session) + overview_data = await dashboard_service.get_dashboard_overview(user_id) + summary_data = overview_data.get("summary", {}) + return _convert_metrics(summary_data) + finally: + db_session.close() + except Exception as e: logger.error(f"Error getting SEO metrics: {e}") raise HTTPException(status_code=500, detail="Failed to get SEO metrics") -async def get_platform_status() -> Dict[str, PlatformStatus]: +async def get_platform_status( + current_user: dict = Depends(get_current_user) +) -> Dict[str, Any]: """Get platform 
connection status.""" try: - mock_data = get_mock_seo_data() - return mock_data.platforms + user_id = str(current_user.get('id')) + db_session = get_db_session() + + if not db_session: + logger.error("No database session available") + raise HTTPException(status_code=500, detail="Database connection failed") + + try: + # Use SEO dashboard service to get platform status + dashboard_service = SEODashboardService(db_session) + platform_status = await dashboard_service.get_platform_status(user_id) + + logger.info(f"Retrieved platform status for user {user_id}") + return platform_status + + finally: + db_session.close() + except Exception as e: logger.error(f"Error getting platform status: {e}") raise HTTPException(status_code=500, detail="Failed to get platform status") -async def get_ai_insights() -> List[AIInsight]: +async def get_ai_insights(current_user: dict = Depends(get_current_user)) -> List[AIInsight]: """Get AI-generated insights.""" try: - mock_data = get_mock_seo_data() - return mock_data.ai_insights + user_id = str(current_user.get('id')) + db_session = get_db_session() + + if not db_session: + raise HTTPException(status_code=500, detail="Database connection unavailable") + + try: + dashboard_service = SEODashboardService(db_session) + overview_data = await dashboard_service.get_dashboard_overview(user_id) + ai_insights_data = overview_data.get("ai_insights", []) + return [AIInsight(**insight) for insight in ai_insights_data] + finally: + db_session.close() + except Exception as e: logger.error(f"Error getting AI insights: {e}") raise HTTPException(status_code=500, detail="Failed to get AI insights") @@ -568,4 +648,205 @@ async def batch_analyze_urls(urls: List[str]) -> Dict[str, Any]: raise HTTPException( status_code=500, detail=f"Error in batch analysis: {str(e)}" - ) \ No newline at end of file + ) + +# New SEO Dashboard Endpoints with Real Data + +async def get_seo_dashboard_overview( + current_user: dict = Depends(get_current_user), + site_url: 
Optional[str] = None +) -> Dict[str, Any]: + """Get comprehensive SEO dashboard overview with real GSC/Bing data.""" + try: + user_id = str(current_user.get('id')) + db_session = get_db_session() + + if not db_session: + logger.error("No database session available") + raise HTTPException(status_code=500, detail="Database connection failed") + + try: + # Use SEO dashboard service to get real data + dashboard_service = SEODashboardService(db_session) + overview_data = await dashboard_service.get_dashboard_overview(user_id, site_url) + + logger.info(f"Retrieved SEO dashboard overview for user {user_id}") + return overview_data + + finally: + db_session.close() + + except Exception as e: + logger.error(f"Error getting SEO dashboard overview: {e}") + raise HTTPException(status_code=500, detail="Failed to get dashboard overview") + +async def get_gsc_raw_data( + current_user: dict = Depends(get_current_user), + site_url: Optional[str] = None +) -> Dict[str, Any]: + """Get raw GSC data for the specified site.""" + try: + user_id = str(current_user.get('id')) + db_session = get_db_session() + + if not db_session: + logger.error("No database session available") + raise HTTPException(status_code=500, detail="Database connection failed") + + try: + # Use SEO dashboard service to get GSC data + dashboard_service = SEODashboardService(db_session) + gsc_data = await dashboard_service.get_gsc_data(user_id, site_url) + + logger.info(f"Retrieved GSC raw data for user {user_id}") + return gsc_data + + finally: + db_session.close() + + except Exception as e: + logger.error(f"Error getting GSC raw data: {e}") + raise HTTPException(status_code=500, detail="Failed to get GSC data") + +async def get_bing_raw_data( + current_user: dict = Depends(get_current_user), + site_url: Optional[str] = None +) -> Dict[str, Any]: + """Get raw Bing data for the specified site.""" + try: + user_id = str(current_user.get('id')) + db_session = get_db_session() + + if not db_session: + logger.error("No 
database session available") + raise HTTPException(status_code=500, detail="Database connection failed") + + try: + # Use SEO dashboard service to get Bing data + dashboard_service = SEODashboardService(db_session) + bing_data = await dashboard_service.get_bing_data(user_id, site_url) + + logger.info(f"Retrieved Bing raw data for user {user_id}") + return bing_data + + finally: + db_session.close() + + except Exception as e: + logger.error(f"Error getting Bing raw data: {e}") + raise HTTPException(status_code=500, detail="Failed to get Bing data") + +async def get_competitive_insights( + current_user: dict = Depends(get_current_user), + site_url: Optional[str] = None +) -> Dict[str, Any]: + """Get competitive insights from onboarding step 3 data.""" + try: + user_id = str(current_user.get('id')) + db_session = get_db_session() + + if not db_session: + logger.error("No database session available") + raise HTTPException(status_code=500, detail="Database connection failed") + + try: + # Use SEO dashboard service to get competitive insights + dashboard_service = SEODashboardService(db_session) + insights_data = await dashboard_service.get_competitive_insights(user_id) + + logger.info(f"Retrieved competitive insights for user {user_id}") + return insights_data + + finally: + db_session.close() + + except Exception as e: + logger.error(f"Error getting competitive insights: {e}") + raise HTTPException(status_code=500, detail="Failed to get competitive insights") + +async def refresh_analytics_data( + current_user: dict = Depends(get_current_user), + site_url: Optional[str] = None +) -> Dict[str, Any]: + """Refresh analytics data by invalidating cache and fetching fresh data.""" + try: + user_id = str(current_user.get('id')) + db_session = get_db_session() + + if not db_session: + logger.error("No database session available") + raise HTTPException(status_code=500, detail="Database connection failed") + + try: + # Use SEO dashboard service to refresh data + 
dashboard_service = SEODashboardService(db_session) + refresh_result = await dashboard_service.refresh_analytics_data(user_id, site_url) + + logger.info(f"Refreshed analytics data for user {user_id}") + return refresh_result + + finally: + db_session.close() + + except Exception as e: + logger.error(f"Error refreshing analytics data: {e}") + raise HTTPException(status_code=500, detail="Failed to refresh analytics data") + +# Helper methods for data conversion +def _convert_metrics(summary_data: Dict[str, Any]) -> Dict[str, SEOMetric]: + """Convert summary data to SEOMetric format.""" + try: + return { + "traffic": SEOMetric( + value=summary_data.get("clicks", 0), + change=0, # Would calculate from historical data + trend="up", + description="Organic traffic", + color="#4CAF50" + ), + "rankings": SEOMetric( + value=summary_data.get("position", 0), + change=0, # Would calculate from historical data + trend="up", + description="Average ranking", + color="#2196F3" + ), + "mobile": SEOMetric( + value=0, # Would get from performance data + change=0, + trend="stable", + description="Mobile speed", + color="#FF9800" + ), + "keywords": SEOMetric( + value=0, # Would count from query data + change=0, + trend="up", + description="Keywords tracked", + color="#9C27B0" + ) + } + except Exception as e: + logger.error(f"Error converting metrics: {e}") + return {} + +def _convert_platforms(platform_data: Dict[str, Any]) -> Dict[str, PlatformStatus]: + """Convert platform data to PlatformStatus format.""" + try: + return { + "google_search_console": PlatformStatus( + status="connected" if platform_data.get("gsc", {}).get("connected", False) else "disconnected", + connected=platform_data.get("gsc", {}).get("connected", False), + last_sync=platform_data.get("gsc", {}).get("last_sync"), + data_points=len(platform_data.get("gsc", {}).get("sites", [])) + ), + "bing_webmaster": PlatformStatus( + status="connected" if platform_data.get("bing", {}).get("connected", False) else 
"disconnected", + connected=platform_data.get("bing", {}).get("connected", False), + last_sync=platform_data.get("bing", {}).get("last_sync"), + data_points=len(platform_data.get("bing", {}).get("sites", [])) + ) + } + except Exception as e: + logger.error(f"Error converting platforms: {e}") + return {} \ No newline at end of file diff --git a/backend/api/subscription_api.py b/backend/api/subscription_api.py index b296d5d9..2cd2b704 100644 --- a/backend/api/subscription_api.py +++ b/backend/api/subscription_api.py @@ -11,8 +11,7 @@ from loguru import logger from functools import lru_cache from services.database import get_db -from services.usage_tracking_service import UsageTrackingService -from services.pricing_service import PricingService +from services.subscription import UsageTrackingService, PricingService from middleware.auth_middleware import get_current_user from models.subscription_models import ( APIProvider, SubscriptionPlan, UserSubscription, UsageSummary, @@ -25,7 +24,7 @@ router = APIRouter(prefix="/api/subscription", tags=["subscription"]) # Cache key: (user_id). 
TTL-like behavior implemented via timestamp check _dashboard_cache: Dict[str, Dict[str, Any]] = {} _dashboard_cache_ts: Dict[str, float] = {} -_DASHBOARD_CACHE_TTL_SEC = 2.0 +_DASHBOARD_CACHE_TTL_SEC = 600.0 @router.get("/usage/{user_id}") async def get_user_usage( @@ -48,10 +47,9 @@ async def get_user_usage( "success": True, "data": stats } - except Exception as e: logger.error(f"Error getting user usage: {e}") - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail="Failed to get user usage") @router.get("/usage/{user_id}/trends") async def get_usage_trends( @@ -279,19 +277,29 @@ async def get_subscription_status( } } - # Check if subscription is within valid period + # Check if subscription is within valid period; auto-advance if expired and auto_renew now = datetime.utcnow() if subscription.current_period_end < now: - return { - "success": True, - "data": { - "active": False, - "plan": subscription.plan.tier.value, - "tier": subscription.plan.tier.value, - "can_use_api": False, - "reason": "Subscription expired" + if getattr(subscription, 'auto_renew', False): + # advance period + try: + from services.pricing_service import PricingService + pricing = PricingService(db) + # reuse helper to ensure current + pricing._ensure_subscription_current(subscription) + except Exception as e: + logger.error(f"Failed to auto-advance subscription: {e}") + else: + return { + "success": True, + "data": { + "active": False, + "plan": subscription.plan.tier.value, + "tier": subscription.plan.tier.value, + "can_use_api": False, + "reason": "Subscription expired" + } } - } return { "success": True, @@ -544,7 +552,14 @@ async def get_dashboard_data( # Serve from short TTL cache to avoid hammering DB on bursts import time now = time.time() - if user_id in _dashboard_cache and (now - _dashboard_cache_ts.get(user_id, 0)) < _DASHBOARD_CACHE_TTL_SEC: + import os + nocache = False + try: + # Not having direct access to request here; provide env 
flag override as simple control + nocache = os.getenv('SUBSCRIPTION_DASHBOARD_NOCACHE', 'false').lower() in {'1','true','yes','on'} + except Exception: + nocache = False + if not nocache and user_id in _dashboard_cache and (now - _dashboard_cache_ts.get(user_id, 0)) < _DASHBOARD_CACHE_TTL_SEC: return _dashboard_cache[user_id] usage_service = UsageTrackingService(db) diff --git a/backend/app.py b/backend/app.py index 2641c4d0..902970d3 100644 --- a/backend/app.py +++ b/backend/app.py @@ -9,7 +9,7 @@ from loguru import logger from dotenv import load_dotenv import asyncio from datetime import datetime -from middleware.monitoring_middleware import monitoring_middleware +from services.subscription import monitoring_middleware # Import modular utilities from alwrity_utils import HealthChecker, RateLimiter, FrontendServing, RouterManager, OnboardingManager @@ -74,7 +74,12 @@ from api.seo_dashboard import ( get_seo_metrics_detailed, get_analysis_summary, batch_analyze_urls, - SEOAnalysisRequest + SEOAnalysisRequest, + get_seo_dashboard_overview, + get_gsc_raw_data, + get_bing_raw_data, + get_competitive_insights, + refresh_analytics_data ) # Initialize FastAPI app @@ -85,15 +90,28 @@ app = FastAPI( ) # Add CORS middleware +# Build allowed origins list with env overrides to support dynamic tunnels (e.g., ngrok) +default_allowed_origins = [ + "http://localhost:3000", # React dev server + "http://localhost:8000", # Backend dev server + "http://localhost:3001", # Alternative React port + "https://alwrity-ai.vercel.app", # Vercel frontend +] + +# Optional dynamic origins from environment (comma-separated) +env_origins = os.getenv("ALWRITY_ALLOWED_ORIGINS", "").split(",") if os.getenv("ALWRITY_ALLOWED_ORIGINS") else [] +env_origins = [o.strip() for o in env_origins if o.strip()] + +# Convenience: NGROK_URL env var (single origin) +ngrok_origin = os.getenv("NGROK_URL") +if ngrok_origin: + env_origins.append(ngrok_origin.strip()) + +allowed_origins = 
list(dict.fromkeys(default_allowed_origins + env_origins)) # de-duplicate, keep order + app.add_middleware( CORSMiddleware, - allow_origins=[ - "http://localhost:3000", # React dev server - "http://localhost:8000", # Backend dev server - "http://localhost:3001", # Alternative React port - "https://alwrity-ai.vercel.app", - "https://alwrity-ai.vercel.app", # Vercel frontend - ], + allow_origins=allowed_origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"], @@ -192,15 +210,41 @@ async def seo_metrics(): return await get_seo_metrics() @app.get("/api/seo-dashboard/platforms") -async def seo_platforms(): +async def seo_platforms(current_user: dict = Depends(get_current_user)): """Get platform status.""" - return await get_platform_status() + return await get_platform_status(current_user) @app.get("/api/seo-dashboard/insights") async def seo_insights(): """Get AI insights.""" return await get_ai_insights() +# New SEO Dashboard endpoints with real data +@app.get("/api/seo-dashboard/overview") +async def seo_dashboard_overview_endpoint(current_user: dict = Depends(get_current_user), site_url: str = None): + """Get comprehensive SEO dashboard overview with real GSC/Bing data.""" + return await get_seo_dashboard_overview(current_user, site_url) + +@app.get("/api/seo-dashboard/gsc/raw") +async def gsc_raw_data_endpoint(current_user: dict = Depends(get_current_user), site_url: str = None): + """Get raw GSC data for the specified site.""" + return await get_gsc_raw_data(current_user, site_url) + +@app.get("/api/seo-dashboard/bing/raw") +async def bing_raw_data_endpoint(current_user: dict = Depends(get_current_user), site_url: str = None): + """Get raw Bing data for the specified site.""" + return await get_bing_raw_data(current_user, site_url) + +@app.get("/api/seo-dashboard/competitive-insights") +async def competitive_insights_endpoint(current_user: dict = Depends(get_current_user), site_url: str = None): + """Get competitive insights from onboarding step 
3 data.""" + return await get_competitive_insights(current_user, site_url) + +@app.post("/api/seo-dashboard/refresh") +async def refresh_analytics_data_endpoint(current_user: dict = Depends(get_current_user), site_url: str = None): + """Refresh analytics data by invalidating cache and fetching fresh data.""" + return await refresh_analytics_data(current_user, site_url) + @app.get("/api/seo-dashboard/health") async def seo_dashboard_health(): """Health check for SEO dashboard.""" @@ -232,6 +276,10 @@ async def batch_analyze_urls_endpoint(urls: list[str]): """Analyze multiple URLs in batch.""" return await batch_analyze_urls(urls) +# Include platform analytics router +from routers.platform_analytics import router as platform_analytics_router +app.include_router(platform_analytics_router) + # Setup frontend serving using modular utilities frontend_serving.setup_frontend_serving() diff --git a/backend/database/migrations/create_blog_writer_tasks.sql b/backend/database/migrations/create_blog_writer_tasks.sql new file mode 100644 index 00000000..0c32084f --- /dev/null +++ b/backend/database/migrations/create_blog_writer_tasks.sql @@ -0,0 +1,149 @@ +-- Blog Writer Task Persistence Tables +-- Creates tables for storing task state, progress, and metrics + +-- Tasks table - stores main task information +CREATE TABLE IF NOT EXISTS blog_writer_tasks ( + id VARCHAR(36) PRIMARY KEY, + user_id VARCHAR(36) NOT NULL, + task_type VARCHAR(50) NOT NULL, -- 'research', 'outline', 'content', 'seo', 'medium_generation' + status VARCHAR(20) NOT NULL DEFAULT 'pending', -- 'pending', 'running', 'completed', 'failed', 'cancelled' + request_data JSONB, -- Original request parameters + result_data JSONB, -- Final result data + error_data JSONB, -- Error information if failed + created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + completed_at TIMESTAMP WITH TIME ZONE, + correlation_id VARCHAR(36), -- For request tracing + operation 
VARCHAR(100), -- Specific operation being performed + retry_count INTEGER DEFAULT 0, -- Number of retry attempts + max_retries INTEGER DEFAULT 3, -- Maximum retry attempts allowed + priority INTEGER DEFAULT 0, -- Task priority (higher = more important) + metadata JSONB -- Additional metadata +); + +-- Task progress table - stores progress updates +CREATE TABLE IF NOT EXISTS blog_writer_task_progress ( + id SERIAL PRIMARY KEY, + task_id VARCHAR(36) NOT NULL REFERENCES blog_writer_tasks(id) ON DELETE CASCADE, + timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + message TEXT NOT NULL, + percentage DECIMAL(5,2) DEFAULT 0.00, -- 0.00 to 100.00 + progress_type VARCHAR(50) DEFAULT 'info', -- 'info', 'warning', 'error', 'success' + metadata JSONB -- Additional progress metadata +); + +-- Task metrics table - stores performance metrics +CREATE TABLE IF NOT EXISTS blog_writer_task_metrics ( + id SERIAL PRIMARY KEY, + task_id VARCHAR(36) NOT NULL REFERENCES blog_writer_tasks(id) ON DELETE CASCADE, + operation VARCHAR(100) NOT NULL, + duration_ms INTEGER NOT NULL, + token_usage JSONB, -- Token usage statistics + api_calls INTEGER DEFAULT 0, + cache_hits INTEGER DEFAULT 0, + cache_misses INTEGER DEFAULT 0, + error_count INTEGER DEFAULT 0, + created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + metadata JSONB -- Additional metrics +); + +-- Task recovery table - stores recovery information +CREATE TABLE IF NOT EXISTS blog_writer_task_recovery ( + id SERIAL PRIMARY KEY, + task_id VARCHAR(36) NOT NULL REFERENCES blog_writer_tasks(id) ON DELETE CASCADE, + recovery_reason VARCHAR(100) NOT NULL, -- 'server_restart', 'timeout', 'error' + recovery_action VARCHAR(100) NOT NULL, -- 'resume', 'retry', 'fail' + checkpoint_data JSONB, -- State at recovery point + recovered_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + recovery_successful BOOLEAN DEFAULT FALSE, + metadata JSONB +); + +-- Indexes for performance +CREATE INDEX IF NOT EXISTS idx_blog_writer_tasks_user_id ON 
blog_writer_tasks(user_id); +CREATE INDEX IF NOT EXISTS idx_blog_writer_tasks_status ON blog_writer_tasks(status); +CREATE INDEX IF NOT EXISTS idx_blog_writer_tasks_created_at ON blog_writer_tasks(created_at); +CREATE INDEX IF NOT EXISTS idx_blog_writer_tasks_task_type ON blog_writer_tasks(task_type); +CREATE INDEX IF NOT EXISTS idx_blog_writer_tasks_correlation_id ON blog_writer_tasks(correlation_id); + +CREATE INDEX IF NOT EXISTS idx_blog_writer_task_progress_task_id ON blog_writer_task_progress(task_id); +CREATE INDEX IF NOT EXISTS idx_blog_writer_task_progress_timestamp ON blog_writer_task_progress(timestamp); + +CREATE INDEX IF NOT EXISTS idx_blog_writer_task_metrics_task_id ON blog_writer_task_metrics(task_id); +CREATE INDEX IF NOT EXISTS idx_blog_writer_task_metrics_operation ON blog_writer_task_metrics(operation); +CREATE INDEX IF NOT EXISTS idx_blog_writer_task_metrics_created_at ON blog_writer_task_metrics(created_at); + +CREATE INDEX IF NOT EXISTS idx_blog_writer_task_recovery_task_id ON blog_writer_task_recovery(task_id); +CREATE INDEX IF NOT EXISTS idx_blog_writer_task_recovery_recovered_at ON blog_writer_task_recovery(recovered_at); + +-- Function to automatically update updated_at timestamp +CREATE OR REPLACE FUNCTION update_blog_writer_tasks_updated_at() +RETURNS TRIGGER AS $$ +BEGIN + NEW.updated_at = NOW(); + RETURN NEW; +END; +$$ language 'plpgsql'; + +-- Trigger to automatically update updated_at +CREATE TRIGGER update_blog_writer_tasks_updated_at + BEFORE UPDATE ON blog_writer_tasks + FOR EACH ROW + EXECUTE FUNCTION update_blog_writer_tasks_updated_at(); + +-- Function to clean up old completed tasks (older than 7 days) +CREATE OR REPLACE FUNCTION cleanup_old_blog_writer_tasks() +RETURNS INTEGER AS $$ +DECLARE + deleted_count INTEGER; +BEGIN + DELETE FROM blog_writer_tasks + WHERE status IN ('completed', 'failed', 'cancelled') + AND created_at < NOW() - INTERVAL '7 days'; + + GET DIAGNOSTICS deleted_count = ROW_COUNT; + RETURN deleted_count; 
+END; +$$ language 'plpgsql'; + +-- Create a view for task analytics +CREATE OR REPLACE VIEW blog_writer_task_analytics AS +SELECT + task_type, + status, + COUNT(*) as task_count, + AVG(EXTRACT(EPOCH FROM (completed_at - created_at))) as avg_duration_seconds, + AVG(EXTRACT(EPOCH FROM (updated_at - created_at))) as avg_processing_time_seconds, + COUNT(CASE WHEN status = 'completed' THEN 1 END) as completed_count, + COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed_count, + COUNT(CASE WHEN status = 'running' THEN 1 END) as running_count, + ROUND( + COUNT(CASE WHEN status = 'completed' THEN 1 END) * 100.0 / COUNT(*), + 2 + ) as success_rate_percentage +FROM blog_writer_tasks +WHERE created_at >= NOW() - INTERVAL '30 days' +GROUP BY task_type, status +ORDER BY task_type, status; + +-- Create a view for performance metrics +CREATE OR REPLACE VIEW blog_writer_performance_metrics AS +SELECT + t.task_type, + t.operation, + COUNT(m.id) as metric_count, + AVG(m.duration_ms) as avg_duration_ms, + MIN(m.duration_ms) as min_duration_ms, + MAX(m.duration_ms) as max_duration_ms, + SUM(m.api_calls) as total_api_calls, + SUM(m.cache_hits) as total_cache_hits, + SUM(m.cache_misses) as total_cache_misses, + ROUND( + SUM(m.cache_hits) * 100.0 / NULLIF(SUM(m.cache_hits + m.cache_misses), 0), + 2 + ) as cache_hit_rate_percentage +FROM blog_writer_tasks t +LEFT JOIN blog_writer_task_metrics m ON t.id = m.task_id +WHERE t.created_at >= NOW() - INTERVAL '7 days' +GROUP BY t.task_type, t.operation +ORDER BY t.task_type, t.operation; diff --git a/backend/logging_config.py b/backend/logging_config.py index 906255ae..1ec2ef11 100644 --- a/backend/logging_config.py +++ b/backend/logging_config.py @@ -13,6 +13,9 @@ def setup_clean_logging(): """Set up clean logging for end users.""" verbose_mode = os.getenv("ALWRITY_VERBOSE", "false").lower() == "true" + # Always remove all existing handlers first to prevent conflicts + logger.remove() + if not verbose_mode: # Suppress verbose logging for 
end users - be more aggressive logging.getLogger('sqlalchemy.engine').setLevel(logging.CRITICAL) @@ -81,8 +84,6 @@ def setup_clean_logging(): logging.getLogger(logger_name).setLevel(logging.WARNING) # Configure loguru to be less verbose (only show warnings and errors) - logger.remove() # Remove default handler - def warning_only_filter(record): return record["level"].name in ["WARNING", "ERROR", "CRITICAL"] @@ -94,7 +95,6 @@ def setup_clean_logging(): ) else: # In verbose mode, show all log levels with detailed formatting - logger.remove() # Remove default handler logger.add( sys.stdout.write, level="DEBUG", diff --git a/backend/middleware/logging_middleware.py b/backend/middleware/logging_middleware.py index 34cbc9ee..7a9133d2 100644 --- a/backend/middleware/logging_middleware.py +++ b/backend/middleware/logging_middleware.py @@ -310,22 +310,23 @@ seo_logger = SEOToolsLogger() log_analyzer = LogAnalyzer() # Configure loguru for structured logging -logger.add( - f"{LOG_BASE_DIR}/application.log", - rotation="1 day", - retention="30 days", - level="INFO", - format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} | {message}", - serialize=True -) +# Commented out to prevent conflicts with main logging configuration +# logger.add( +# f"{LOG_BASE_DIR}/application.log", +# rotation="1 day", +# retention="30 days", +# level="INFO", +# format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} | {message}", +# serialize=True +# ) -logger.add( - f"{LOG_BASE_DIR}/errors.log", - rotation="1 day", - retention="30 days", - level="ERROR", - format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} | {message}", - serialize=True -) +# logger.add( +# f"{LOG_BASE_DIR}/errors.log", +# rotation="1 day", +# retention="30 days", +# level="ERROR", +# format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} | {message}", +# serialize=True +# ) logger.info("Logging middleware initialized successfully") \ No newline at end of file 
diff --git a/backend/middleware/monitoring_middleware.py b/backend/middleware/monitoring_middleware.py deleted file mode 100644 index e84851c9..00000000 --- a/backend/middleware/monitoring_middleware.py +++ /dev/null @@ -1,635 +0,0 @@ -""" -Enhanced FastAPI Monitoring Middleware -Database-backed monitoring for API calls, errors, performance metrics, and usage tracking. -Includes comprehensive subscription-based usage monitoring and cost tracking. -""" - -from fastapi import Request, Response -from fastapi.responses import JSONResponse -import time -import json -from datetime import datetime, timedelta -from typing import Dict, List, Any, Optional -from collections import defaultdict, deque -import asyncio -from loguru import logger -from sqlalchemy.orm import Session -from sqlalchemy import and_, func -import re - -from models.api_monitoring import APIRequest, APIEndpointStats, SystemHealth, CachePerformance -from models.subscription_models import APIProvider -from services.database import get_db -from services.usage_tracking_service import UsageTrackingService -from services.pricing_service import PricingService - -class DatabaseAPIMonitor: - """Database-backed API monitoring with usage tracking and subscription management.""" - - def __init__(self): - self.cache_stats = { - 'hits': 0, - 'misses': 0, - 'hit_rate': 0.0 - } - # API provider detection patterns - Updated to match actual endpoints - self.provider_patterns = { - APIProvider.GEMINI: [ - r'gemini', r'google.*ai' - ], - APIProvider.OPENAI: [r'openai', r'gpt', r'chatgpt'], - APIProvider.ANTHROPIC: [r'anthropic', r'claude'], - APIProvider.MISTRAL: [r'mistral'], - APIProvider.TAVILY: [r'tavily'], - APIProvider.SERPER: [r'serper'], - APIProvider.METAPHOR: [r'metaphor', r'/exa'], - APIProvider.FIRECRAWL: [r'firecrawl'] - } - - def detect_api_provider(self, path: str, user_agent: str = None) -> Optional[APIProvider]: - """Detect which API provider is being used based on request details.""" - path_lower = 
path.lower() - user_agent_lower = (user_agent or '').lower() - - # Permanently ignore internal route families that must not accrue or check provider usage - if path_lower.startswith('/api/onboarding/') or path_lower.startswith('/api/subscription/'): - return None - - for provider, patterns in self.provider_patterns.items(): - for pattern in patterns: - if re.search(pattern, path_lower) or re.search(pattern, user_agent_lower): - return provider - - return None - - def extract_usage_metrics(self, request_body: str = None, response_body: str = None) -> Dict[str, Any]: - """Extract usage metrics from request/response bodies.""" - metrics = { - 'tokens_input': 0, - 'tokens_output': 0, - 'model_used': None, - 'search_count': 0, - 'image_count': 0, - 'page_count': 0 - } - - try: - # Try to parse request body for input tokens/content - if request_body: - request_data = json.loads(request_body) if isinstance(request_body, str) else request_body - - # Extract model information - if 'model' in request_data: - metrics['model_used'] = request_data['model'] - - # Estimate input tokens from prompt/content - if 'prompt' in request_data: - metrics['tokens_input'] = self._estimate_tokens(request_data['prompt']) - elif 'messages' in request_data: - total_content = ' '.join([msg.get('content', '') for msg in request_data['messages']]) - metrics['tokens_input'] = self._estimate_tokens(total_content) - elif 'input' in request_data: - metrics['tokens_input'] = self._estimate_tokens(str(request_data['input'])) - - # Count specific request types - if 'query' in request_data or 'search' in request_data: - metrics['search_count'] = 1 - if 'image' in request_data or 'generate_image' in request_data: - metrics['image_count'] = 1 - if 'url' in request_data or 'crawl' in request_data: - metrics['page_count'] = 1 - - # Try to parse response body for output tokens - if response_body: - response_data = json.loads(response_body) if isinstance(response_body, str) else response_body - - # Extract 
output content and estimate tokens - if 'text' in response_data: - metrics['tokens_output'] = self._estimate_tokens(response_data['text']) - elif 'content' in response_data: - metrics['tokens_output'] = self._estimate_tokens(str(response_data['content'])) - elif 'choices' in response_data and response_data['choices']: - choice = response_data['choices'][0] - if 'message' in choice and 'content' in choice['message']: - metrics['tokens_output'] = self._estimate_tokens(choice['message']['content']) - - # Extract actual token usage if provided by API - if 'usage' in response_data: - usage = response_data['usage'] - if 'prompt_tokens' in usage: - metrics['tokens_input'] = usage['prompt_tokens'] - if 'completion_tokens' in usage: - metrics['tokens_output'] = usage['completion_tokens'] - - except (json.JSONDecodeError, KeyError, TypeError) as e: - logger.debug(f"Could not extract usage metrics: {e}") - - return metrics - - def _estimate_tokens(self, text: str) -> int: - """Estimate token count for text (rough approximation).""" - if not text: - return 0 - # Rough estimation: 1.3 tokens per word on average - word_count = len(str(text).split()) - return int(word_count * 1.3) - - async def add_request(self, db: Session, path: str, method: str, status_code: int, - duration: float, user_id: str = None, cache_hit: bool = None, - request_size: int = None, response_size: int = None, - user_agent: str = None, ip_address: str = None, - request_body: str = None, response_body: str = None): - """Add a request to database monitoring with usage tracking.""" - try: - # Store individual request - api_request = APIRequest( - path=path, - method=method, - status_code=status_code, - duration=duration, - user_id=user_id, - cache_hit=cache_hit, - request_size=request_size, - response_size=response_size, - user_agent=user_agent, - ip_address=ip_address - ) - db.add(api_request) - - # Track API usage if this is an API call to external providers - api_provider = self.detect_api_provider(path, 
user_agent) - if api_provider and user_id: - logger.info(f"πŸ” Detected API call: {path} -> {api_provider.value} for user: {user_id}") - try: - # Extract usage metrics - usage_metrics = self.extract_usage_metrics(request_body, response_body) - - # Track usage with the usage tracking service - usage_service = UsageTrackingService(db) - await usage_service.track_api_usage( - user_id=user_id, - provider=api_provider, - endpoint=path, - method=method, - model_used=usage_metrics.get('model_used'), - tokens_input=usage_metrics.get('tokens_input', 0), - tokens_output=usage_metrics.get('tokens_output', 0), - response_time=duration, - status_code=status_code, - request_size=request_size, - response_size=response_size, - user_agent=user_agent, - ip_address=ip_address, - search_count=usage_metrics.get('search_count', 0), - image_count=usage_metrics.get('image_count', 0), - page_count=usage_metrics.get('page_count', 0) - ) - logger.info(f"βœ… Tracked usage for {user_id}: {api_provider.value} - {usage_metrics.get('tokens_input', 0)}+{usage_metrics.get('tokens_output', 0)} tokens") - except Exception as usage_error: - logger.error(f"Error tracking API usage: {usage_error}") - # Don't fail the main request if usage tracking fails - - # Update endpoint stats - endpoint_key = f"{method} {path}" - endpoint_stats = db.query(APIEndpointStats).filter( - APIEndpointStats.endpoint == endpoint_key - ).first() - - if not endpoint_stats: - endpoint_stats = APIEndpointStats(endpoint=endpoint_key) - db.add(endpoint_stats) - - # Update statistics - handle None values - endpoint_stats.total_requests = (endpoint_stats.total_requests or 0) + 1 - endpoint_stats.total_duration = (endpoint_stats.total_duration or 0.0) + duration - endpoint_stats.avg_duration = endpoint_stats.total_duration / endpoint_stats.total_requests - endpoint_stats.last_called = datetime.utcnow() - - if status_code >= 400: - endpoint_stats.total_errors = (endpoint_stats.total_errors or 0) + 1 - - if cache_hit is not None: - 
if cache_hit: - endpoint_stats.cache_hits = (endpoint_stats.cache_hits or 0) + 1 - else: - endpoint_stats.cache_misses = (endpoint_stats.cache_misses or 0) + 1 - - total_cache_requests = endpoint_stats.cache_hits + endpoint_stats.cache_misses - if total_cache_requests > 0: - endpoint_stats.cache_hit_rate = (endpoint_stats.cache_hits / total_cache_requests) * 100 - - # Update min/max duration - if endpoint_stats.min_duration is None or duration < endpoint_stats.min_duration: - endpoint_stats.min_duration = duration - if endpoint_stats.max_duration is None or duration > endpoint_stats.max_duration: - endpoint_stats.max_duration = duration - - db.commit() - - # Update cache stats - if cache_hit is not None: - if cache_hit: - self.cache_stats['hits'] += 1 - else: - self.cache_stats['misses'] += 1 - - total_cache_requests = self.cache_stats['hits'] + self.cache_stats['misses'] - if total_cache_requests > 0: - self.cache_stats['hit_rate'] = (self.cache_stats['hits'] / total_cache_requests) * 100 - - except Exception as e: - logger.error(f"❌ Error storing API request: {str(e)}") - db.rollback() - - async def get_stats(self, db: Session, minutes: int = 5) -> Dict[str, Any]: - """Get current monitoring statistics from database.""" - try: - now = datetime.utcnow() - since = now - timedelta(minutes=minutes) - - # Recent requests - recent_requests = db.query(APIRequest).filter( - APIRequest.timestamp >= since - ).count() - - # Recent errors - recent_errors = db.query(APIRequest).filter( - and_( - APIRequest.timestamp >= since, - APIRequest.status_code >= 400 - ) - ).count() - - # Top endpoints - top_endpoints = db.query(APIEndpointStats).order_by( - APIEndpointStats.total_requests.desc() - ).limit(10).all() - - # Recent errors details - recent_error_details = db.query(APIRequest).filter( - and_( - APIRequest.timestamp >= since, - APIRequest.status_code >= 400 - ) - ).order_by(APIRequest.timestamp.desc()).limit(10).all() - - # Overall stats - total_requests = 
db.query(APIRequest).count() - total_errors = db.query(APIRequest).filter(APIRequest.status_code >= 400).count() - - # Calculate error rate - error_rate = (recent_errors / max(recent_requests, 1)) * 100 - - return { - 'timestamp': now.isoformat(), - 'overview': { - 'total_requests': total_requests, - 'total_errors': total_errors, - 'recent_requests': recent_requests, - 'recent_errors': recent_errors - }, - 'cache_performance': self.cache_stats, - 'top_endpoints': [ - { - 'endpoint': endpoint.endpoint, - 'count': endpoint.total_requests or 0, - 'avg_time': round(endpoint.avg_duration or 0.0, 3), - 'errors': endpoint.total_errors or 0, - 'last_called': endpoint.last_called.isoformat() if endpoint.last_called else None, - 'cache_hit_rate': round(endpoint.cache_hit_rate or 0.0, 2) - } - for endpoint in top_endpoints - ], - 'recent_errors': [ - { - 'timestamp': error.timestamp.isoformat(), - 'path': error.path, - 'method': error.method, - 'status_code': error.status_code, - 'duration': error.duration - } - for error in recent_error_details - ], - 'system_health': { - 'status': 'healthy' if recent_errors < 5 else 'warning', - 'error_rate': round(error_rate, 2) - } - } - - except Exception as e: - logger.error(f"❌ Error getting monitoring stats: {str(e)}") - return { - 'timestamp': datetime.utcnow().isoformat(), - 'error': str(e), - 'overview': {'total_requests': 0, 'total_errors': 0, 'recent_requests': 0, 'recent_errors': 0}, - 'system_health': {'status': 'unknown', 'error_rate': 0.0} - } - - async def get_lightweight_stats(self, db: Session) -> Dict[str, Any]: - """Get lightweight stats for dashboard header.""" - try: - now = datetime.utcnow() - since = now - timedelta(minutes=5) - - # Quick stats for dashboard - recent_requests = db.query(APIRequest).filter( - APIRequest.timestamp >= since - ).count() - - recent_errors = db.query(APIRequest).filter( - and_( - APIRequest.timestamp >= since, - APIRequest.status_code >= 400 - ) - ).count() - - # Determine status - if 
recent_errors == 0: - status = "healthy" - icon = "🟒" - elif recent_errors < 3: - status = "warning" - icon = "🟑" - else: - status = "critical" - icon = "πŸ”΄" - - return { - 'status': status, - 'icon': icon, - 'recent_requests': recent_requests, - 'recent_errors': recent_errors, - 'error_rate': round((recent_errors / max(recent_requests, 1)) * 100, 1), - 'timestamp': now.isoformat() - } - - except Exception as e: - logger.error(f"❌ Error getting lightweight stats: {str(e)}") - return { - 'status': 'unknown', - 'icon': 'βšͺ', - 'recent_requests': 0, - 'recent_errors': 0, - 'error_rate': 0.0, - 'timestamp': datetime.utcnow().isoformat() - } - -# Global monitor instance -api_monitor = DatabaseAPIMonitor() - -# List of endpoints to exclude from monitoring -EXCLUDED_ENDPOINTS = [ - "/api/content-planning/monitoring/lightweight-stats", - "/api/content-planning/monitoring/api-stats", - "/api/content-planning/monitoring/cache-stats", - "/api/content-planning/monitoring/health" -] -# Also exclude whole route families by prefix (e.g., subscription/billing must never be blocked) -EXCLUDED_PREFIXES = [ -] - - -def should_monitor_endpoint(path: str) -> bool: - """Check if an endpoint should be monitored.""" - return not any(path.endswith(excluded) for excluded in EXCLUDED_ENDPOINTS) and not any(path.startswith(prefix) for prefix in EXCLUDED_PREFIXES) - -async def check_usage_limits_middleware(request: Request, user_id: str, request_body: str = None) -> Optional[JSONResponse]: - """Check usage limits before processing request.""" - if not user_id: - return None - - # No special whitelist; onboarding/subscription are ignored by provider detection - try: - path = request.url.path - except Exception: - pass - - try: - db = next(get_db()) - api_monitor = DatabaseAPIMonitor() - - # Detect if this is an API call that should be rate limited - api_provider = api_monitor.detect_api_provider(request.url.path, request.headers.get('user-agent')) - if not api_provider: - return None - - # 
Use provided request body or read it if not provided - if request_body is None: - try: - if hasattr(request, '_body'): - request_body = request._body - else: - # Try to read body (this might not work in all cases) - body = await request.body() - request_body = body.decode('utf-8') if body else None - except: - pass - - # Estimate tokens needed - tokens_requested = 0 - if request_body: - usage_metrics = api_monitor.extract_usage_metrics(request_body) - tokens_requested = usage_metrics.get('tokens_input', 0) - - # Check limits - usage_service = UsageTrackingService(db) - can_proceed, message, usage_info = await usage_service.enforce_usage_limits( - user_id=user_id, - provider=api_provider, - tokens_requested=tokens_requested - ) - - if not can_proceed: - logger.warning(f"Usage limit exceeded for {user_id}: {message}") - return JSONResponse( - status_code=429, - content={ - "error": "Usage limit exceeded", - "message": message, - "usage_info": usage_info, - "provider": api_provider.value - } - ) - - # Warn if approaching limits - if usage_info.get('call_usage_percentage', 0) >= 80 or usage_info.get('cost_usage_percentage', 0) >= 80: - logger.warning(f"User {user_id} approaching usage limits: {usage_info}") - - return None - - except Exception as e: - logger.error(f"Error checking usage limits: {e}") - # Don't block requests if usage checking fails - return None - finally: - db.close() - -async def monitoring_middleware(request: Request, call_next): - """Enhanced FastAPI middleware for monitoring API calls with usage tracking.""" - start_time = time.time() - - # Skip monitoring for excluded endpoints - if not should_monitor_endpoint(request.url.path): - response = await call_next(request) - return response - - # Extract request details - Enhanced user identification - user_id = None - try: - # PRIORITY 1: Check request.state.user_id (set by API key injection middleware) - if hasattr(request.state, 'user_id') and request.state.user_id: - user_id = request.state.user_id 
- logger.debug(f"Monitoring: Using user_id from request.state: {user_id}") - - # PRIORITY 2: Check query parameters - elif hasattr(request, 'query_params') and 'user_id' in request.query_params: - user_id = request.query_params['user_id'] - elif hasattr(request, 'path_params') and 'user_id' in request.path_params: - user_id = request.path_params['user_id'] - - # PRIORITY 3: Check headers for user identification - elif 'x-user-id' in request.headers: - user_id = request.headers['x-user-id'] - elif 'x-user-email' in request.headers: - user_id = request.headers['x-user-email'] # Use email as user identifier - elif 'x-session-id' in request.headers: - user_id = request.headers['x-session-id'] # Use session as fallback - - # Check for authorization header with user info - elif 'authorization' in request.headers: - # Auth middleware should have set request.state.user_id - # If not, this indicates an authentication failure that should be logged - user_id = None - logger.warning("Monitoring: Auth header present but no user_id in state - authentication may have failed") - - # For alpha testing, use IP address as user identifier if no other ID found - # But only if there's no auth header (truly anonymous) - elif not user_id and request.client and 'authorization' not in request.headers: - user_id = f"alpha_user_{request.client.host}" - - # Final fallback: None (skip usage limits for truly anonymous/unauthenticated) - # This prevents false positives for authenticated users - else: - user_id = None - - except Exception as e: - logger.debug(f"Error extracting user ID: {e}") - user_id = None # On error, skip usage limits - - # Capture request body for usage tracking (read once, safely) - request_body = None - try: - # Only read body for POST/PUT/PATCH requests to avoid issues - if request.method in ['POST', 'PUT', 'PATCH']: - if hasattr(request, '_body') and request._body: - request_body = request._body.decode('utf-8') - else: - # Read body only if it hasn't been read yet - try: 
- body = await request.body() - request_body = body.decode('utf-8') if body else None - except Exception as body_error: - logger.debug(f"Could not read request body: {body_error}") - request_body = None - except Exception as e: - logger.debug(f"Error capturing request body: {e}") - request_body = None - - # Check usage limits before processing - limit_response = await check_usage_limits_middleware(request, user_id, request_body) - if limit_response: - return limit_response - - # Get database session - db = next(get_db()) - - try: - response = await call_next(request) - status_code = response.status_code - duration = time.time() - start_time - - # Capture response body for usage tracking - response_body = None - try: - if hasattr(response, 'body'): - response_body = response.body.decode('utf-8') if response.body else None - elif hasattr(response, '_content'): - response_body = response._content.decode('utf-8') if response._content else None - except: - pass - - # Check for cache-related headers - cache_hit = None - if hasattr(response, 'headers'): - cache_header = response.headers.get('x-cache-status') - if cache_header: - cache_hit = cache_header.lower() == 'hit' - - # Store in database with enhanced tracking - await api_monitor.add_request( - db=db, - path=request.url.path, - method=request.method, - status_code=status_code, - duration=duration, - user_id=user_id, - cache_hit=cache_hit, - request_size=len(request_body) if request_body else None, - response_size=len(response_body) if response_body else None, - user_agent=request.headers.get('user-agent'), - ip_address=request.client.host if request.client else None, - request_body=request_body, - response_body=response_body - ) - - # Add monitoring headers - response.headers['x-response-time'] = f"{duration:.3f}s" - response.headers['x-monitor-id'] = f"{int(time.time())}" - - return response - - except Exception as e: - duration = time.time() - start_time - status_code = 500 - - # Store error in database with 
enhanced tracking - await api_monitor.add_request( - db=db, - path=request.url.path, - method=request.method, - status_code=status_code, - duration=duration, - user_id=user_id, - cache_hit=False, - request_size=len(request_body) if request_body else None, - response_size=None, - user_agent=request.headers.get('user-agent'), - ip_address=request.client.host if request.client else None, - request_body=request_body, - response_body=None - ) - - logger.error(f"❌ API Error: {request.method} {request.url.path} - {str(e)}") - - return JSONResponse( - status_code=500, - content={"error": "Internal server error", "monitor_id": int(time.time())} - ) - finally: - db.close() - -async def get_monitoring_stats(minutes: int = 5) -> Dict[str, Any]: - """Get current monitoring statistics.""" - db = next(get_db()) - try: - return await api_monitor.get_stats(db, minutes) - finally: - db.close() - -async def get_lightweight_stats() -> Dict[str, Any]: - """Get lightweight stats for dashboard header.""" - db = next(get_db()) - try: - return await api_monitor.get_lightweight_stats(db) - finally: - db.close() diff --git a/backend/models/blog_models.py b/backend/models/blog_models.py index 9b53661e..76cc92b2 100644 --- a/backend/models/blog_models.py +++ b/backend/models/blog_models.py @@ -71,6 +71,9 @@ class BlogResearchResponse(BaseModel): grounding_metadata: Optional[GroundingMetadata] = None # Google grounding metadata original_keywords: List[str] = [] # Original user-provided keywords for caching error_message: Optional[str] = None # Error message for graceful failures + retry_suggested: Optional[bool] = None # Whether retry is recommended + error_code: Optional[str] = None # Specific error code + actionable_steps: List[str] = [] # Steps user can take to resolve the issue class BlogOutlineSection(BaseModel): diff --git a/backend/package.json b/backend/package.json new file mode 100644 index 00000000..c9ffecd4 --- /dev/null +++ b/backend/package.json @@ -0,0 +1,7 @@ +{ + 
"dependencies": { + "@copilotkit/react-core": "^1.10.6", + "@copilotkit/react-textarea": "^1.10.6", + "@copilotkit/react-ui": "^1.10.6" + } +} diff --git a/backend/routers/bing_oauth.py b/backend/routers/bing_oauth.py index 0da5c565..467904cb 100644 --- a/backend/routers/bing_oauth.py +++ b/backend/routers/bing_oauth.py @@ -279,3 +279,26 @@ async def bing_oauth_health(): "timestamp": "2024-01-01T00:00:00Z", "version": "1.0.0" } + +@router.post("/purge-expired") +async def purge_expired_bing_tokens( + user: Dict[str, Any] = Depends(get_current_user) +): + """Purge user's expired/inactive Bing tokens to avoid refresh loops before reauth.""" + try: + user_id = user.get('id') + if not user_id: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="User ID not found.") + + deleted = oauth_service.purge_expired_tokens(user_id) + return { + "success": True, + "purged": deleted, + "message": f"Purged {deleted} expired/inactive Bing tokens" + } + except Exception as e: + logger.error(f"Error purging expired Bing tokens: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to purge expired Bing tokens." 
+ ) \ No newline at end of file diff --git a/backend/routers/linkedin.py b/backend/routers/linkedin.py index 34760d94..7be0a495 100644 --- a/backend/routers/linkedin.py +++ b/backend/routers/linkedin.py @@ -22,7 +22,7 @@ from services.linkedin_service import LinkedInService # Initialize the LinkedIn service instance linkedin_service = LinkedInService() -from middleware.monitoring_middleware import DatabaseAPIMonitor +from services.subscription.monitoring_middleware import DatabaseAPIMonitor from services.database import get_db_session from sqlalchemy.orm import Session diff --git a/backend/scripts/cleanup_onboarding_json_files.py b/backend/scripts/cleanup_onboarding_json_files.py new file mode 100644 index 00000000..93a6100d --- /dev/null +++ b/backend/scripts/cleanup_onboarding_json_files.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +""" +Cleanup Onboarding JSON Files Script + +This script removes any stale .onboarding_progress*.json files from the backend root. +These files were used in the old file-based onboarding system and are no longer needed +since we've migrated to database-only storage. + +Usage: + python backend/scripts/cleanup_onboarding_json_files.py [--dry-run] [--force] + +Options: + --dry-run Show what would be deleted without actually deleting + --force Skip confirmation prompt (use with caution) +""" + +import os +import sys +import glob +import argparse +from pathlib import Path +from loguru import logger + +# Add backend to path for imports +backend_dir = Path(__file__).parent.parent +sys.path.insert(0, str(backend_dir)) + +def find_onboarding_json_files(backend_root: Path) -> list: + """Find all .onboarding_progress*.json files in backend root.""" + pattern = str(backend_root / ".onboarding_progress*.json") + files = glob.glob(pattern) + return [Path(f) for f in files] + +def cleanup_json_files(backend_root: Path, dry_run: bool = False, force: bool = False) -> int: + """ + Clean up onboarding JSON files. 
+ + Args: + backend_root: Path to backend directory + dry_run: If True, only show what would be deleted + force: If True, skip confirmation prompt + + Returns: + Number of files processed + """ + files = find_onboarding_json_files(backend_root) + + if not files: + logger.info("βœ… No onboarding JSON files found to clean up") + return 0 + + logger.info(f"Found {len(files)} onboarding JSON file(s):") + for file in files: + logger.info(f" - {file.name}") + + if dry_run: + logger.info("πŸ” DRY RUN: Would delete the above files") + return len(files) + + if not force: + response = input(f"\nDelete {len(files)} onboarding JSON file(s)? (y/N): ").strip().lower() + if response not in ['y', 'yes']: + logger.info("❌ Cleanup cancelled by user") + return 0 + + deleted_count = 0 + for file in files: + try: + file.unlink() + logger.info(f"πŸ—‘οΈ Deleted: {file.name}") + deleted_count += 1 + except Exception as e: + logger.error(f"❌ Failed to delete {file.name}: {e}") + + logger.info(f"βœ… Cleanup complete: {deleted_count}/{len(files)} files deleted") + return deleted_count + +def main(): + """Main function.""" + parser = argparse.ArgumentParser(description="Clean up onboarding JSON files") + parser.add_argument("--dry-run", action="store_true", help="Show what would be deleted without actually deleting") + parser.add_argument("--force", action="store_true", help="Skip confirmation prompt") + + args = parser.parse_args() + + # Get backend root directory + script_dir = Path(__file__).parent + backend_root = script_dir.parent + + logger.info(f"🧹 Onboarding JSON Cleanup Script") + logger.info(f"Backend root: {backend_root}") + + if args.dry_run: + logger.info("πŸ” Running in DRY RUN mode") + + try: + deleted_count = cleanup_json_files(backend_root, args.dry_run, args.force) + + if deleted_count > 0: + logger.info("βœ… Cleanup completed successfully") + else: + logger.info("ℹ️ No files needed cleanup") + + except Exception as e: + logger.error(f"❌ Cleanup failed: {e}") + 
sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/backend/scripts/create_billing_tables.py b/backend/scripts/create_billing_tables.py index e0e6401d..bc494a90 100644 --- a/backend/scripts/create_billing_tables.py +++ b/backend/scripts/create_billing_tables.py @@ -19,7 +19,7 @@ import traceback # Import models from models.subscription_models import Base as SubscriptionBase from services.database import DATABASE_URL -from services.pricing_service import PricingService +from services.subscription.pricing_service import PricingService def create_billing_tables(): """Create all billing and subscription-related tables.""" diff --git a/backend/scripts/create_subscription_tables.py b/backend/scripts/create_subscription_tables.py index 9c63a10c..14b3ce6b 100644 --- a/backend/scripts/create_subscription_tables.py +++ b/backend/scripts/create_subscription_tables.py @@ -19,7 +19,7 @@ import traceback # Import models from models.subscription_models import Base as SubscriptionBase from services.database import DATABASE_URL -from services.pricing_service import PricingService +from services.subscription.pricing_service import PricingService def create_subscription_tables(): """Create all subscription-related tables.""" diff --git a/backend/scripts/init_alpha_subscription_tiers.py b/backend/scripts/init_alpha_subscription_tiers.py index 8f0b31f0..41229d4e 100644 --- a/backend/scripts/init_alpha_subscription_tiers.py +++ b/backend/scripts/init_alpha_subscription_tiers.py @@ -18,6 +18,9 @@ from loguru import logger def create_alpha_subscription_tiers(): """Create subscription tiers for alpha testers.""" + if os.getenv('ENABLE_ALPHA', 'false').lower() not in {'1','true','yes','on'}: + logger.info("Alpha tier initialization is disabled (ENABLE_ALPHA is false)") + return False db = get_db_session() if not db: @@ -225,6 +228,9 @@ def create_api_pricing(db: Session): def assign_default_plan_to_users(): """Assign Free Alpha plan to all existing users.""" + if 
os.getenv('ENABLE_ALPHA', 'false').lower() not in {'1','true','yes','on'}: + logger.info("Alpha default plan assignment is disabled (ENABLE_ALPHA is false)") + return False db = get_db_session() if not db: diff --git a/backend/services/__init__.py b/backend/services/__init__.py index b01fc0e0..c1e1c794 100644 --- a/backend/services/__init__.py +++ b/backend/services/__init__.py @@ -1,6 +1,6 @@ """Services package for ALwrity backend.""" -from .api_key_manager import ( +from .onboarding.api_key_manager import ( APIKeyManager, OnboardingProgress, get_onboarding_progress, diff --git a/backend/services/analytics/handlers/bing_handler.py b/backend/services/analytics/handlers/bing_handler.py index f7a91695..995b5578 100644 --- a/backend/services/analytics/handlers/bing_handler.py +++ b/backend/services/analytics/handlers/bing_handler.py @@ -15,6 +15,7 @@ from ..models.analytics_data import AnalyticsData from ..models.platform_types import PlatformType from .base_handler import BaseAnalyticsHandler from ..insights.bing_insights_service import BingInsightsService +from services.bing_analytics_storage_service import BingAnalyticsStorageService import os @@ -27,6 +28,8 @@ class BingAnalyticsHandler(BaseAnalyticsHandler): # Initialize insights service database_url = os.getenv('DATABASE_URL', 'sqlite:///./bing_analytics.db') self.insights_service = BingInsightsService(database_url) + # Storage service used in onboarding step 5 + self.storage_service = BingAnalyticsStorageService(os.getenv('DATABASE_URL', 'sqlite:///alwrity.db')) async def get_analytics(self, user_id: str) -> AnalyticsData: """ @@ -44,43 +47,50 @@ class BingAnalyticsHandler(BaseAnalyticsHandler): logger.info("Fetching fresh Bing analytics for user {user_id} (expensive operation)", user_id=user_id) try: - # Get user's Bing connection status - connection_status = self.bing_service.get_connection_status(user_id) + # Get user's Bing connection status with detailed token info + token_status = 
self.bing_service.get_user_token_status(user_id) - if not connection_status.get('connected'): - return self.create_error_response('Bing Webmaster not connected') + if not token_status.get('has_active_tokens'): + if token_status.get('has_expired_tokens'): + return self.create_error_response('Bing Webmaster tokens expired - please reconnect') + else: + return self.create_error_response('Bing Webmaster not connected') - # Get the first connected site token info - token_sites = connection_status.get('sites', []) - if not token_sites: - return self.create_error_response('No Bing Webmaster sites found') + # Try once to fetch sites (may return empty if tokens are valid but no verified sites); do not block + sites = self.bing_service.get_user_sites(user_id) - # Get the first token's access token - token_info = token_sites[0] + # Get active tokens for access token + active_tokens = token_status.get('active_tokens', []) + if not active_tokens: + return self.create_error_response('No active Bing Webmaster tokens available') + + # Get the first active token's access token + token_info = active_tokens[0] access_token = token_info.get('access_token') - # Get the actual site URLs from Bing API when needed for analytics - # Check cache first for sites data - cached_sites = analytics_cache.get('bing_sites', user_id) - if cached_sites: - logger.info(f"Using cached Bing sites for analytics for user {user_id}") - sites = cached_sites - else: - # Fetch sites from API and cache them - logger.info(f"Fetching fresh Bing sites for analytics for user {user_id}") - sites = self.bing_service.get_user_sites(user_id) - if not sites: - return self.create_error_response('No site URLs found in Bing Webmaster API') - - # Cache the sites for future use - analytics_cache.set('bing_sites', user_id, sites, ttl_override=2*60*60) - logger.info(f"Cached Bing sites for analytics for user {user_id} (TTL: 2 hours)") + # Cache the sites for future use (even if empty) + analytics_cache.set('bing_sites', 
user_id, sites or [], ttl_override=2*60*60) + logger.info(f"Cached Bing sites for analytics for user {user_id} (TTL: 2 hours)") if not access_token: return self.create_error_response('Bing Webmaster access token not available') - # Get actual query stats for the first site using the Bing service - query_stats = await self._get_query_stats(user_id, sites) + # Do NOT call live Bing APIs here; use stored analytics like step 5 + query_stats = {} + try: + # If sites available, use first; otherwise ask storage for any stored summary + site_url_for_storage = sites[0].get('Url', '') if (sites and isinstance(sites[0], dict)) else None + stored = self.storage_service.get_analytics_summary(user_id, site_url_for_storage, days=30) + if stored and isinstance(stored, dict): + query_stats = { + 'total_clicks': stored.get('summary', {}).get('total_clicks', 0), + 'total_impressions': stored.get('summary', {}).get('total_impressions', 0), + 'total_queries': stored.get('summary', {}).get('total_queries', 0), + 'avg_ctr': stored.get('summary', {}).get('total_ctr', 0), + 'avg_position': stored.get('summary', {}).get('avg_position', 0), + } + except Exception as e: + logger.warning(f"Bing analytics: Failed to read stored analytics summary: {e}") # Get enhanced insights from database insights = self._get_enhanced_insights(user_id, sites[0].get('Url', '') if sites else '') @@ -101,7 +111,11 @@ class BingAnalyticsHandler(BaseAnalyticsHandler): 'note': 'Bing Webmaster API provides SEO insights, search performance, and index status data' } - result = self.create_success_response(metrics=metrics) + # If no stored data or no sites, return partial like step 5, else success + if (not sites) or (metrics.get('total_impressions', 0) == 0 and metrics.get('total_clicks', 0) == 0): + result = self.create_partial_response(metrics=metrics, error_message='Connected to Bing; waiting for stored analytics or site verification') + else: + result = self.create_success_response(metrics=metrics) # Cache the 
result to avoid expensive API calls analytics_cache.set('bing_analytics', user_id, result.__dict__) diff --git a/backend/services/api_key_manager.py b/backend/services/api_key_manager.py deleted file mode 100644 index 20e98683..00000000 --- a/backend/services/api_key_manager.py +++ /dev/null @@ -1,770 +0,0 @@ -"""Enhanced API Key Manager service for ALwrity backend.""" - -# This file contains the core business logic moved from lib/utils/api_key_manager/ -# It includes the OnboardingProgress class and related functionality - -import os -import json -from datetime import datetime -from typing import Dict, Any, List, Optional -from dataclasses import dataclass, asdict -from enum import Enum -from loguru import logger -from dotenv import load_dotenv - -class StepStatus(Enum): - PENDING = "pending" - IN_PROGRESS = "in_progress" - COMPLETED = "completed" - SKIPPED = "skipped" - -@dataclass -class StepData: - step_number: int - title: str - description: str - status: StepStatus - completed_at: Optional[str] = None - data: Optional[Dict[str, Any]] = None - validation_errors: List[str] = None - - def __post_init__(self): - if self.validation_errors is None: - self.validation_errors = [] - -class OnboardingProgress: - """Manages onboarding progress with persistence and validation.""" - - def __init__(self, progress_file: Optional[str] = None, user_id: Optional[str] = None): - self.steps = self._initialize_steps() - self.current_step = 1 - self.started_at = datetime.now().isoformat() - self.last_updated = datetime.now().isoformat() - self.is_completed = False - self.completed_at = None - self.user_id = user_id # Add user_id for database isolation - - # Use user-specific file for backward compatibility - if user_id: - self.progress_file = progress_file or f".onboarding_progress_{user_id}.json" - else: - self.progress_file = progress_file or ".onboarding_progress.json" - - # Initialize database service for dual persistence - try: - from services.onboarding_database_service 
import OnboardingDatabaseService - self.db_service = OnboardingDatabaseService() - self.use_database = True - logger.info(f"Database service initialized for user {user_id}") - except Exception as e: - logger.warning(f"Database service not available, using file only: {e}") - self.db_service = None - self.use_database = False - - # Load existing progress if available - self.load_progress() - - def _initialize_steps(self) -> List[StepData]: - """Initialize the 6-step onboarding process.""" - return [ - StepData(1, "AI LLM Providers", "Configure AI language model providers", StepStatus.PENDING), - StepData(2, "Website Analysis", "Set up website analysis and crawling", StepStatus.PENDING), - StepData(3, "AI Research", "Configure AI research capabilities", StepStatus.PENDING), - StepData(4, "Personalization", "Set up personalization features", StepStatus.PENDING), - StepData(5, "Integrations", "Configure ALwrity integrations", StepStatus.PENDING), - StepData(6, "Complete Setup", "Finalize and complete onboarding", StepStatus.PENDING) - ] - - def get_step_data(self, step_number: int) -> Optional[StepData]: - """Get data for a specific step.""" - for step in self.steps: - if step.step_number == step_number: - return step - return None - - def mark_step_completed(self, step_number: int, data: Optional[Dict[str, Any]] = None): - """Mark a step as completed.""" - logger.info(f"[mark_step_completed] Marking step {step_number} as completed") - step = self.get_step_data(step_number) - if step: - step.status = StepStatus.COMPLETED - step.completed_at = datetime.now().isoformat() - step.data = data - self.last_updated = datetime.now().isoformat() - - # Check if all steps are now completed - all_completed = all(s.status in [StepStatus.COMPLETED, StepStatus.SKIPPED] for s in self.steps) - - if all_completed: - # If all steps are completed, mark onboarding as complete - self.is_completed = True - self.completed_at = datetime.now().isoformat() - self.current_step = len(self.steps) # 
Set to last step number - logger.info(f"[mark_step_completed] All steps completed, marking onboarding as complete") - else: - # Only increment current_step if there are more steps to go - self.current_step = step_number + 1 - # Ensure current_step doesn't exceed total steps - if self.current_step > len(self.steps): - self.current_step = len(self.steps) - - logger.info(f"[mark_step_completed] Step {step_number} completed, new current_step: {self.current_step}, is_completed: {self.is_completed}") - self.save_progress() - logger.info(f"Step {step_number} marked as completed") - else: - logger.error(f"[mark_step_completed] Step {step_number} not found") - - def mark_step_in_progress(self, step_number: int): - """Mark a step as in progress.""" - step = self.get_step_data(step_number) - if step: - step.status = StepStatus.IN_PROGRESS - self.current_step = step_number - self.last_updated = datetime.now().isoformat() - self.save_progress() - logger.info(f"Step {step_number} marked as in progress") - - def mark_step_skipped(self, step_number: int): - """Mark a step as skipped.""" - step = self.get_step_data(step_number) - if step: - step.status = StepStatus.SKIPPED - step.completed_at = datetime.now().isoformat() - self.last_updated = datetime.now().isoformat() - - # Check if all steps are now completed - all_completed = all(s.status in [StepStatus.COMPLETED, StepStatus.SKIPPED] for s in self.steps) - - if all_completed: - # If all steps are completed, mark onboarding as complete - self.is_completed = True - self.completed_at = datetime.now().isoformat() - self.current_step = len(self.steps) # Set to last step number - logger.info(f"[mark_step_skipped] All steps completed, marking onboarding as complete") - else: - # Only increment current_step if there are more steps to go - self.current_step = step_number + 1 - # Ensure current_step doesn't exceed total steps - if self.current_step > len(self.steps): - self.current_step = len(self.steps) - - 
logger.info(f"[mark_step_skipped] Step {step_number} skipped, new current_step: {self.current_step}, is_completed: {self.is_completed}") - self.save_progress() - logger.info(f"Step {step_number} marked as skipped") - - def can_proceed_to_step(self, step_number: int) -> bool: - """Check if user can proceed to a specific step.""" - if step_number == 1: - return True # First step is always accessible - - # Check if all previous steps are completed - for step in self.steps: - if step.step_number < step_number: - if step.status not in [StepStatus.COMPLETED, StepStatus.SKIPPED]: - return False - - return True - - def can_complete_onboarding(self) -> bool: - """Check if onboarding can be completed.""" - required_steps = [1, 2, 3, 6] # Steps 1, 2, 3, and 6 are required - for step_num in required_steps: - step = self.get_step_data(step_num) - if step and step.status in [StepStatus.COMPLETED, StepStatus.SKIPPED]: - continue - - # DB-aware fallback for steps 2 and 3 - try: - from services.onboarding_database_service import OnboardingDatabaseService - from services.database import get_db - db = next(get_db()) - db_service = OnboardingDatabaseService(db) - if step_num == 2: - w = db_service.get_website_analysis(self.user_id, db) - if w and (w.get('website_url') or w.get('writing_style')): - # Mark as completed to normalize state - try: - self.mark_step_completed(2, {'source': 'db-fallback'}) - except Exception: - pass - continue - if step_num == 3: - p = db_service.get_research_preferences(self.user_id, db) - if p and p.get('research_depth'): - try: - self.mark_step_completed(3, {'source': 'db-fallback'}) - except Exception: - pass - continue - except Exception: - pass - - return False - return True - - def get_completion_percentage(self) -> float: - """Get the completion percentage.""" - completed_steps = sum(1 for step in self.steps if step.status in [StepStatus.COMPLETED, StepStatus.SKIPPED]) - - # If we have a current step that's not completed, give partial credit - if 
self.current_step > 0 and self.current_step <= len(self.steps): - # Give 50% credit for being on the current step (even if not completed) - current_step_progress = 0.5 if self.current_step > completed_steps else 0 - total_progress = completed_steps + current_step_progress - percentage = (total_progress / len(self.steps)) * 100 - logger.info(f"Progress calculation: {percentage}% (completed: {completed_steps}, current: {self.current_step}, current_progress: {current_step_progress})") - return percentage - - percentage = (completed_steps / len(self.steps)) * 100 - logger.info(f"Progress calculation (no current step): {percentage}% (completed: {completed_steps}/{len(self.steps)})") - return percentage - - def get_next_incomplete_step(self) -> Optional[int]: - """Get the next incomplete step number.""" - for step in self.steps: - if step.status not in [StepStatus.COMPLETED, StepStatus.SKIPPED]: - return step.step_number - return None - - def get_resume_step(self) -> int: - """Get the step to resume from.""" - logger.info(f"[get_resume_step] Checking resume step...") - logger.info(f"[get_resume_step] Current step: {self.current_step}") - logger.info(f"[get_resume_step] Steps status: {[f'{s.step_number}:{s.status.value}' for s in self.steps]}") - - for step in self.steps: - if step.status not in [StepStatus.COMPLETED, StepStatus.SKIPPED]: - logger.info(f"[get_resume_step] Found incomplete step: {step.step_number}") - return step.step_number - - logger.warning(f"[get_resume_step] No incomplete steps found, defaulting to step 1") - return 1 # Default to first step - - def complete_onboarding(self): - """Complete the onboarding process.""" - self.is_completed = True - self.completed_at = datetime.now().isoformat() - self.last_updated = datetime.now().isoformat() - self.save_progress() - logger.info("Onboarding completed successfully") - - def save_progress(self): - """Save progress to both file and database (dual persistence).""" - try: - # Save to JSON file (backward 
compatibility) - progress_data = { - "steps": [{ - "step_number": step.step_number, - "title": step.title, - "description": step.description, - "status": step.status.value, # Convert enum to string - "completed_at": step.completed_at, - "data": step.data, - "validation_errors": step.validation_errors - } for step in self.steps], - "current_step": self.current_step, - "started_at": self.started_at, - "last_updated": self.last_updated, - "is_completed": self.is_completed, - "completed_at": self.completed_at - } - - with open(self.progress_file, 'w') as f: - json.dump(progress_data, f, indent=2) - - logger.debug(f"Progress saved to {self.progress_file}") - - # Also save to database if available and user_id is set - if self.use_database and self.db_service and self.user_id: - try: - from services.database import SessionLocal - db = SessionLocal() - try: - # Update session progress - self.db_service.update_step(self.user_id, self.current_step, db) - - # Calculate progress percentage - completed_count = sum(1 for s in self.steps if s.status == StepStatus.COMPLETED) - progress_pct = (completed_count / len(self.steps)) * 100 - self.db_service.update_progress(self.user_id, progress_pct, db) - - # Save step-specific data to appropriate tables - for step in self.steps: - if step.status == StepStatus.COMPLETED and step.data: - if step.step_number == 1: # API Keys - api_keys = step.data.get('api_keys', {}) - for provider, key in api_keys.items(): - if key: - # Save to database (for user isolation in production) - self.db_service.save_api_key(self.user_id, provider, key, db) - - # Also save to .env file ONLY in local development - # This allows local developers to have keys in .env for convenience - # In production, keys are fetched from database per user - is_local = os.getenv('DEPLOY_ENV', 'local') == 'local' - if is_local: - try: - from services.api_key_manager import APIKeyManager - api_key_manager = APIKeyManager() - api_key_manager.save_api_key(provider, key) - 
logger.info(f"[LOCAL] API key for {provider} saved to .env file") - except Exception as env_error: - logger.warning(f"[LOCAL] Failed to save {provider} API key to .env file: {env_error}") - else: - logger.info(f"[PRODUCTION] API key for {provider} saved to database only (user: {self.user_id})") - - # Log database save confirmation - logger.info(f"βœ… DATABASE: API key for {provider} saved to database for user {self.user_id}") - elif step.step_number == 2: # Website Analysis - self.db_service.save_website_analysis(self.user_id, step.data, db) - logger.info(f"βœ… DATABASE: Website analysis saved to database for user {self.user_id}") - elif step.step_number == 3: # Research Preferences - self.db_service.save_research_preferences(self.user_id, step.data, db) - logger.info(f"βœ… DATABASE: Research preferences saved to database for user {self.user_id}") - elif step.step_number == 4: # Persona Generation - self.db_service.save_persona_data(self.user_id, step.data, db) - logger.info(f"βœ… DATABASE: Persona data saved to database for user {self.user_id}") - - logger.info(f"Progress also saved to database for user {self.user_id}") - finally: - db.close() - except Exception as db_error: - logger.warning(f"Failed to save to database, JSON file still saved: {db_error}") - # Don't fail if database save fails - JSON is still working - - except Exception as e: - logger.error(f"Error saving progress: {str(e)}") - - def load_progress(self): - """Load progress from file.""" - try: - if os.path.exists(self.progress_file): - with open(self.progress_file, 'r') as f: - progress_data = json.load(f) - - # Restore step data - for step_data in progress_data.get("steps", []): - step_num = step_data.get("step_number") - if step_num: - step = self.get_step_data(step_num) - if step: - step.status = StepStatus(step_data.get("status", "pending")) - step.completed_at = step_data.get("completed_at") - step.data = step_data.get("data") - step.validation_errors = step_data.get("validation_errors", []) 
- - # Restore other data - self.current_step = progress_data.get("current_step", 1) - self.started_at = progress_data.get("started_at", self.started_at) - self.last_updated = progress_data.get("last_updated", self.last_updated) - self.is_completed = progress_data.get("is_completed", False) - self.completed_at = progress_data.get("completed_at") - - # Fix any corrupted state - self._fix_corrupted_state() - - logger.info("Progress loaded from file") - except Exception as e: - logger.error(f"Error loading progress: {str(e)}") - - def _fix_corrupted_state(self): - """Fix any corrupted progress state.""" - # Check if all steps are completed - all_steps_completed = all(s.status in [StepStatus.COMPLETED, StepStatus.SKIPPED] for s in self.steps) - - if all_steps_completed: - # If all steps are completed, ensure is_completed is True and current_step is valid - if not self.is_completed: - logger.info(f"[_fix_corrupted_state] All steps completed but is_completed was False, fixing...") - self.is_completed = True - self.completed_at = datetime.now().isoformat() - - # Ensure current_step doesn't exceed total steps - if self.current_step > len(self.steps): - logger.info(f"[_fix_corrupted_state] Current step {self.current_step} exceeds total steps {len(self.steps)}, fixing...") - self.current_step = len(self.steps) - self.save_progress() - else: - # If not all steps are completed, ensure is_completed is False - if self.is_completed: - logger.info(f"[_fix_corrupted_state] Not all steps completed but is_completed was True, fixing...") - self.is_completed = False - self.completed_at = None - self.save_progress() - - def reset_progress(self): - """Reset all progress.""" - self.steps = self._initialize_steps() - self.current_step = 1 - self.started_at = datetime.now().isoformat() - self.last_updated = datetime.now().isoformat() - self.is_completed = False - self.completed_at = None - self.save_progress() - logger.info("Progress reset successfully") - -class APIKeyManager: - """Enhanced 
manager for handling API keys with setup instructions.""" - - def __init__(self): - self.api_keys = { - "openai": None, - "gemini": None, - "anthropic": None, - "mistral": None, - "tavily": None, - "serper": None, - "metaphor": None, # legacy mapping for Exa, kept for backward compatibility - "exa": None, - "firecrawl": None, - "stability": None, - "copilotkit": None, - } - self.load_api_keys() - - # Enhanced provider setup instructions - self.api_key_groups = { - "Create": { - "GEMINI_API_KEY": { - "url": "https://makersuite.google.com/app/apikey", - "description": "Google's Gemini AI for content generation", - "setup_steps": [ - "Visit Google AI Studio", - "Create a Google Cloud account", - "Enable Gemini API", - "Generate API key" - ] - }, - "OPENAI_API_KEY": { - "url": "https://platform.openai.com/api-keys", - "description": "OpenAI's GPT models for content creation", - "setup_steps": [ - "Go to OpenAI platform", - "Create an account", - "Navigate to API keys", - "Create new API key" - ] - }, - "MISTRAL_API_KEY": { - "url": "https://console.mistral.ai/api-keys/", - "description": "Mistral AI for efficient content generation", - "setup_steps": [ - "Visit Mistral AI website", - "Sign up for an account", - "Access API section", - "Generate API key" - ] - }, - "ANTHROPIC_API_KEY": { - "url": "https://console.anthropic.com/", - "description": "Anthropic's Claude models for content creation", - "setup_steps": [ - "Visit Anthropic console", - "Create an account", - "Navigate to API keys", - "Generate API key" - ] - } - }, - "Research": { - "TAVILY_API_KEY": { - "url": "https://tavily.com/#api", - "description": "Powers intelligent web research features", - "setup_steps": [ - "Go to Tavily's website", - "Create an account", - "Access your API dashboard", - "Generate a new API key" - ] - }, - "SERPER_API_KEY": { - "url": "https://serper.dev/signup", - "description": "Enables Google search functionality", - "setup_steps": [ - "Visit Serper.dev", - "Sign up for an 
account", - "Go to API section", - "Create your API key" - ] - } - }, - "Deep Search": { - "EXA_API_KEY": { - "url": "https://dashboard.exa.ai/login", - "description": "Exa (formerly Metaphor) for advanced web search", - "setup_steps": [ - "Visit the Exa AI dashboard", - "Sign up for a free account", - "Navigate to API Keys section", - "Create a new API key" - ] - }, - "FIRECRAWL_API_KEY": { - "url": "https://www.firecrawl.dev/account", - "description": "Enables web content extraction", - "setup_steps": [ - "Visit Firecrawl website", - "Sign up for an account", - "Access API dashboard", - "Create your API key" - ] - } - }, - "Integrations": { - "STABILITY_API_KEY": { - "url": "https://platform.stability.ai/", - "description": "Enables AI image generation", - "setup_steps": [ - "Access Stability AI platform", - "Create an account", - "Navigate to API settings", - "Generate your API key" - ] - } - }, - "UI": { - "COPILOTKIT_API_KEY": { - "url": "https://copilotkit.ai", - "description": "CopilotKit public API key for in-app assistant", - "setup_steps": [ - "Sign up or log in to CopilotKit", - "Navigate to API Keys", - "Generate a public API key (ck_pub_...)" - ] - } - } - } - - def save_api_key(self, provider: str, api_key: str) -> bool: - """Save an API key for a provider.""" - try: - if provider in self.api_keys: - self.api_keys[provider] = api_key - - # Save to database if available and user_id is set - if hasattr(self, 'use_database') and self.use_database and hasattr(self, 'db_service') and self.db_service and hasattr(self, 'user_id') and self.user_id: - try: - from services.database import SessionLocal - db = SessionLocal() - try: - self.db_service.save_api_key(self.user_id, provider, api_key, db) - logger.info(f"βœ… DATABASE: API key for {provider} saved to database for user {self.user_id}") - finally: - db.close() - except Exception as db_error: - logger.warning(f"Failed to save {provider} API key to database: {db_error}") - - # Also save to .env file in local 
mode - is_local = os.getenv('DEPLOY_ENV', 'local') == 'local' - if is_local: - # Special handling for CopilotKit - save to frontend/.env - if provider == 'copilotkit': - self._save_to_frontend_env(api_key) - logger.info(f"[LOCAL] CopilotKit API key saved to frontend/.env file") - else: - # Save other keys to backend/.env - self._save_to_env_file(provider, api_key) - logger.info(f"[LOCAL] API key for {provider} saved to backend/.env file") - else: - logger.info(f"[PRODUCTION] API key for {provider} saved to memory only (database handles persistence)") - - return True - else: - logger.error(f"Unknown provider: {provider}") - return False - except Exception as e: - logger.error(f"Error saving API key: {str(e)}") - return False - - def get_api_key(self, provider: str) -> Optional[str]: - """Get API key for a provider.""" - return self.api_keys.get(provider) - - def get_all_keys(self) -> Dict[str, str]: - """Get all configured API keys.""" - return {k: v for k, v in self.api_keys.items() if v is not None} - - def load_api_keys(self): - """Load API keys from environment variables.""" - # Reload environment variables first - use backend directory path - import os - backend_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - env_path = os.path.join(backend_dir, ".env") - load_dotenv(env_path, override=True) - - env_mapping = { - "OPENAI_API_KEY": "openai", - "GEMINI_API_KEY": "gemini", - "ANTHROPIC_API_KEY": "anthropic", - "MISTRAL_API_KEY": "mistral", - "TAVILY_API_KEY": "tavily", - "SERPER_API_KEY": "serper", - "METAPHOR_API_KEY": "metaphor", # legacy - "EXA_API_KEY": "exa", - "FIRECRAWL_API_KEY": "firecrawl", - "STABILITY_API_KEY": "stability", - "COPILOTKIT_API_KEY": "copilotkit", - } - - for env_var, provider in env_mapping.items(): - api_key = os.getenv(env_var) - if api_key: - self.api_keys[provider] = api_key - - def get_provider_setup_info(self, provider: str) -> Optional[Dict[str, Any]]: - """Get setup information for a specific provider.""" - for 
group_name, providers in self.api_key_groups.items(): - for env_var, info in providers.items(): - if env_var.lower().replace('_api_key', '').replace('_key', '') == provider: - return { - "provider": provider, - "group": group_name, - "url": info["url"], - "description": info["description"], - "setup_steps": info["setup_steps"] - } - return None - - def get_all_providers_info(self) -> Dict[str, Any]: - """Get information for all providers.""" - return { - "groups": self.api_key_groups, - "configured_providers": [k for k, v in self.api_keys.items() if v], - "total_providers": len(self.api_keys) - } - - def _save_to_frontend_env(self, api_key: str): - """Save CopilotKit API key to frontend/.env file.""" - try: - # Get the frontend directory path - backend_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - frontend_dir = os.path.join(os.path.dirname(backend_dir), "frontend") - env_path = os.path.join(frontend_dir, ".env") - - # Read existing .env file - if os.path.exists(env_path): - with open(env_path, 'r', encoding='utf-8', errors='ignore') as f: - lines = f.readlines() - else: - lines = [] - - # Update or add REACT_APP_COPILOTKIT_API_KEY - key_found = False - updated_lines = [] - env_var = "REACT_APP_COPILOTKIT_API_KEY" - - for line in lines: - if line.startswith(f"{env_var}="): - updated_lines.append(f"{env_var}={api_key}\n") - key_found = True - else: - updated_lines.append(line) - - if not key_found: - # Ensure the file ends with a newline before adding new key - if updated_lines and not updated_lines[-1].endswith('\n'): - updated_lines[-1] += '\n' - updated_lines.append(f"{env_var}={api_key}\n") - - # Write back to frontend .env file - with open(env_path, 'w', encoding='utf-8') as f: - f.writelines(updated_lines) - - logger.debug(f"CopilotKit API key saved to frontend .env file") - - except Exception as e: - logger.error(f"Error saving to frontend .env file: {str(e)}") - - def _save_to_env_file(self, provider: str, api_key: str): - """Save API 
key to backend .env file.""" - try: - env_mapping = { - "openai": "OPENAI_API_KEY", - "gemini": "GEMINI_API_KEY", - "anthropic": "ANTHROPIC_API_KEY", - "mistral": "MISTRAL_API_KEY", - "tavily": "TAVILY_API_KEY", - "serper": "SERPER_API_KEY", - "metaphor": "METAPHOR_API_KEY", # legacy - "exa": "EXA_API_KEY", - "firecrawl": "FIRECRAWL_API_KEY", - "stability": "STABILITY_API_KEY", - "copilotkit": "COPILOTKIT_API_KEY", - } - - env_var = env_mapping.get(provider) - if env_var: - # Update environment variable - os.environ[env_var] = api_key - - # Update .env file - use backend directory path - backend_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - env_path = os.path.join(backend_dir, ".env") - if os.path.exists(env_path): - with open(env_path, 'r', encoding='utf-8', errors='ignore') as f: - lines = f.readlines() - else: - lines = [] - - key_found = False - updated_lines = [] - for line in lines: - if line.startswith(f"{env_var}="): - updated_lines.append(f"{env_var}={api_key}\n") - key_found = True - else: - updated_lines.append(line) - - if not key_found: - # Ensure the file ends with a newline before adding new key - if updated_lines and not updated_lines[-1].endswith('\n'): - updated_lines[-1] += '\n' - updated_lines.append(f"{env_var}={api_key}\n") - - with open(env_path, 'w', encoding='utf-8') as f: - f.writelines(updated_lines) - - # Reload environment variables into current process - load_dotenv(env_path, override=True) - - # Verify the key is now in environment - loaded_key = os.environ.get(env_var) - if loaded_key == api_key: - logger.info(f"βœ… {env_var} loaded into environment (available for immediate use)") - else: - logger.warning(f"⚠️ {env_var} written to .env but not in environment yet") - - logger.debug(f"API key saved to .env file for {provider}") - except Exception as e: - logger.error(f"Error saving to .env file: {str(e)}") - -# Global instance for the application -_onboarding_progress = None -_user_onboarding_progress_cache: 
Dict[str, OnboardingProgress] = {} - -def get_onboarding_progress() -> OnboardingProgress: - """Get the global onboarding progress instance.""" - if not hasattr(get_onboarding_progress, '_instance'): - get_onboarding_progress._instance = OnboardingProgress() - return get_onboarding_progress._instance - -def get_onboarding_progress_for_user(user_id: str) -> OnboardingProgress: - """Get or create a per-user onboarding progress instance with database persistence.""" - global _user_onboarding_progress_cache - safe_user_id = ''.join([c if c.isalnum() or c in ('-', '_') else '_' for c in str(user_id)]) - if safe_user_id in _user_onboarding_progress_cache: - return _user_onboarding_progress_cache[safe_user_id] - - # Create user-specific progress file for backward compatibility - progress_file = f".onboarding_progress_{safe_user_id}.json" - - # Pass user_id to enable database persistence - instance = OnboardingProgress(progress_file=progress_file, user_id=user_id) - _user_onboarding_progress_cache[safe_user_id] = instance - return instance - -def get_api_key_manager() -> APIKeyManager: - """Get the global API key manager instance.""" - if not hasattr(get_api_key_manager, '_instance'): - get_api_key_manager._instance = APIKeyManager() - return get_api_key_manager._instance \ No newline at end of file diff --git a/backend/services/blog_writer/circuit_breaker.py b/backend/services/blog_writer/circuit_breaker.py new file mode 100644 index 00000000..4ece34cc --- /dev/null +++ b/backend/services/blog_writer/circuit_breaker.py @@ -0,0 +1,209 @@ +""" +Circuit Breaker Pattern for Blog Writer API Calls + +Implements circuit breaker pattern to prevent cascading failures when external APIs +are experiencing issues. Tracks failure rates and automatically disables calls when +threshold is exceeded, with auto-recovery after cooldown period. 
+""" + +import time +import asyncio +from typing import Callable, Any, Optional, Dict +from enum import Enum +from dataclasses import dataclass +from loguru import logger + +from .exceptions import CircuitBreakerOpenException + + +class CircuitState(Enum): + """Circuit breaker states.""" + CLOSED = "closed" # Normal operation + OPEN = "open" # Circuit is open, calls are blocked + HALF_OPEN = "half_open" # Testing if service is back + + +@dataclass +class CircuitBreakerConfig: + """Configuration for circuit breaker.""" + failure_threshold: int = 5 # Number of failures before opening + recovery_timeout: int = 60 # Seconds to wait before trying again + success_threshold: int = 3 # Successes needed to close from half-open + timeout: int = 30 # Timeout for individual calls + max_failures_per_minute: int = 10 # Max failures per minute before opening + + +class CircuitBreaker: + """Circuit breaker implementation for API calls.""" + + def __init__(self, name: str, config: Optional[CircuitBreakerConfig] = None): + self.name = name + self.config = config or CircuitBreakerConfig() + self.state = CircuitState.CLOSED + self.failure_count = 0 + self.success_count = 0 + self.last_failure_time = 0 + self.last_success_time = 0 + self.failure_times = [] # Track failure times for rate limiting + self._lock = asyncio.Lock() + + async def call(self, func: Callable, *args, **kwargs) -> Any: + """ + Execute function with circuit breaker protection. 
+ + Args: + func: Function to execute + *args: Function arguments + **kwargs: Function keyword arguments + + Returns: + Function result + + Raises: + CircuitBreakerOpenException: If circuit is open + """ + async with self._lock: + # Check if circuit should be opened due to rate limiting + await self._check_rate_limit() + + # Check circuit state + if self.state == CircuitState.OPEN: + if self._should_attempt_reset(): + self.state = CircuitState.HALF_OPEN + self.success_count = 0 + logger.info(f"Circuit breaker {self.name} transitioning to HALF_OPEN") + else: + retry_after = int(self.config.recovery_timeout - (time.time() - self.last_failure_time)) + raise CircuitBreakerOpenException( + f"Circuit breaker {self.name} is OPEN", + retry_after=max(0, retry_after), + context={"circuit_name": self.name, "state": self.state.value} + ) + + try: + # Execute the function with timeout + result = await asyncio.wait_for( + func(*args, **kwargs), + timeout=self.config.timeout + ) + + # Record success + await self._record_success() + return result + + except asyncio.TimeoutError: + await self._record_failure("timeout") + raise + except Exception as e: + await self._record_failure(str(e)) + raise + + async def _check_rate_limit(self): + """Check if failure rate exceeds threshold.""" + current_time = time.time() + + # Remove failures older than 1 minute + self.failure_times = [ + failure_time for failure_time in self.failure_times + if current_time - failure_time < 60 + ] + + # Check if we've exceeded the rate limit + if len(self.failure_times) >= self.config.max_failures_per_minute: + self.state = CircuitState.OPEN + self.last_failure_time = current_time + logger.warning(f"Circuit breaker {self.name} opened due to rate limit: {len(self.failure_times)} failures in last minute") + + def _should_attempt_reset(self) -> bool: + """Check if enough time has passed to attempt reset.""" + return time.time() - self.last_failure_time >= self.config.recovery_timeout + + async def 
_record_success(self): + """Record a successful call.""" + async with self._lock: + self.last_success_time = time.time() + + if self.state == CircuitState.HALF_OPEN: + self.success_count += 1 + if self.success_count >= self.config.success_threshold: + self.state = CircuitState.CLOSED + self.failure_count = 0 + logger.info(f"Circuit breaker {self.name} closed after {self.success_count} successes") + elif self.state == CircuitState.CLOSED: + # Reset failure count on success + self.failure_count = 0 + + async def _record_failure(self, error: str): + """Record a failed call.""" + async with self._lock: + current_time = time.time() + self.failure_count += 1 + self.last_failure_time = current_time + self.failure_times.append(current_time) + + logger.warning(f"Circuit breaker {self.name} recorded failure #{self.failure_count}: {error}") + + # Open circuit if threshold exceeded + if self.failure_count >= self.config.failure_threshold: + self.state = CircuitState.OPEN + logger.error(f"Circuit breaker {self.name} opened after {self.failure_count} failures") + + def get_state(self) -> Dict[str, Any]: + """Get current circuit breaker state.""" + return { + "name": self.name, + "state": self.state.value, + "failure_count": self.failure_count, + "success_count": self.success_count, + "last_failure_time": self.last_failure_time, + "last_success_time": self.last_success_time, + "failures_in_last_minute": len([ + t for t in self.failure_times + if time.time() - t < 60 + ]) + } + + +class CircuitBreakerManager: + """Manages multiple circuit breakers.""" + + def __init__(self): + self._breakers: Dict[str, CircuitBreaker] = {} + + def get_breaker(self, name: str, config: Optional[CircuitBreakerConfig] = None) -> CircuitBreaker: + """Get or create a circuit breaker.""" + if name not in self._breakers: + self._breakers[name] = CircuitBreaker(name, config) + return self._breakers[name] + + def get_all_states(self) -> Dict[str, Dict[str, Any]]: + """Get states of all circuit breakers.""" 
+ return {name: breaker.get_state() for name, breaker in self._breakers.items()} + + def reset_breaker(self, name: str): + """Reset a circuit breaker to closed state.""" + if name in self._breakers: + self._breakers[name].state = CircuitState.CLOSED + self._breakers[name].failure_count = 0 + self._breakers[name].success_count = 0 + logger.info(f"Circuit breaker {name} manually reset") + + +# Global circuit breaker manager +circuit_breaker_manager = CircuitBreakerManager() + + +def circuit_breaker(name: str, config: Optional[CircuitBreakerConfig] = None): + """ + Decorator to add circuit breaker protection to async functions. + + Args: + name: Circuit breaker name + config: Circuit breaker configuration + """ + def decorator(func: Callable) -> Callable: + async def wrapper(*args, **kwargs): + breaker = circuit_breaker_manager.get_breaker(name, config) + return await breaker.call(func, *args, **kwargs) + return wrapper + return decorator diff --git a/backend/services/blog_writer/database_task_manager.py b/backend/services/blog_writer/database_task_manager.py new file mode 100644 index 00000000..3d1f6771 --- /dev/null +++ b/backend/services/blog_writer/database_task_manager.py @@ -0,0 +1,536 @@ +""" +Database-Backed Task Manager for Blog Writer + +Replaces in-memory task storage with persistent database storage for +reliability, recovery, and analytics. 
class DatabaseTaskManager:
    """Database-backed task manager for blog writer operations.

    Persists tasks, progress messages and metrics through the injected
    ``db_connection`` and runs research / outline / medium-generation
    operations as background asyncio tasks.

    NOTE(review): the connection is used with ``$n`` placeholders and
    ``execute`` / ``fetch`` / ``fetchrow`` / ``fetchval`` coroutines, which
    looks like an asyncpg connection or pool -- confirm against the caller.
    """

    def __init__(self, db_connection):
        self.db = db_connection
        self.service = BlogWriterService()
        self._cleanup_task = None
        # Strong references to fire-and-forget tasks. The event loop only
        # keeps weak references, so an unreferenced task can be collected
        # before it finishes.
        self._background_tasks = set()
        self._start_cleanup_task()

    def _start_cleanup_task(self):
        """Start background task to clean up old completed tasks.

        Idempotent. When no event loop is running (e.g. the manager is built
        at import time) the start is deferred; ``create_task`` retries it.
        """
        if self._cleanup_task is not None and not self._cleanup_task.done():
            return

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            logger.debug("No running event loop yet; blog writer cleanup task start deferred")
            return

        async def cleanup_loop():
            while True:
                try:
                    await self.cleanup_old_tasks()
                    await asyncio.sleep(3600)  # Run every hour
                except Exception as e:
                    logger.error(f"Error in cleanup task: {e}")
                    await asyncio.sleep(300)  # Wait 5 minutes on error

        self._cleanup_task = asyncio.create_task(cleanup_loop())

    def _spawn_background(self, coro):
        """Schedule ``coro`` as a task and hold a reference until it is done."""
        task = asyncio.create_task(coro)
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return task

    @staticmethod
    def _decode_json(value, default=None):
        """Decode a JSON column value.

        Returns ``default`` for NULL/empty values, parses text, and passes
        through values the driver has already decoded.
        """
        if not value:
            return default
        if isinstance(value, (str, bytes, bytearray)):
            return json.loads(value)
        return value

    @log_function_call("create_task")
    async def create_task(
        self,
        user_id: str,
        task_type: str,
        request_data: Dict[str, Any],
        correlation_id: Optional[str] = None,
        operation: Optional[str] = None,
        priority: int = 0,
        max_retries: int = 3,
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """Create a new task in the database.

        Inserts a row with status ``'pending'`` and returns the generated
        task id. A correlation id is generated when none is supplied.
        """
        # Covers the case where __init__ ran without an event loop.
        self._start_cleanup_task()

        task_id = str(uuid.uuid4())
        correlation_id = correlation_id or str(uuid.uuid4())

        query = """
            INSERT INTO blog_writer_tasks
            (id, user_id, task_type, status, request_data, correlation_id, operation, priority, max_retries, metadata)
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
        """

        await self.db.execute(
            query,
            task_id,
            user_id,
            task_type,
            'pending',
            json.dumps(request_data),
            correlation_id,
            operation,
            priority,
            max_retries,
            json.dumps(metadata or {})
        )

        blog_writer_logger.log_operation_start(
            "task_created",
            task_id=task_id,
            task_type=task_type,
            user_id=user_id,
            correlation_id=correlation_id
        )

        return task_id

    @log_function_call("get_task_status")
    async def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
        """Get the status of a task.

        Returns ``None`` when the task does not exist; otherwise a dict with
        the task fields and its 10 most recent progress messages.
        """
        query = """
            SELECT
                id, user_id, task_type, status, request_data, result_data, error_data,
                created_at, updated_at, completed_at, correlation_id, operation,
                retry_count, max_retries, priority, metadata
            FROM blog_writer_tasks
            WHERE id = $1
        """

        task_row = await self.db.fetchrow(query, task_id)
        if not task_row:
            return None

        # Get progress messages
        progress_query = """
            SELECT timestamp, message, percentage, progress_type, metadata
            FROM blog_writer_task_progress
            WHERE task_id = $1
            ORDER BY timestamp DESC
            LIMIT 10
        """

        progress_rows = await self.db.fetch(progress_query, task_id)
        progress_messages = [
            {
                "timestamp": entry["timestamp"].isoformat(),
                "message": entry["message"],
                # Guard against a NULL percentage instead of raising TypeError.
                "percentage": float(entry["percentage"]) if entry["percentage"] is not None else 0.0,
                "progress_type": entry["progress_type"],
                # Decoded the same way as the task-level metadata below.
                "metadata": self._decode_json(entry["metadata"], {})
            }
            for entry in progress_rows
        ]

        completed_at = task_row["completed_at"]
        return {
            "task_id": task_row["id"],
            "user_id": task_row["user_id"],
            "task_type": task_row["task_type"],
            "status": task_row["status"],
            "created_at": task_row["created_at"].isoformat(),
            "updated_at": task_row["updated_at"].isoformat(),
            "completed_at": completed_at.isoformat() if completed_at else None,
            "correlation_id": task_row["correlation_id"],
            "operation": task_row["operation"],
            "retry_count": task_row["retry_count"],
            "max_retries": task_row["max_retries"],
            "priority": task_row["priority"],
            "progress_messages": progress_messages,
            "result": self._decode_json(task_row["result_data"], None),
            "error": self._decode_json(task_row["error_data"], None),
            "metadata": self._decode_json(task_row["metadata"], {})
        }

    @log_function_call("update_task_status")
    async def update_task_status(
        self,
        task_id: str,
        status: str,
        result_data: Optional[Dict[str, Any]] = None,
        error_data: Optional[Dict[str, Any]] = None,
        completed_at: Optional[datetime] = None
    ):
        """Update task status and data.

        ``completed_at`` defaults to the current time for the terminal
        statuses ``completed`` / ``failed`` / ``cancelled`` and to NULL
        otherwise.
        """
        query = """
            UPDATE blog_writer_tasks
            SET status = $2, result_data = $3, error_data = $4, completed_at = $5, updated_at = NOW()
            WHERE id = $1
        """

        is_terminal = status in ['completed', 'failed', 'cancelled']
        await self.db.execute(
            query,
            task_id,
            status,
            json.dumps(result_data) if result_data else None,
            json.dumps(error_data) if error_data else None,
            completed_at or (datetime.now() if is_terminal else None)
        )

        blog_writer_logger.log_operation_end(
            "task_status_updated",
            0,
            success=status in ['completed', 'cancelled'],
            task_id=task_id,
            status=status
        )

    @log_function_call("update_progress")
    async def update_progress(
        self,
        task_id: str,
        message: str,
        percentage: Optional[float] = None,
        progress_type: str = "info",
        metadata: Optional[Dict[str, Any]] = None
    ):
        """Update task progress.

        Appends a progress record and moves the task from ``pending`` to
        ``running`` if it has not started yet.
        """
        # Insert progress record
        progress_query = """
            INSERT INTO blog_writer_task_progress
            (task_id, message, percentage, progress_type, metadata)
            VALUES ($1, $2, $3, $4, $5)
        """

        await self.db.execute(
            progress_query,
            task_id,
            message,
            percentage or 0.0,
            progress_type,
            json.dumps(metadata or {})
        )

        # Update task status to running if it was pending
        status_query = """
            UPDATE blog_writer_tasks
            SET status = 'running', updated_at = NOW()
            WHERE id = $1 AND status = 'pending'
        """

        await self.db.execute(status_query, task_id)

        logger.info(f"Progress update for task {task_id}: {message}")

    @log_function_call("record_metrics")
    async def record_metrics(
        self,
        task_id: str,
        operation: str,
        duration_ms: int,
        token_usage: Optional[Dict[str, int]] = None,
        api_calls: int = 0,
        cache_hits: int = 0,
        cache_misses: int = 0,
        error_count: int = 0,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """Record performance metrics for a task."""
        query = """
            INSERT INTO blog_writer_task_metrics
            (task_id, operation, duration_ms, token_usage, api_calls, cache_hits, cache_misses, error_count, metadata)
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
        """

        await self.db.execute(
            query,
            task_id,
            operation,
            duration_ms,
            json.dumps(token_usage) if token_usage else None,
            api_calls,
            cache_hits,
            cache_misses,
            error_count,
            json.dumps(metadata or {})
        )

        blog_writer_logger.log_performance(
            f"task_metrics_{operation}",
            duration_ms,
            "ms",
            task_id=task_id,
            operation=operation,
            api_calls=api_calls,
            cache_hits=cache_hits,
            cache_misses=cache_misses
        )

    @log_function_call("increment_retry_count")
    async def increment_retry_count(self, task_id: str) -> int:
        """Increment retry count and return new count (0 if the task is unknown)."""
        query = """
            UPDATE blog_writer_tasks
            SET retry_count = retry_count + 1, updated_at = NOW()
            WHERE id = $1
            RETURNING retry_count
        """

        result = await self.db.fetchval(query, task_id)
        return result or 0

    @log_function_call("cleanup_old_tasks")
    async def cleanup_old_tasks(self, days: int = 7) -> int:
        """Clean up old completed tasks.

        Deletes tasks in a terminal status created more than ``days`` days
        ago and returns the number of deleted rows.
        """
        # ``days`` is passed as a bound parameter, never formatted into the SQL.
        query = """
            DELETE FROM blog_writer_tasks
            WHERE status IN ('completed', 'failed', 'cancelled')
            AND created_at < NOW() - ($1::int * INTERVAL '1 day')
        """

        result = await self.db.execute(query, int(days))
        # The status string ends with the affected row count (e.g. "DELETE 3").
        deleted_count = int(result.split()[-1]) if result else 0

        if deleted_count > 0:
            logger.info(f"Cleaned up {deleted_count} old blog writer tasks")

        return deleted_count

    @log_function_call("get_user_tasks")
    async def get_user_tasks(
        self,
        user_id: str,
        limit: int = 50,
        offset: int = 0,
        status_filter: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """Get tasks for a specific user, newest first."""
        query = """
            SELECT
                id, task_type, status, created_at, updated_at, completed_at,
                operation, retry_count, max_retries, priority
            FROM blog_writer_tasks
            WHERE user_id = $1
        """

        params = [user_id]
        param_count = 1

        if status_filter:
            param_count += 1
            query += f" AND status = ${param_count}"
            params.append(status_filter)

        # Only placeholder indexes are interpolated; values stay parameterized.
        query += f" ORDER BY created_at DESC LIMIT ${param_count + 1} OFFSET ${param_count + 2}"
        params.extend([limit, offset])

        rows = await self.db.fetch(query, *params)

        return [
            {
                "task_id": row["id"],
                "task_type": row["task_type"],
                "status": row["status"],
                "created_at": row["created_at"].isoformat(),
                "updated_at": row["updated_at"].isoformat(),
                "completed_at": row["completed_at"].isoformat() if row["completed_at"] else None,
                "operation": row["operation"],
                "retry_count": row["retry_count"],
                "max_retries": row["max_retries"],
                "priority": row["priority"]
            }
            for row in rows
        ]

    @log_function_call("get_task_analytics")
    async def get_task_analytics(self, days: int = 7) -> Dict[str, Any]:
        """Get task analytics for monitoring.

        Aggregates tasks created in the last ``days`` days by task type and
        status.
        """
        # ``days`` is passed as a bound parameter, never formatted into the SQL.
        query = """
            SELECT
                task_type,
                status,
                COUNT(*) as task_count,
                AVG(EXTRACT(EPOCH FROM (COALESCE(completed_at, NOW()) - created_at))) as avg_duration_seconds,
                COUNT(CASE WHEN status = 'completed' THEN 1 END) as completed_count,
                COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed_count,
                COUNT(CASE WHEN status = 'running' THEN 1 END) as running_count
            FROM blog_writer_tasks
            WHERE created_at >= NOW() - ($1::int * INTERVAL '1 day')
            GROUP BY task_type, status
            ORDER BY task_type, status
        """

        rows = await self.db.fetch(query, int(days))

        analytics = {
            "summary": {
                "total_tasks": sum(row["task_count"] for row in rows),
                "completed_tasks": sum(row["completed_count"] for row in rows),
                "failed_tasks": sum(row["failed_count"] for row in rows),
                "running_tasks": sum(row["running_count"] for row in rows)
            },
            "by_task_type": {},
            "by_status": {}
        }

        for row in rows:
            task_type = row["task_type"]
            status = row["status"]

            per_type = analytics["by_task_type"].setdefault(task_type, {})
            per_type[status] = {
                "count": row["task_count"],
                "avg_duration_seconds": float(row["avg_duration_seconds"]) if row["avg_duration_seconds"] else 0
            }

            analytics["by_status"][status] = analytics["by_status"].get(status, 0) + row["task_count"]

        return analytics

    # Task execution methods (same as original but with database persistence)
    async def start_research_task(self, request: BlogResearchRequest, user_id: str) -> str:
        """Start a research operation and return a task ID."""
        task_id = await self.create_task(
            user_id=user_id,
            task_type="research",
            request_data=request.dict(),
            operation="research_operation"
        )

        # Start the research operation in the background
        self._spawn_background(self._run_research_task(task_id, request))

        return task_id

    async def start_outline_task(self, request: BlogOutlineRequest, user_id: str) -> str:
        """Start an outline generation operation and return a task ID."""
        task_id = await self.create_task(
            user_id=user_id,
            task_type="outline",
            request_data=request.dict(),
            operation="outline_generation"
        )

        # Start the outline generation operation in the background
        self._spawn_background(self._run_outline_generation_task(task_id, request))

        return task_id

    async def start_medium_generation_task(self, request: MediumBlogGenerateRequest, user_id: str) -> str:
        """Start a medium blog generation task and return a task ID."""
        task_id = await self.create_task(
            user_id=user_id,
            task_type="medium_generation",
            request_data=request.dict(),
            operation="medium_blog_generation"
        )

        self._spawn_background(self._run_medium_generation_task(task_id, request))
        return task_id

    async def _record_task_failure(self, task_id: str, operation: str, error: Exception, progress_message: str):
        """Best-effort failure bookkeeping shared by the background runners.

        Each persistence step is attempted independently so a failing
        progress insert cannot prevent the task from being marked ``failed``.
        """
        try:
            await self.update_progress(task_id, progress_message, 100, "error")
        except Exception as progress_error:
            logger.error(f"Could not record failure progress for task {task_id}: {progress_error}")

        try:
            await self.update_task_status(
                task_id,
                "failed",
                error_data={"error_message": str(error), "error_type": type(error).__name__}
            )
        except Exception as status_error:
            logger.error(f"Could not mark task {task_id} as failed: {status_error}")

        blog_writer_logger.log_error(error, operation, context={"task_id": task_id})

    async def _run_research_task(self, task_id: str, request: BlogResearchRequest):
        """Background task to run research and update status with progress messages."""
        try:
            await self.update_progress(task_id, "🔍 Starting research operation...", 0)

            # Run the actual research with progress updates
            result = await self.service.research_with_progress(request, task_id)

            # Check if research failed gracefully
            if not result.success:
                await self.update_progress(
                    task_id,
                    f"❌ Research failed: {result.error_message or 'Unknown error'}",
                    100,
                    "error"
                )
                await self.update_task_status(
                    task_id,
                    "failed",
                    error_data={
                        "error_message": result.error_message,
                        "retry_suggested": result.retry_suggested,
                        "error_code": result.error_code,
                        "actionable_steps": result.actionable_steps
                    }
                )
            else:
                await self.update_progress(
                    task_id,
                    f"✅ Research completed successfully! Found {len(result.sources)} sources and {len(result.search_queries or [])} search queries.",
                    100,
                    "success"
                )
                await self.update_task_status(
                    task_id,
                    "completed",
                    result_data=result.dict()
                )

        except Exception as e:
            await self._record_task_failure(
                task_id,
                "research_task",
                e,
                f"❌ Research failed with error: {str(e)}"
            )

    async def _run_outline_generation_task(self, task_id: str, request: BlogOutlineRequest):
        """Background task to run outline generation and update status with progress messages."""
        try:
            await self.update_progress(task_id, "🧩 Starting outline generation...", 0)

            # Run the actual outline generation with progress updates
            result = await self.service.generate_outline_with_progress(request, task_id)

            await self.update_progress(
                task_id,
                f"✅ Outline generated successfully! Created {len(result.outline)} sections with {len(result.title_options)} title options.",
                100,
                "success"
            )
            await self.update_task_status(task_id, "completed", result_data=result.dict())

        except Exception as e:
            await self._record_task_failure(
                task_id,
                "outline_generation_task",
                e,
                f"❌ Outline generation failed: {str(e)}"
            )

    async def _run_medium_generation_task(self, task_id: str, request: MediumBlogGenerateRequest):
        """Background task to generate a medium blog using a single structured JSON call."""
        try:
            await self.update_progress(task_id, "📦 Packaging outline and metadata...", 0)

            # Basic guard: respect global target words
            total_target = int(request.globalTargetWords or 1000)
            if total_target > 1000:
                raise ValueError("Global target words exceed 1000; medium generation not allowed")

            result: MediumBlogGenerateResult = await self.service.generate_medium_blog_with_progress(
                request,
                task_id,
            )

            if not result or not getattr(result, "sections", None):
                raise ValueError("Empty generation result from model")

            # Check if result came from cache
            cache_hit = getattr(result, 'cache_hit', False)
            if cache_hit:
                await self.update_progress(task_id, "⚡ Found cached content - loading instantly!", 100, "success")
            else:
                await self.update_progress(task_id, "🤖 Generated fresh content with AI...", 100, "success")

            await self.update_task_status(task_id, "completed", result_data=result.dict())

        except Exception as e:
            await self._record_task_failure(
                task_id,
                "medium_generation_task",
                e,
                f"❌ Medium generation failed: {str(e)}"
            )
"""
Blog Writer Exception Hierarchy

Defines custom exception classes for different failure modes in the AI Blog Writer.
Each exception includes error_code, user_message, retry_suggested, and actionable_steps.
"""

from typing import List, Optional, Dict, Any
from enum import Enum


class ErrorCategory(Enum):
    """Categories for error classification."""
    TRANSIENT = "transient"  # Temporary issues, retry recommended
    PERMANENT = "permanent"  # Permanent issues, no retry
    USER_ERROR = "user_error"  # User input issues, fix input
    API_ERROR = "api_error"  # External API issues
    VALIDATION_ERROR = "validation_error"  # Data validation issues
    SYSTEM_ERROR = "system_error"  # Internal system issues


class BlogWriterException(Exception):
    """Base exception for all Blog Writer errors.

    Args:
        message: Internal message; becomes ``str(exc)``.
        error_code: Stable machine-readable code.
        user_message: Text that is safe to show to the end user.
        retry_suggested: Whether retrying the operation may help.
        actionable_steps: Suggested next steps; ``None`` becomes ``[]``.
        error_category: Classification of the failure.
        context: Extra diagnostic data; ``None`` becomes ``{}``.
    """

    def __init__(
        self,
        message: str,
        error_code: str,
        user_message: str,
        retry_suggested: bool = False,
        actionable_steps: Optional[List[str]] = None,
        error_category: ErrorCategory = ErrorCategory.SYSTEM_ERROR,
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(message)
        self.error_code = error_code
        self.user_message = user_message
        self.retry_suggested = retry_suggested
        self.actionable_steps = actionable_steps or []
        self.error_category = error_category
        self.context = context or {}

    def to_dict(self) -> Dict[str, Any]:
        """Convert exception to dictionary for API responses."""
        return {
            "error_code": self.error_code,
            "user_message": self.user_message,
            "retry_suggested": self.retry_suggested,
            "actionable_steps": self.actionable_steps,
            "error_category": self.error_category.value,
            "context": self.context
        }


class ResearchFailedException(BlogWriterException):
    """Raised when research operation fails."""

    def __init__(
        self,
        message: str,
        user_message: str = "Research failed. Please try again with different keywords or check your internet connection.",
        retry_suggested: bool = True,
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(
            message=message,
            error_code="RESEARCH_FAILED",
            user_message=user_message,
            retry_suggested=retry_suggested,
            actionable_steps=[
                "Try with different keywords",
                "Check your internet connection",
                "Wait a few minutes and try again",
                "Contact support if the issue persists"
            ],
            error_category=ErrorCategory.API_ERROR,
            context=context
        )


class OutlineGenerationException(BlogWriterException):
    """Raised when outline generation fails."""

    def __init__(
        self,
        message: str,
        user_message: str = "Outline generation failed. Please try again or adjust your research data.",
        retry_suggested: bool = True,
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(
            message=message,
            error_code="OUTLINE_GENERATION_FAILED",
            user_message=user_message,
            retry_suggested=retry_suggested,
            actionable_steps=[
                "Try generating outline again",
                "Check if research data is complete",
                "Try with different research keywords",
                "Contact support if the issue persists"
            ],
            error_category=ErrorCategory.API_ERROR,
            context=context
        )


class ContentGenerationException(BlogWriterException):
    """Raised when content generation fails."""

    def __init__(
        self,
        message: str,
        user_message: str = "Content generation failed. Please try again or adjust your outline.",
        retry_suggested: bool = True,
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(
            message=message,
            error_code="CONTENT_GENERATION_FAILED",
            user_message=user_message,
            retry_suggested=retry_suggested,
            actionable_steps=[
                "Try generating content again",
                "Check if outline is complete",
                "Try with a shorter outline",
                "Contact support if the issue persists"
            ],
            error_category=ErrorCategory.API_ERROR,
            context=context
        )


class SEOAnalysisException(BlogWriterException):
    """Raised when SEO analysis fails."""

    def __init__(
        self,
        message: str,
        user_message: str = "SEO analysis failed. Content was generated but SEO optimization is unavailable.",
        retry_suggested: bool = True,
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(
            message=message,
            error_code="SEO_ANALYSIS_FAILED",
            user_message=user_message,
            retry_suggested=retry_suggested,
            actionable_steps=[
                "Try SEO analysis again",
                "Continue without SEO optimization",
                "Contact support if the issue persists"
            ],
            error_category=ErrorCategory.API_ERROR,
            context=context
        )


class APIRateLimitException(BlogWriterException):
    """Raised when API rate limit is exceeded.

    ``retry_after`` (seconds, may be ``None``) is kept on the instance and,
    when not ``None``, also published as ``context["retry_after"]`` so handlers
    that only look at ``context`` can read it.
    """

    def __init__(
        self,
        message: str,
        retry_after: Optional[int] = None,
        context: Optional[Dict[str, Any]] = None
    ):
        retry_message = f"Rate limit exceeded. Please wait {retry_after} seconds before trying again." if retry_after else "Rate limit exceeded. Please wait a few minutes before trying again."

        # Copy so the caller's dict is never mutated; an explicit
        # "retry_after" supplied by the caller wins over the argument.
        merged_context: Dict[str, Any] = dict(context or {})
        if retry_after is not None:
            merged_context.setdefault("retry_after", retry_after)

        super().__init__(
            message=message,
            error_code="API_RATE_LIMIT",
            user_message=retry_message,
            retry_suggested=True,
            actionable_steps=[
                f"Wait {retry_after or 60} seconds before trying again",
                "Reduce the frequency of requests",
                "Try again during off-peak hours",
                "Contact support if you need higher limits"
            ],
            error_category=ErrorCategory.API_ERROR,
            context=merged_context
        )
        self.retry_after = retry_after


class APITimeoutException(BlogWriterException):
    """Raised when API request times out.

    The ``timeout_seconds`` value is kept on the instance.
    """

    def __init__(
        self,
        message: str,
        timeout_seconds: int = 60,
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(
            message=message,
            error_code="API_TIMEOUT",
            user_message=f"Request timed out after {timeout_seconds} seconds. Please try again.",
            retry_suggested=True,
            actionable_steps=[
                "Try again with a shorter request",
                "Check your internet connection",
                "Try again during off-peak hours",
                "Contact support if the issue persists"
            ],
            error_category=ErrorCategory.TRANSIENT,
            context=context
        )
        self.timeout_seconds = timeout_seconds


class ValidationException(BlogWriterException):
    """Raised when input validation fails.

    The offending ``field`` name is kept on the instance.
    """

    def __init__(
        self,
        message: str,
        field: str,
        user_message: str = "Invalid input provided. Please check your data and try again.",
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(
            message=message,
            error_code="VALIDATION_ERROR",
            user_message=user_message,
            retry_suggested=False,
            actionable_steps=[
                f"Check the {field} field",
                "Ensure all required fields are filled",
                "Verify data format is correct",
                "Contact support if you need help"
            ],
            error_category=ErrorCategory.USER_ERROR,
            context=context
        )
        self.field = field


class CircuitBreakerOpenException(BlogWriterException):
    """Raised when circuit breaker is open.

    The ``retry_after`` value (seconds) is kept on the instance.
    """

    def __init__(
        self,
        message: str,
        retry_after: int,
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(
            message=message,
            error_code="CIRCUIT_BREAKER_OPEN",
            user_message=f"Service temporarily unavailable. Please wait {retry_after} seconds before trying again.",
            retry_suggested=True,
            actionable_steps=[
                f"Wait {retry_after} seconds before trying again",
                "Try again during off-peak hours",
                "Contact support if the issue persists"
            ],
            error_category=ErrorCategory.TRANSIENT,
            context=context
        )
        self.retry_after = retry_after


class PartialSuccessException(BlogWriterException):
    """Raised when operation partially succeeds.

    ``partial_results`` and ``failed_operations`` are kept on the instance.
    """

    def __init__(
        self,
        message: str,
        partial_results: Dict[str, Any],
        failed_operations: List[str],
        user_message: str = "Operation partially completed. Some sections were generated successfully.",
        context: Optional[Dict[str, Any]] = None
    ):
        super().__init__(
            message=message,
            error_code="PARTIAL_SUCCESS",
            user_message=user_message,
            retry_suggested=True,
            actionable_steps=[
                "Review the generated content",
                "Retry failed sections individually",
                "Contact support if you need help with failed sections"
            ],
            error_category=ErrorCategory.TRANSIENT,
            context=context
        )
        self.partial_results = partial_results
        self.failed_operations = failed_operations
"""
Structured Logging Configuration for Blog Writer

Configures structured JSON logging with correlation IDs, context tracking,
and performance metrics for the AI Blog Writer system.
"""

import asyncio
import functools
import json
import uuid
import time
import sys
from typing import Dict, Any, Optional
from contextvars import ContextVar
from loguru import logger
from datetime import datetime

# Context variables for request tracking
correlation_id: ContextVar[str] = ContextVar('correlation_id', default='')
user_id: ContextVar[str] = ContextVar('user_id', default='')
task_id: ContextVar[str] = ContextVar('task_id', default='')
operation: ContextVar[str] = ContextVar('operation', default='')


class BlogWriterLogger:
    """Enhanced logger for Blog Writer with structured logging and context tracking.

    Structured fields are attached with ``logger.bind(**fields)`` so they land
    directly in the loguru record's ``extra`` mapping. Messages are passed
    without keyword arguments on purpose: loguru runs ``str.format`` on the
    message whenever args/kwargs are supplied, which would raise on any
    already-interpolated text containing braces (for example a JSON error body).
    """

    def __init__(self):
        self._setup_logger()

    def _setup_logger(self):
        """Configure loguru with structured JSON output."""
        from utils.logger_utils import get_service_logger
        return get_service_logger("blog_writer")

    def _json_formatter(self, record):
        """Format log record as structured JSON.

        Args:
            record: A loguru record mapping.

        Returns:
            A JSON string with the record's core fields, the current context
            variables, exception details when present, and every key of
            ``record["extra"]`` merged at top level (extra keys overwrite core
            keys of the same name).
        """
        # Extract context variables
        correlation_id_val = correlation_id.get('')
        user_id_val = user_id.get('')
        task_id_val = task_id.get('')
        operation_val = operation.get('')

        # Build structured log entry
        log_entry = {
            "timestamp": datetime.fromtimestamp(record["time"].timestamp()).isoformat(),
            "level": record["level"].name,
            "logger": record["name"],
            "function": record["function"],
            "line": record["line"],
            "message": record["message"],
            "correlation_id": correlation_id_val,
            "user_id": user_id_val,
            "task_id": task_id_val,
            "operation": operation_val,
            "module": record["module"],
            "process_id": record["process"].id,
            "thread_id": record["thread"].id
        }

        # Add exception info if present
        if record["exception"]:
            log_entry["exception"] = {
                "type": record["exception"].type.__name__,
                "value": str(record["exception"].value),
                "traceback": record["exception"].traceback
            }

        # Add extra fields from record
        if record["extra"]:
            log_entry.update(record["extra"])

        return json.dumps(log_entry, default=str)

    def set_context(
        self,
        correlation_id_val: Optional[str] = None,
        user_id_val: Optional[str] = None,
        task_id_val: Optional[str] = None,
        operation_val: Optional[str] = None
    ):
        """Set context variables for the current request.

        Only truthy arguments are applied; ``None`` or ``''`` leaves the
        corresponding variable untouched (use ``clear_context`` to reset).
        """
        if correlation_id_val:
            correlation_id.set(correlation_id_val)
        if user_id_val:
            user_id.set(user_id_val)
        if task_id_val:
            task_id.set(task_id_val)
        if operation_val:
            operation.set(operation_val)

    def clear_context(self):
        """Clear all context variables."""
        correlation_id.set('')
        user_id.set('')
        task_id.set('')
        operation.set('')

    def generate_correlation_id(self) -> str:
        """Generate a new correlation ID."""
        return str(uuid.uuid4())

    def log_operation_start(
        self,
        operation_name: str,
        **kwargs
    ):
        """Log the start of an operation with context.

        Extra keyword arguments become structured fields; a key that collides
        with ``operation`` or ``event_type`` overrides the default value.
        """
        fields = {
            "operation": operation_name,
            "event_type": "operation_start",
            **kwargs
        }
        logger.bind(**fields).info(f"Starting {operation_name}")

    def log_operation_end(
        self,
        operation_name: str,
        duration_ms: float,
        success: bool = True,
        **kwargs
    ):
        """Log the end of an operation with performance metrics."""
        fields = {
            "operation": operation_name,
            "event_type": "operation_end",
            "duration_ms": duration_ms,
            "success": success,
            **kwargs
        }
        logger.bind(**fields).info(f"Completed {operation_name} in {duration_ms:.2f}ms")

    def log_api_call(
        self,
        api_name: str,
        endpoint: str,
        duration_ms: float,
        status_code: Optional[int] = None,
        token_usage: Optional[Dict[str, int]] = None,
        **kwargs
    ):
        """Log API call with performance metrics."""
        fields = {
            "event_type": "api_call",
            "api_name": api_name,
            "endpoint": endpoint,
            "duration_ms": duration_ms,
            "status_code": status_code,
            "token_usage": token_usage,
            **kwargs
        }
        logger.bind(**fields).info(f"API call to {api_name}")

    def log_error(
        self,
        error: Exception,
        operation: str,
        context: Optional[Dict[str, Any]] = None
    ):
        """Log error with full context.

        Args:
            error: The exception to report; its traceback is attached to the
                record via ``logger.opt(exception=error)``.
            operation: Name of the operation that failed.
            context: Optional diagnostic mapping stored under ``context``.
        """
        fields = {
            "event_type": "error",
            "operation": operation,
            "error_type": type(error).__name__,
            "error_message": str(error),
            "context": context or {}
        }
        # Passing the exception object attaches its traceback; loguru ignores
        # the stdlib-style ``exc_info=True`` flag.
        logger.opt(exception=error).bind(**fields).error(f"Error in {operation}: {str(error)}")

    def log_performance(
        self,
        metric_name: str,
        value: float,
        unit: str = "ms",
        **kwargs
    ):
        """Log performance metrics."""
        fields = {
            "event_type": "performance",
            "metric_name": metric_name,
            "value": value,
            "unit": unit,
            **kwargs
        }
        logger.bind(**fields).info(f"Performance metric: {metric_name} = {value} {unit}")


# Global logger instance
blog_writer_logger = BlogWriterLogger()


def get_logger(name: str = "blog_writer"):
    """Get a logger instance with the given name."""
    return logger.bind(name=name)


def log_function_call(func_name: str, **kwargs):
    """Decorator to log function calls with timing.

    Args:
        func_name: Operation name used in the start/end/error log entries.
        **kwargs: Extra structured fields added to the start entry only.

    Returns:
        A decorator that wraps sync and async callables alike. The wrapper
        logs the start, then either the end with ``duration_ms`` or the error
        (which is re-raised unchanged). ``functools.wraps`` keeps the wrapped
        function's name, docstring and signature metadata.
    """
    def decorator(func):
        def _begin():
            # Start the clock before logging so the duration includes it.
            start_time = time.time()
            correlation_id_val = correlation_id.get('')
            blog_writer_logger.log_operation_start(
                func_name,
                function=func.__name__,
                correlation_id=correlation_id_val,
                **kwargs
            )
            return start_time, correlation_id_val

        def _succeed(start_time, correlation_id_val):
            duration_ms = (time.time() - start_time) * 1000
            blog_writer_logger.log_operation_end(
                func_name,
                duration_ms,
                success=True,
                function=func.__name__,
                correlation_id=correlation_id_val
            )

        def _fail(error, start_time, correlation_id_val):
            duration_ms = (time.time() - start_time) * 1000
            blog_writer_logger.log_error(
                error,
                func_name,
                context={
                    "function": func.__name__,
                    "duration_ms": duration_ms,
                    "correlation_id": correlation_id_val
                }
            )

        @functools.wraps(func)
        async def async_wrapper(*args, **func_kwargs):
            start_time, correlation_id_val = _begin()
            try:
                result = await func(*args, **func_kwargs)
                _succeed(start_time, correlation_id_val)
                return result
            except Exception as e:
                _fail(e, start_time, correlation_id_val)
                raise

        @functools.wraps(func)
        def sync_wrapper(*args, **func_kwargs):
            start_time, correlation_id_val = _begin()
            try:
                result = func(*args, **func_kwargs)
                _succeed(start_time, correlation_id_val)
                return result
            except Exception as e:
                _fail(e, start_time, correlation_id_val)
                raise

        # Return appropriate wrapper based on function type
        if asyncio.iscoroutinefunction(func):
            return async_wrapper
        else:
            return sync_wrapper

    return decorator
services.blog_writer.logger_config import blog_writer_logger, log_function_call from .keyword_analyzer import KeywordAnalyzer from .competitor_analyzer import CompetitorAnalyzer @@ -32,6 +33,7 @@ class ResearchService: self.content_angle_generator = ContentAngleGenerator() self.data_filter = ResearchDataFilter() + @log_function_call("research_operation") async def research(self, request: BlogResearchRequest) -> BlogResearchResponse: """ Stage 1: Research & Strategy (AI Orchestration) @@ -47,6 +49,16 @@ class ResearchService: industry = request.industry or (request.persona.industry if request.persona and request.persona.industry else "General") target_audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General' + # Log research parameters + blog_writer_logger.log_operation_start( + "research", + topic=topic, + industry=industry, + target_audience=target_audience, + keywords=request.keywords, + keyword_count=len(request.keywords) + ) + # Check cache first for exact keyword match cached_result = research_cache.get_cached_result( keywords=request.keywords, @@ -56,10 +68,12 @@ class ResearchService: if cached_result: logger.info(f"Returning cached research result for keywords: {request.keywords}") + blog_writer_logger.log_operation_end("research", 0, success=True, cache_hit=True) return BlogResearchResponse(**cached_result) # Cache miss - proceed with API call logger.info(f"Cache miss - making API call for keywords: {request.keywords}") + blog_writer_logger.log_operation_start("gemini_api_call", api_name="gemini_grounded", operation="research") gemini = GeminiGroundedProvider() # Single comprehensive research prompt - Gemini handles Google Search automatically @@ -82,11 +96,23 @@ class ResearchService: """ # Single Gemini call with native Google Search grounding - no fallbacks + import time + api_start_time = time.time() gemini_result = await gemini.generate_grounded_content( prompt=research_prompt, content_type="research", 
max_tokens=2000 ) + api_duration_ms = (time.time() - api_start_time) * 1000 + + # Log API call performance + blog_writer_logger.log_api_call( + "gemini_grounded", + "generate_grounded_content", + api_duration_ms, + token_usage=gemini_result.get("token_usage", {}), + content_length=len(gemini_result.get("content", "")) + ) # Extract sources from grounding metadata sources = self._extract_sources_from_grounding(gemini_result) @@ -105,6 +131,17 @@ class ResearchService: suggested_angles = self.content_angle_generator.generate(content, topic, industry) logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries") + + # Log analysis results + blog_writer_logger.log_performance( + "research_analysis", + len(content), + "characters", + sources_count=len(sources), + search_queries_count=len(search_queries), + keyword_analysis_keys=len(keyword_analysis), + suggested_angles_count=len(suggested_angles) + ) # Create the response response = BlogResearchResponse( @@ -146,7 +183,47 @@ class ResearchService: error_message = str(e) logger.error(f"Research failed: {error_message}") - # Return a graceful failure response instead of raising + # Log error with full context + blog_writer_logger.log_error( + e, + "research", + context={ + "topic": topic, + "keywords": request.keywords, + "industry": industry, + "target_audience": target_audience + } + ) + + # Import custom exceptions for better error handling + from services.blog_writer.exceptions import ( + ResearchFailedException, + APIRateLimitException, + APITimeoutException, + ValidationException + ) + + # Determine if this is a retryable error + retry_suggested = True + user_message = "Research failed. Please try again with different keywords or check your internet connection." + + if isinstance(e, APIRateLimitException): + retry_suggested = True + user_message = f"Rate limit exceeded. Please wait {e.context.get('retry_after', 60)} seconds before trying again." 
+ elif isinstance(e, APITimeoutException): + retry_suggested = True + user_message = "Research request timed out. Please try again with a shorter query or check your internet connection." + elif isinstance(e, ValidationException): + retry_suggested = False + user_message = "Invalid research request. Please check your input parameters and try again." + elif "401" in error_message or "403" in error_message: + retry_suggested = False + user_message = "Authentication failed. Please check your API credentials." + elif "400" in error_message: + retry_suggested = False + user_message = "Invalid request. Please check your input parameters." + + # Return a graceful failure response with enhanced error information return BlogResearchResponse( success=False, sources=[], @@ -155,9 +232,18 @@ class ResearchService: suggested_angles=[], search_widget="", search_queries=[], - error_message=error_message + error_message=user_message, + retry_suggested=retry_suggested, + error_code=getattr(e, 'error_code', 'RESEARCH_FAILED'), + actionable_steps=getattr(e, 'actionable_steps', [ + "Try with different keywords", + "Check your internet connection", + "Wait a few minutes and try again", + "Contact support if the issue persists" + ]) ) + @log_function_call("research_with_progress") async def research_with_progress(self, request: BlogResearchRequest, task_id: str) -> BlogResearchResponse: """ Research method with progress updates for real-time feedback. 
@@ -291,7 +377,47 @@ class ResearchService: error_message = str(e) logger.error(f"Research failed: {error_message}") - # Return a graceful failure response instead of raising + # Log error with full context + blog_writer_logger.log_error( + e, + "research", + context={ + "topic": topic, + "keywords": request.keywords, + "industry": industry, + "target_audience": target_audience + } + ) + + # Import custom exceptions for better error handling + from services.blog_writer.exceptions import ( + ResearchFailedException, + APIRateLimitException, + APITimeoutException, + ValidationException + ) + + # Determine if this is a retryable error + retry_suggested = True + user_message = "Research failed. Please try again with different keywords or check your internet connection." + + if isinstance(e, APIRateLimitException): + retry_suggested = True + user_message = f"Rate limit exceeded. Please wait {e.context.get('retry_after', 60)} seconds before trying again." + elif isinstance(e, APITimeoutException): + retry_suggested = True + user_message = "Research request timed out. Please try again with a shorter query or check your internet connection." + elif isinstance(e, ValidationException): + retry_suggested = False + user_message = "Invalid research request. Please check your input parameters and try again." + elif "401" in error_message or "403" in error_message: + retry_suggested = False + user_message = "Authentication failed. Please check your API credentials." + elif "400" in error_message: + retry_suggested = False + user_message = "Invalid request. Please check your input parameters." 
"""
Enhanced Retry Utilities for Blog Writer

Provides advanced retry logic with exponential backoff, jitter, retry budgets,
and specific error code handling for different types of API failures.
"""

import asyncio
import functools
import random
import time
from typing import Callable, Any, Optional, Dict, List
from dataclasses import dataclass
from loguru import logger

from .exceptions import APIRateLimitException, APITimeoutException


@dataclass
class RetryConfig:
    """Configuration for retry behavior."""
    max_attempts: int = 3
    base_delay: float = 1.0
    max_delay: float = 60.0
    exponential_base: float = 2.0
    jitter: bool = True
    max_total_time: float = 300.0  # 5 minutes max total time
    # ``None`` means "use the default pattern list" (filled in __post_init__).
    retryable_errors: Optional[List[str]] = None

    def __post_init__(self):
        if self.retryable_errors is None:
            self.retryable_errors = [
                "503", "502", "504",  # Server errors
                "429",  # Rate limit
                "timeout", "timed out",
                "connection", "network",
                "overloaded", "busy"
            ]


class RetryBudget:
    """Tracks retry budget to prevent excessive retries."""

    def __init__(self, max_total_time: float):
        self.max_total_time = max_total_time
        self.start_time = time.time()
        self.used_time = 0.0

    def can_retry(self) -> bool:
        """Check if we can still retry within budget."""
        self.used_time = time.time() - self.start_time
        return self.used_time < self.max_total_time

    def remaining_time(self) -> float:
        """Get remaining time in budget.

        Elapsed time is re-measured on every call, so time spent inside the
        attempted operation since the last ``can_retry`` is accounted for.
        """
        self.used_time = time.time() - self.start_time
        return max(0, self.max_total_time - self.used_time)


def is_retryable_error(error: Exception, retryable_errors: List[str]) -> bool:
    """Check if an error is retryable based on error message patterns.

    Matching is a case-insensitive substring test against ``str(error)``.
    """
    error_str = str(error).lower()
    return any(pattern.lower() in error_str for pattern in retryable_errors)


def calculate_delay(attempt: int, config: RetryConfig) -> float:
    """Calculate delay for retry attempt with exponential backoff and jitter.

    Args:
        attempt: Zero-based index of the attempt that just failed.
        config: Retry configuration supplying base/max delay and jitter flag.

    Returns:
        ``base_delay * exponential_base ** attempt`` capped at ``max_delay``,
        then shifted by up to +/-10% when jitter is enabled; never negative.
    """
    # Exponential backoff
    delay = config.base_delay * (config.exponential_base ** attempt)

    # Cap at max delay
    delay = min(delay, config.max_delay)

    # Add jitter to prevent thundering herd
    if config.jitter:
        jitter_range = delay * 0.1  # 10% jitter
        delay += random.uniform(-jitter_range, jitter_range)

    return max(0, delay)


async def retry_with_backoff(
    func: Callable,
    config: Optional[RetryConfig] = None,
    operation_name: str = "operation",
    context: Optional[Dict[str, Any]] = None
) -> Any:
    """
    Retry a function with enhanced backoff and budget management.

    Args:
        func: Async function to retry
        config: Retry configuration
        operation_name: Name of operation for logging
        context: Additional context for logging

    Returns:
        Function result

    Raises:
        APIRateLimitException: If the last error text contains "429" or
            "rate limit" (the original error is chained as ``__cause__``).
        APITimeoutException: If the last error text contains "timeout"
            (the original error is chained as ``__cause__``).
        Exception: The last exception otherwise, or a generic ``Exception``
            when no attempt ran at all (zero attempts or budget already spent).
    """
    config = config or RetryConfig()
    budget = RetryBudget(config.max_total_time)
    last_exception: Optional[Exception] = None
    # Last back-off that was computed; stays 0.0 when the loop ends before
    # any delay is calculated (single attempt, non-retryable error, ...).
    last_delay = 0.0

    for attempt in range(config.max_attempts):
        try:
            # Check if we're still within budget
            if not budget.can_retry():
                logger.warning(f"Retry budget exceeded for {operation_name} after {budget.used_time:.2f}s")
                break

            # Execute the function
            result = await func()
            logger.info(f"{operation_name} succeeded on attempt {attempt + 1}")
            return result

        except Exception as e:
            last_exception = e

            # Check if this is the last attempt
            if attempt == config.max_attempts - 1:
                logger.error(f"{operation_name} failed after {config.max_attempts} attempts: {str(e)}")
                break

            # Check if error is retryable
            if not is_retryable_error(e, config.retryable_errors):
                logger.warning(f"{operation_name} failed with non-retryable error: {str(e)}")
                break

            # Calculate delay and wait
            delay = calculate_delay(attempt, config)
            last_delay = delay
            remaining_time = budget.remaining_time()

            # Don't wait longer than remaining budget
            if delay > remaining_time:
                logger.warning(f"Delay {delay:.2f}s exceeds remaining budget {remaining_time:.2f}s for {operation_name}")
                break

            logger.warning(
                f"{operation_name} attempt {attempt + 1} failed: {str(e)}. "
                f"Retrying in {delay:.2f}s (attempt {attempt + 2}/{config.max_attempts})"
            )

            await asyncio.sleep(delay)

    # If we get here, all retries failed
    if last_exception is None:
        raise Exception(f"{operation_name} failed after {config.max_attempts} attempts")

    # Enhance exception with retry context
    error_str = str(last_exception)
    if "429" in error_str or "rate limit" in error_str.lower():
        # Fall back to the base delay when no back-off was ever computed.
        suggested_wait = (last_delay or config.base_delay) * 2  # Suggest waiting longer
        raise APIRateLimitException(
            f"Rate limit exceeded after {config.max_attempts} attempts",
            retry_after=int(suggested_wait),
            context=context
        ) from last_exception
    if "timeout" in error_str.lower():
        raise APITimeoutException(
            f"Request timed out after {config.max_attempts} attempts",
            timeout_seconds=int(config.max_total_time),
            context=context
        ) from last_exception

    raise last_exception


def retry_decorator(
    config: Optional[RetryConfig] = None,
    operation_name: Optional[str] = None
):
    """
    Decorator to add retry logic to async functions.

    Args:
        config: Retry configuration
        operation_name: Name of operation for logging
            (defaults to the wrapped function's ``__name__``)
    """
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            op_name = operation_name or func.__name__
            return await retry_with_backoff(
                lambda: func(*args, **kwargs),
                config=config,
                operation_name=op_name
            )
        return wrapper
    return decorator


# Predefined retry configurations for different operation types
RESEARCH_RETRY_CONFIG = RetryConfig(
    max_attempts=3,
    base_delay=2.0,
    max_delay=30.0,
    max_total_time=180.0,  # 3 minutes for research
    retryable_errors=["503", "429", "timeout", "overloaded", "connection"]
)

OUTLINE_RETRY_CONFIG = RetryConfig(
    max_attempts=2,
    base_delay=1.5,
    max_delay=20.0,
    max_total_time=120.0,  # 2 minutes for outline
    retryable_errors=["503", "429", "timeout", "overloaded"]
)

CONTENT_RETRY_CONFIG = RetryConfig(
    max_attempts=3,
    base_delay=1.0,
    max_delay=15.0,
    max_total_time=90.0,  # 1.5 minutes for content
    retryable_errors=["503", "429", "timeout", "overloaded"]
)

SEO_RETRY_CONFIG = RetryConfig(
    max_attempts=2,
    base_delay=1.0,
    max_delay=10.0,
    max_total_time=60.0,  # 1 minute for SEO
    retryable_errors=["503", "429", "timeout"]
)
validate_step1(self, user_id: int, strategy_id: int) -> Dict[str, Any]: """Execute and validate Step 1 with comprehensive logging.""" diff --git a/backend/services/integrations/bing_oauth.py b/backend/services/integrations/bing_oauth.py index cceb08a8..d0fab361 100644 --- a/backend/services/integrations/bing_oauth.py +++ b/backend/services/integrations/bing_oauth.py @@ -58,7 +58,7 @@ class BingOAuthService: state TEXT NOT NULL UNIQUE, user_id TEXT NOT NULL, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - expires_at TIMESTAMP DEFAULT (datetime('now', '+10 minutes')) + expires_at TIMESTAMP DEFAULT (datetime('now', '+20 minutes')) ) ''') conn.commit() @@ -79,8 +79,8 @@ class BingOAuthService: with sqlite3.connect(self.db_path) as conn: cursor = conn.cursor() cursor.execute(''' - INSERT INTO bing_oauth_states (state, user_id) - VALUES (?, ?) + INSERT INTO bing_oauth_states (state, user_id, expires_at) + VALUES (?, ?, datetime('now', '+20 minutes')) ''', (state, user_id)) conn.commit() @@ -114,17 +114,33 @@ class BingOAuthService: # Validate state parameter with sqlite3.connect(self.db_path) as conn: cursor = conn.cursor() + # First, look up the state regardless of expiry to provide clearer logs cursor.execute(''' - SELECT user_id FROM bing_oauth_states - WHERE state = ? AND expires_at > datetime('now') + SELECT user_id, created_at, expires_at FROM bing_oauth_states + WHERE state = ? ''', (state,)) - result = cursor.fetchone() - - if not result: - logger.error(f"Invalid or expired state parameter: {state}") + row = cursor.fetchone() + + if not row: + # State not found - likely already consumed (deleted) or never issued + logger.error(f"Bing OAuth: State not found or already used. 
state='{state[:12]}...'") return None - - user_id = result[0] + + user_id, created_at, expires_at = row + # Check expiry explicitly + cursor.execute("SELECT datetime('now') < ?", (expires_at,)) + not_expired = cursor.fetchone()[0] == 1 + if not not_expired: + logger.error( + f"Bing OAuth: State expired. state='{state[:12]}...', user_id='{user_id}', " + f"created_at='{created_at}', expires_at='{expires_at}'" + ) + # Clean up expired state + cursor.execute('DELETE FROM bing_oauth_states WHERE state = ?', (state,)) + conn.commit() + return None + + # Valid, not expired logger.info(f"Bing OAuth: State validated for user {user_id}") # Clean up used state @@ -174,6 +190,36 @@ class BingOAuthService: conn.commit() logger.info(f"Bing OAuth: Token inserted into database for user {user_id}") + # Proactively fetch and cache user sites using the fresh token + try: + headers = {'Authorization': f'Bearer {access_token}'} + response = requests.get( + f"{self.api_base_url}/GetUserSites", + headers={ + **headers, + 'Origin': 'https://www.bing.com', + 'Referer': 'https://www.bing.com/webmasters/' + }, + timeout=15 + ) + sites = [] + if response.status_code == 200: + sites_data = response.json() + if isinstance(sites_data, dict): + if 'd' in sites_data: + d_data = sites_data['d'] + if isinstance(d_data, dict) and 'results' in d_data: + sites = d_data['results'] + elif isinstance(d_data, list): + sites = d_data + elif isinstance(sites_data, list): + sites = sites_data + if sites: + analytics_cache.set('bing_sites', user_id, sites, ttl_override=2*60*60) + logger.info(f"Bing OAuth: Cached {len(sites)} sites for user {user_id} after OAuth callback") + except Exception as site_err: + logger.warning(f"Bing OAuth: Failed to prefetch sites after OAuth callback: {site_err}") + # Invalidate platform status and sites cache since connection status changed # Don't invalidate analytics data cache as it's expensive to regenerate analytics_cache.invalidate('platform_status', user_id) @@ -193,6 
+239,31 @@ class BingOAuthService: except Exception as e: logger.error(f"Error handling Bing Webmaster OAuth callback: {e}") return None + + def purge_expired_tokens(self, user_id: str) -> int: + """Delete expired or inactive Bing tokens for a user to avoid refresh loops. + Returns number of rows deleted. + """ + try: + with sqlite3.connect(self.db_path) as conn: + cursor = conn.cursor() + # Delete tokens that are expired or explicitly inactive + cursor.execute(''' + DELETE FROM bing_oauth_tokens + WHERE user_id = ? AND (is_active = FALSE OR (expires_at IS NOT NULL AND expires_at <= datetime('now'))) + ''', (user_id,)) + deleted = cursor.rowcount or 0 + conn.commit() + if deleted > 0: + logger.info(f"Bing OAuth: Purged {deleted} expired/inactive tokens for user {user_id}") + else: + logger.info(f"Bing OAuth: No expired/inactive tokens to purge for user {user_id}") + # Invalidate platform status cache so UI updates + analytics_cache.invalidate('platform_status', user_id) + return deleted + except Exception as e: + logger.error(f"Bing OAuth: Error purging expired tokens for user {user_id}: {e}") + return 0 def get_user_tokens(self, user_id: str) -> List[Dict[str, Any]]: """Get all active Bing tokens for a user.""" @@ -223,6 +294,85 @@ class BingOAuthService: except Exception as e: logger.error(f"Error getting Bing tokens for user {user_id}: {e}") return [] + + def get_user_token_status(self, user_id: str) -> Dict[str, Any]: + """Get detailed token status for a user including expired tokens.""" + try: + with sqlite3.connect(self.db_path) as conn: + cursor = conn.cursor() + + # Get all tokens (active and expired) + cursor.execute(''' + SELECT id, access_token, refresh_token, token_type, expires_at, scope, created_at, is_active + FROM bing_oauth_tokens + WHERE user_id = ? 
+ ORDER BY created_at DESC + ''', (user_id,)) + + all_tokens = [] + active_tokens = [] + expired_tokens = [] + + for row in cursor.fetchall(): + token_data = { + "id": row[0], + "access_token": row[1], + "refresh_token": row[2], + "token_type": row[3], + "expires_at": row[4], + "scope": row[5], + "created_at": row[6], + "is_active": bool(row[7]) + } + all_tokens.append(token_data) + + # Determine expiry using robust parsing and is_active flag + is_active_flag = bool(row[7]) + not_expired = False + try: + expires_at_val = row[4] + if expires_at_val: + # First try Python parsing + try: + dt = datetime.fromisoformat(expires_at_val) if isinstance(expires_at_val, str) else expires_at_val + not_expired = dt > datetime.now() + except Exception: + # Fallback to SQLite comparison + cursor.execute("SELECT datetime('now') < ?", (expires_at_val,)) + not_expired = cursor.fetchone()[0] == 1 + else: + # No expiry stored => consider not expired + not_expired = True + except Exception: + not_expired = False + + if is_active_flag and not_expired: + active_tokens.append(token_data) + else: + expired_tokens.append(token_data) + + return { + "has_tokens": len(all_tokens) > 0, + "has_active_tokens": len(active_tokens) > 0, + "has_expired_tokens": len(expired_tokens) > 0, + "active_tokens": active_tokens, + "expired_tokens": expired_tokens, + "total_tokens": len(all_tokens), + "last_token_date": all_tokens[0]["created_at"] if all_tokens else None + } + + except Exception as e: + logger.error(f"Error getting Bing token status for user {user_id}: {e}") + return { + "has_tokens": False, + "has_active_tokens": False, + "has_expired_tokens": False, + "active_tokens": [], + "expired_tokens": [], + "total_tokens": 0, + "last_token_date": None, + "error": str(e) + } def test_token(self, access_token: str) -> bool: """Test if a Bing access token is valid.""" @@ -264,7 +414,7 @@ class BingOAuthService: } response = requests.post( - f"{self.base_url}/webmasters/token", + 
f"{self.base_url}/webmasters/oauth/token", data=token_data, headers={ 'Content-Type': 'application/x-www-form-urlencoded', @@ -291,12 +441,19 @@ class BingOAuthService: cursor = conn.cursor() cursor.execute(''' UPDATE bing_oauth_tokens - SET access_token = ?, expires_at = ?, updated_at = datetime('now') + SET access_token = ?, expires_at = ?, is_active = TRUE, updated_at = datetime('now') WHERE user_id = ? AND refresh_token = ? ''', (access_token, expires_at, user_id, refresh_token)) conn.commit() logger.info(f"Bing access token refreshed for user {user_id}") + + # Invalidate caches that depend on token validity + try: + analytics_cache.invalidate('platform_status', user_id) + analytics_cache.invalidate('bing_sites', user_id) + except Exception as _: + pass return { "access_token": access_token, "expires_in": expires_in, @@ -382,6 +539,15 @@ class BingOAuthService: def get_user_sites(self, user_id: str) -> List[Dict[str, Any]]: """Get list of user's verified sites from Bing Webmaster.""" try: + # Fast path: return cached sites if available + try: + cached_sites = analytics_cache.get('bing_sites', user_id) + if cached_sites: + logger.info(f"Bing get_user_sites: Returning {len(cached_sites)} cached sites for user {user_id}") + return cached_sites + except Exception: + pass + tokens = self.get_user_tokens(user_id) logger.info(f"Bing get_user_sites: Found {len(tokens)} tokens for user {user_id}") if not tokens: @@ -453,6 +619,11 @@ class BingOAuthService: logger.info(f"Bing get_user_sites: Found {len(sites)} sites from token") all_sites.extend(sites) + # Cache sites immediately for future calls + try: + analytics_cache.set('bing_sites', user_id, all_sites, ttl_override=2*60*60) + except Exception: + pass except Exception as e: logger.error(f"Error getting Bing user sites: {e}") @@ -473,13 +644,20 @@ class BingOAuthService: try: with sqlite3.connect(self.db_path) as conn: cursor = conn.cursor() + # Compute expires_at from expires_in if expires_at missing + 
expires_at_value = refreshed_token.get("expires_at") + if not expires_at_value and refreshed_token.get("expires_in"): + try: + expires_at_value = datetime.now() + timedelta(seconds=int(refreshed_token["expires_in"])) + except Exception: + expires_at_value = None cursor.execute(''' UPDATE bing_oauth_tokens - SET access_token = ?, expires_at = ?, updated_at = datetime('now') + SET access_token = ?, expires_at = ?, is_active = TRUE, updated_at = datetime('now') WHERE id = ? ''', ( refreshed_token["access_token"], - refreshed_token.get("expires_at"), + expires_at_value, token_id )) conn.commit() diff --git a/backend/services/linkedin/image_generation/linkedin_image_editor.py b/backend/services/linkedin/image_generation/linkedin_image_editor.py index 3a75cf82..f08a0db6 100644 --- a/backend/services/linkedin/image_generation/linkedin_image_editor.py +++ b/backend/services/linkedin/image_generation/linkedin_image_editor.py @@ -15,7 +15,7 @@ from io import BytesIO from loguru import logger # Import existing infrastructure -from ...api_key_manager import APIKeyManager +from ...onboarding.api_key_manager import APIKeyManager class LinkedInImageEditor: diff --git a/backend/services/linkedin/image_generation/linkedin_image_generator.py b/backend/services/linkedin/image_generation/linkedin_image_generator.py index f4ac5e2b..a14362b2 100644 --- a/backend/services/linkedin/image_generation/linkedin_image_generator.py +++ b/backend/services/linkedin/image_generation/linkedin_image_generator.py @@ -15,7 +15,7 @@ from PIL import Image from io import BytesIO # Import existing infrastructure -from ...api_key_manager import APIKeyManager +from ...onboarding.api_key_manager import APIKeyManager from ...llm_providers.text_to_image_generation.gen_gemini_images import generate_gemini_image # Set up logging diff --git a/backend/services/linkedin/image_generation/linkedin_image_storage.py b/backend/services/linkedin/image_generation/linkedin_image_storage.py index 2d82c321..02ae9388 100644 
--- a/backend/services/linkedin/image_generation/linkedin_image_storage.py +++ b/backend/services/linkedin/image_generation/linkedin_image_storage.py @@ -16,7 +16,7 @@ from io import BytesIO from loguru import logger # Import existing infrastructure -from ...api_key_manager import APIKeyManager +from ...onboarding.api_key_manager import APIKeyManager class LinkedInImageStorage: diff --git a/backend/services/linkedin/image_prompts/linkedin_prompt_generator.py b/backend/services/linkedin/image_prompts/linkedin_prompt_generator.py index b3d46366..7aa45bda 100644 --- a/backend/services/linkedin/image_prompts/linkedin_prompt_generator.py +++ b/backend/services/linkedin/image_prompts/linkedin_prompt_generator.py @@ -12,7 +12,7 @@ from datetime import datetime from loguru import logger # Import existing infrastructure -from ...api_key_manager import APIKeyManager +from ...onboarding.api_key_manager import APIKeyManager from ...llm_providers.gemini_provider import gemini_text_response diff --git a/backend/services/llm_providers/anthropic_provider.py b/backend/services/llm_providers/anthropic_provider.py index 1690c25d..adffd35e 100644 --- a/backend/services/llm_providers/anthropic_provider.py +++ b/backend/services/llm_providers/anthropic_provider.py @@ -16,7 +16,7 @@ from tenacity import ( ) # Import APIKeyManager -from ..api_key_manager import APIKeyManager +from ..onboarding.api_key_manager import APIKeyManager try: import anthropic diff --git a/backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py b/backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py index 3fa80808..b1dd9181 100644 --- a/backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py +++ b/backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py @@ -70,11 +70,10 @@ from google.genai import types from loguru import logger -logger.remove() -logger.add(sys.stdout, - colorize=True, - format="{level}|{file}:{line}:{function}| 
{message}" - ) +from utils.logger_utils import get_service_logger + +# Use service-specific logger to avoid conflicts +logger = get_service_logger("gemini_audio_text") def load_environment(): diff --git a/backend/services/llm_providers/audio_to_text_generation/stt_audio_blog.py b/backend/services/llm_providers/audio_to_text_generation/stt_audio_blog.py index 34b6b4cc..62d40ea0 100644 --- a/backend/services/llm_providers/audio_to_text_generation/stt_audio_blog.py +++ b/backend/services/llm_providers/audio_to_text_generation/stt_audio_blog.py @@ -18,7 +18,7 @@ from tenacity import ( from .gemini_audio_text import transcribe_audio # Import APIKeyManager -from ...api_key_manager import APIKeyManager +from ...onboarding.api_key_manager import APIKeyManager def progress_function(stream, chunk, bytes_remaining): diff --git a/backend/services/llm_providers/deepseek_provider.py b/backend/services/llm_providers/deepseek_provider.py index e7606486..488b57bf 100644 --- a/backend/services/llm_providers/deepseek_provider.py +++ b/backend/services/llm_providers/deepseek_provider.py @@ -16,7 +16,7 @@ from tenacity import ( ) # Import APIKeyManager -from ..api_key_manager import APIKeyManager +from ..onboarding.api_key_manager import APIKeyManager try: import openai diff --git a/backend/services/llm_providers/gemini_grounded_provider.py b/backend/services/llm_providers/gemini_grounded_provider.py index 0f43af39..69ae349e 100644 --- a/backend/services/llm_providers/gemini_grounded_provider.py +++ b/backend/services/llm_providers/gemini_grounded_provider.py @@ -131,30 +131,49 @@ class GeminiGroundedProvider: ) self._cache[cache_key] = response except asyncio.TimeoutError: - raise Exception(f"Gemini API request timed out after {self.timeout} seconds") + from services.blog_writer.exceptions import APITimeoutException + raise APITimeoutException( + f"Gemini API request timed out after {self.timeout} seconds", + timeout_seconds=self.timeout, + context={"content_type": content_type, 
"model_id": model_id} + ) except Exception as api_error: - # Handle specific Google API errors with retry logic + # Handle specific Google API errors with enhanced retry logic error_str = str(api_error) - if "503" in error_str and "overloaded" in error_str: - # Conservative retry for overloaded service (expensive API calls) - response = await self._retry_with_backoff( - lambda: self._make_api_request_with_model(grounded_prompt, config, model_id, urls), - max_retries=1, # Only 1 retry to avoid excessive costs - base_delay=5 # Longer delay + + # Non-retryable errors + if "401" in error_str or "403" in error_str: + from services.blog_writer.exceptions import ValidationException + raise ValidationException( + "Authentication failed. Please check your API credentials.", + field="api_key", + context={"error": error_str, "content_type": content_type} ) - elif "429" in error_str: - # Conservative retry for rate limits - response = await self._retry_with_backoff( - lambda: self._make_api_request_with_model(grounded_prompt, config, model_id, urls), - max_retries=1, # Only 1 retry - base_delay=10 # Much longer delay for rate limits - ) - elif "401" in error_str or "403" in error_str: - raise Exception("Authentication failed. Please check your API credentials.") elif "400" in error_str: - raise Exception("Invalid request. Please check your input parameters.") - else: - raise Exception(f"Google AI service error: {error_str}") + from services.blog_writer.exceptions import ValidationException + raise ValidationException( + "Invalid request. 
Please check your input parameters.", + field="request", + context={"error": error_str, "content_type": content_type} + ) + + # Retryable errors - use enhanced retry logic + from services.blog_writer.retry_utils import retry_with_backoff, RESEARCH_RETRY_CONFIG + + try: + response = await retry_with_backoff( + lambda: self._make_api_request_with_model(grounded_prompt, config, model_id, urls), + config=RESEARCH_RETRY_CONFIG, + operation_name=f"gemini_grounded_{content_type}", + context={"content_type": content_type, "model_id": model_id} + ) + except Exception as retry_error: + # If retry also failed, raise the original error with context + from services.blog_writer.exceptions import ResearchFailedException + raise ResearchFailedException( + f"Google AI service error after retries: {error_str}", + context={"original_error": error_str, "retry_error": str(retry_error), "content_type": content_type} + ) # Process the grounded response result = self._process_grounded_response(response, content_type) diff --git a/backend/services/llm_providers/gemini_provider.py b/backend/services/llm_providers/gemini_provider.py index 16bcc98b..d6da5432 100644 --- a/backend/services/llm_providers/gemini_provider.py +++ b/backend/services/llm_providers/gemini_provider.py @@ -77,11 +77,10 @@ else: print(f"No .env found at {env_path}, using current directory") from loguru import logger -logger.remove() -logger.add(sys.stdout, - colorize=True, - format="{level}|{file}:{line}:{function}| {message}" - ) +from utils.logger_utils import get_service_logger + +# Use service-specific logger to avoid conflicts +logger = get_service_logger("gemini_provider") from tenacity import ( retry, stop_after_attempt, @@ -389,17 +388,65 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9, ) logger.info("πŸš€ Making Gemini API call...") - try: - response = client.models.generate_content( + + # Use enhanced retry logic for structured JSON calls + from services.blog_writer.retry_utils 
import retry_with_backoff, CONTENT_RETRY_CONFIG + + async def make_api_call(): + return client.models.generate_content( model="gemini-2.5-flash", contents=prompt, config=generation_config, ) + + try: + # Convert sync call to async for retry logic + import asyncio + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + response = loop.run_until_complete( + retry_with_backoff( + make_api_call, + config=CONTENT_RETRY_CONFIG, + operation_name="gemini_structured_json", + context={"schema_type": type(types_schema).__name__, "max_tokens": max_tokens} + ) + ) logger.info("βœ… Gemini API call completed successfully") except Exception as api_error: logger.error(f"❌ Gemini API call failed: {api_error}") logger.error(f"❌ API Error type: {type(api_error).__name__}") - raise api_error + + # Enhance error with specific exception types + error_str = str(api_error) + if "429" in error_str or "rate limit" in error_str.lower(): + from services.blog_writer.exceptions import APIRateLimitException + raise APIRateLimitException( + f"Rate limit exceeded for structured JSON generation: {error_str}", + retry_after=60, + context={"operation": "structured_json", "max_tokens": max_tokens} + ) + elif "timeout" in error_str.lower(): + from services.blog_writer.exceptions import APITimeoutException + raise APITimeoutException( + f"Structured JSON generation timed out: {error_str}", + timeout_seconds=60, + context={"operation": "structured_json", "max_tokens": max_tokens} + ) + elif "401" in error_str or "403" in error_str: + from services.blog_writer.exceptions import ValidationException + raise ValidationException( + "Authentication failed for structured JSON generation. 
Please check your API credentials.", + field="api_key", + context={"error": error_str, "operation": "structured_json"} + ) + else: + from services.blog_writer.exceptions import ContentGenerationException + raise ContentGenerationException( + f"Structured JSON generation failed: {error_str}", + context={"error": error_str, "operation": "structured_json", "max_tokens": max_tokens} + ) # Check for parsed content first (primary method for structured output) if hasattr(response, 'parsed'): diff --git a/backend/services/llm_providers/image_to_text_gen/gemini_image_describe.py b/backend/services/llm_providers/image_to_text_gen/gemini_image_describe.py index 6ead2274..e2ea95a4 100644 --- a/backend/services/llm_providers/image_to_text_gen/gemini_image_describe.py +++ b/backend/services/llm_providers/image_to_text_gen/gemini_image_describe.py @@ -15,14 +15,13 @@ from google.genai import types from PIL import Image from loguru import logger -logger.remove() -logger.add(sys.stdout, - colorize=True, - format="{level}|{file}:{line}:{function}| {message}" - ) +from utils.logger_utils import get_service_logger + +# Use service-specific logger to avoid conflicts +logger = get_service_logger("gemini_image_describe") # Import APIKeyManager -from ...api_key_manager import APIKeyManager +from ...onboarding.api_key_manager import APIKeyManager try: import google.generativeai as genai diff --git a/backend/services/llm_providers/main_text_generation.py b/backend/services/llm_providers/main_text_generation.py index b7f6d09d..cc573630 100644 --- a/backend/services/llm_providers/main_text_generation.py +++ b/backend/services/llm_providers/main_text_generation.py @@ -8,7 +8,7 @@ import os import json from typing import Optional, Dict, Any from loguru import logger -from ..api_key_manager import APIKeyManager +from ..onboarding.api_key_manager import APIKeyManager from .openai_provider import openai_chatgpt from .gemini_provider import gemini_text_response, gemini_structured_json_response diff 
--git a/backend/services/llm_providers/openai_provider.py b/backend/services/llm_providers/openai_provider.py index a44c473b..975fa4c1 100644 --- a/backend/services/llm_providers/openai_provider.py +++ b/backend/services/llm_providers/openai_provider.py @@ -17,7 +17,7 @@ from tenacity import ( ) # Import APIKeyManager -from ..api_key_manager import APIKeyManager +from ..onboarding.api_key_manager import APIKeyManager async def test_openai_api_key(api_key: str) -> Tuple[bool, str]: """ diff --git a/backend/services/llm_providers/text_to_image_generation/gen_gemini_images.py b/backend/services/llm_providers/text_to_image_generation/gen_gemini_images.py index 3ee978bc..f5e5a3d8 100644 --- a/backend/services/llm_providers/text_to_image_generation/gen_gemini_images.py +++ b/backend/services/llm_providers/text_to_image_generation/gen_gemini_images.py @@ -10,7 +10,7 @@ from io import BytesIO import logging # Import APIKeyManager -from ...api_key_manager import APIKeyManager +from ...onboarding.api_key_manager import APIKeyManager try: from google import genai diff --git a/backend/services/llm_providers/text_to_image_generation/gen_stabl_diff_img.py b/backend/services/llm_providers/text_to_image_generation/gen_stabl_diff_img.py index 94979d06..b6187f4c 100644 --- a/backend/services/llm_providers/text_to_image_generation/gen_stabl_diff_img.py +++ b/backend/services/llm_providers/text_to_image_generation/gen_stabl_diff_img.py @@ -12,7 +12,7 @@ import streamlit as st from loguru import logger # Import APIKeyManager -from ...api_key_manager import APIKeyManager +from ...onboarding.api_key_manager import APIKeyManager def save_generated_image(data): """Save the generated image to a file.""" diff --git a/backend/services/llm_providers/text_to_image_generation/main_generate_image_from_prompt.py b/backend/services/llm_providers/text_to_image_generation/main_generate_image_from_prompt.py index 6bc6e213..bad107c8 100644 --- 
a/backend/services/llm_providers/text_to_image_generation/main_generate_image_from_prompt.py +++ b/backend/services/llm_providers/text_to_image_generation/main_generate_image_from_prompt.py @@ -15,11 +15,10 @@ import streamlit as st import openai # OpenAI Python library to make API calls from loguru import logger -logger.remove() -logger.add(sys.stdout, - colorize=True, - format="{level}|{file}:{line}:{function}| {message}" - ) +from utils.logger_utils import get_service_logger + +# Use service-specific logger to avoid conflicts +logger = get_service_logger("text_to_image_generation") #from .gen_dali2_images from .gen_dali3_images import generate_dalle3_images diff --git a/backend/services/onboarding/README.md b/backend/services/onboarding/README.md new file mode 100644 index 00000000..c4569727 --- /dev/null +++ b/backend/services/onboarding/README.md @@ -0,0 +1,204 @@ +# Onboarding Services Package + +This package contains all onboarding-related services and utilities for ALwrity. All onboarding data is stored in the database with proper user isolation, replacing the previous file-based JSON storage system. + +## Architecture + +### Database-First Design +- **Primary Storage**: PostgreSQL database with proper foreign keys and relationships +- **User Isolation**: Each user's onboarding data is completely separate +- **No File Storage**: Removed all JSON file operations for production scalability +- **Local Development**: API keys still written to `.env` for developer convenience + +### Service Structure + +``` +backend/services/onboarding/ +├── __init__.py # Package exports +├── database_service.py # Core database operations +├── progress_service.py # Progress tracking and step management +├── data_service.py # Data validation and processing +├── api_key_manager.py # API key management + progress tracking +└── README.md # This documentation +``` + +## Services + +### 1.
OnboardingDatabaseService (`database_service.py`) +**Purpose**: Core database operations for onboarding data with user isolation. + +**Key Features**: +- User-specific session management +- API key storage and retrieval +- Website analysis persistence +- Research preferences management +- Persona data storage +- Brand analysis support (feature-flagged) + +**Main Methods**: +- `get_or_create_session(user_id)` - Get or create user session +- `save_api_key(user_id, provider, key)` - Store API keys +- `save_website_analysis(user_id, data)` - Store website analysis +- `save_research_preferences(user_id, prefs)` - Store research settings +- `save_persona_data(user_id, data)` - Store persona information + +### 2. OnboardingProgressService (`progress_service.py`) +**Purpose**: High-level progress tracking and step management. + +**Key Features**: +- Database-only progress tracking +- Step completion validation +- Progress percentage calculation +- Onboarding completion management + +**Main Methods**: +- `get_onboarding_status(user_id)` - Get current status +- `update_step(user_id, step_number)` - Update current step +- `update_progress(user_id, percentage)` - Update progress +- `complete_onboarding(user_id)` - Mark as complete + +### 3. OnboardingDataService (`data_service.py`) +**Purpose**: Extract and use onboarding data for AI personalization. + +**Key Features**: +- Personalized AI input generation +- Website analysis data extraction +- Research preferences integration +- Default fallback data + +**Main Methods**: +- `get_personalized_ai_inputs(user_id)` - Generate personalized inputs +- `get_user_website_analysis(user_id)` - Get website data +- `get_user_research_preferences(user_id)` - Get research settings + +### 4. OnboardingProgress + APIKeyManager (`api_key_manager.py`) +**Purpose**: Combined API key management and progress tracking with database persistence. 
+ +**Key Features**: +- Database-only progress persistence (no JSON files) +- API key management with environment integration +- Step-by-step progress tracking +- User-specific progress instances + +**Main Classes**: +- `OnboardingProgress` - Progress tracking with database persistence +- `APIKeyManager` - API key management +- `StepData` - Individual step data structure +- `StepStatus` - Step status enumeration + +## Database Schema + +### Core Tables +- `onboarding_sessions` - User session tracking +- `api_keys` - User-specific API key storage +- `website_analyses` - Website analysis data +- `research_preferences` - User research settings +- `persona_data` - Generated persona information + +### Relationships +- All data tables reference `onboarding_sessions.id` +- User isolation via `user_id` foreign key +- Proper cascade deletion and updates + +## Usage Examples + +### Basic Progress Tracking +```python +from services.onboarding import OnboardingProgress + +# Get user-specific progress +progress = OnboardingProgress(user_id="user123") + +# Mark step as completed +progress.mark_step_completed(1, {"api_keys": {"openai": "sk-..."}}) + +# Get progress summary +summary = progress.get_progress_summary() +``` + +### Database Operations +```python +from services.onboarding import OnboardingDatabaseService +from services.database import SessionLocal + +db = SessionLocal() +service = OnboardingDatabaseService(db) + +# Save API key +service.save_api_key("user123", "openai", "sk-...") + +# Get website analysis +analysis = service.get_website_analysis("user123", db) +``` + +### Progress Service +```python +from services.onboarding import OnboardingProgressService + +service = OnboardingProgressService() + +# Get status +status = service.get_onboarding_status("user123") + +# Update progress +service.update_step("user123", 2) +service.update_progress("user123", 50.0) +``` + +## Migration from File-Based Storage + +### What Was Removed +- JSON file operations 
(`.onboarding_progress*.json`) +- File-based progress persistence +- Dual persistence system (file + database) + +### What Was Kept +- Database persistence (enhanced) +- Local development `.env` API key writing +- All existing functionality and APIs + +### Benefits +- **Production Ready**: No ephemeral file storage +- **Scalable**: Database-backed with proper indexing +- **User Isolated**: Complete data separation +- **Maintainable**: Single source of truth + +## Environment Variables + +### Required +- Database connection (via `services.database`) +- User authentication system + +### Optional +- `ENABLE_WEBSITE_BRAND_COLUMNS=true` - Enable brand analysis features +- `DEPLOY_ENV=local` - Enable local `.env` API key writing + +## Error Handling + +All services include comprehensive error handling: +- Database connection failures +- User not found scenarios +- Invalid data validation +- Graceful fallbacks to defaults + +## Performance Considerations + +- Database queries are optimized with proper indexing +- User-specific caching where appropriate +- Minimal database calls through efficient service design +- Connection pooling via SQLAlchemy + +## Testing + +Each service can be tested independently: +- Unit tests for individual methods +- Integration tests with database +- Mock database sessions for isolated testing + +## Future Enhancements + +- Real-time progress updates via WebSocket +- Progress analytics and reporting +- Bulk user operations +- Advanced validation rules +- Progress recovery mechanisms diff --git a/backend/services/onboarding/__init__.py b/backend/services/onboarding/__init__.py new file mode 100644 index 00000000..9338d7d1 --- /dev/null +++ b/backend/services/onboarding/__init__.py @@ -0,0 +1,35 @@ +""" +Onboarding Services Package + +This package contains all onboarding-related services and utilities. +All onboarding data is stored in the database with proper user isolation. 
+ +Services: +- OnboardingDatabaseService: Core database operations for onboarding data +- OnboardingProgressService: Progress tracking and step management +- OnboardingDataService: Data validation and processing +- OnboardingProgress: Progress tracking with database persistence (from api_key_manager) + +Architecture: +- Database-first: All data stored in PostgreSQL with proper foreign keys +- User isolation: Each user's data is completely separate +- No file storage: Removed all JSON file operations for production scalability +- Local development: API keys still written to .env for convenience +""" + +# Import all public classes for easy access +from .database_service import OnboardingDatabaseService +from .progress_service import OnboardingProgressService +from .data_service import OnboardingDataService +from .api_key_manager import OnboardingProgress, APIKeyManager, get_onboarding_progress, get_user_onboarding_progress, get_onboarding_progress_for_user + +__all__ = [ + 'OnboardingDatabaseService', + 'OnboardingProgressService', + 'OnboardingDataService', + 'OnboardingProgress', + 'APIKeyManager', + 'get_onboarding_progress', + 'get_user_onboarding_progress', + 'get_onboarding_progress_for_user' +] diff --git a/backend/services/onboarding/api_key_manager.py b/backend/services/onboarding/api_key_manager.py new file mode 100644 index 00000000..2c7837bf --- /dev/null +++ b/backend/services/onboarding/api_key_manager.py @@ -0,0 +1,476 @@ +""" +API Key Manager with Database-Only Onboarding Progress +Manages API keys and onboarding progress with database persistence only. +Removed all file-based JSON storage for production scalability. 
class StepStatus(Enum):
    """Lifecycle states an onboarding step can be in."""

    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    SKIPPED = "skipped"
    FAILED = "failed"


class StepData:
    """In-memory record for a single onboarding step."""

    def __init__(self, step_number: int, title: str, description: str,
                 status: StepStatus = StepStatus.PENDING):
        self.step_number = step_number
        self.title = title
        self.description = description
        self.status = status
        self.completed_at = None  # ISO-8601 string, set when completed or skipped
        self.data = None  # payload handed to mark_step_completed
        self.validation_errors = []  # messages appended by mark_step_failed


class OnboardingProgress:
    """Manages onboarding progress with database persistence only.

    The instance keeps the six onboarding steps in memory and mirrors every
    state change to the database through ``OnboardingDatabaseService``.
    """

    # Statuses that count as "done" for completion and percentage purposes.
    _FINISHED = (StepStatus.COMPLETED, StepStatus.SKIPPED)

    def __init__(self, user_id: Optional[str] = None):
        """Create the tracker and, when ``user_id`` is given, load saved state.

        Raises:
            Exception: if the database service cannot be imported or built.
        """
        self.steps = self._initialize_steps()
        self.current_step = 1
        self.started_at = datetime.now().isoformat()
        self.last_updated = datetime.now().isoformat()
        self.is_completed = False
        self.completed_at = None
        self.user_id = user_id  # scopes every database call to one user

        # Initialize database service for persistence
        try:
            from .database_service import OnboardingDatabaseService
            self.db_service = OnboardingDatabaseService()
            self.use_database = True
            logger.info(f"Database service initialized for user {user_id}")
        except Exception as e:
            logger.error(f"Database service not available: {e}")
            self.db_service = None
            self.use_database = False
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"Database service required but not available: {e}") from e

        # Load existing progress from database if available
        if self.use_database and self.user_id:
            self.load_progress_from_db()

    def _initialize_steps(self) -> List[StepData]:
        """Initialize the 6-step onboarding process."""
        return [
            StepData(1, "AI LLM Providers", "Configure AI language model providers", StepStatus.PENDING),
            StepData(2, "Website Analysis", "Set up website analysis and crawling", StepStatus.PENDING),
            StepData(3, "AI Research", "Configure AI research capabilities", StepStatus.PENDING),
            StepData(4, "Personalization", "Set up personalization features", StepStatus.PENDING),
            StepData(5, "Integrations", "Configure ALwrity integrations", StepStatus.PENDING),
            StepData(6, "Complete Setup", "Finalize and complete onboarding", StepStatus.PENDING),
        ]

    def get_step_data(self, step_number: int) -> Optional[StepData]:
        """Return the step with ``step_number``, or ``None`` if there is none."""
        return next((s for s in self.steps if s.step_number == step_number), None)

    def mark_step_completed(self, step_number: int, data: Optional[Dict[str, Any]] = None):
        """Mark a step as completed, advance ``current_step`` and persist."""
        logger.info(f"[mark_step_completed] Marking step {step_number} as completed")
        step = self.get_step_data(step_number)
        if step is None:
            logger.error(f"[mark_step_completed] Step {step_number} not found")
            return

        step.status = StepStatus.COMPLETED
        step.completed_at = datetime.now().isoformat()
        step.data = data
        self.last_updated = datetime.now().isoformat()

        if all(s.status in self._FINISHED for s in self.steps):
            # Every step is completed or skipped: onboarding itself is done.
            self.is_completed = True
            self.completed_at = datetime.now().isoformat()
            self.current_step = len(self.steps)
            logger.info("[mark_step_completed] All steps completed, marking onboarding as complete")
        else:
            # Move to the following step, never past the last one.
            self.current_step = min(step_number + 1, len(self.steps))

        logger.info(f"[mark_step_completed] Step {step_number} completed, new current_step: {self.current_step}, is_completed: {self.is_completed}")
        self.save_progress()
        logger.info(f"Step {step_number} marked as completed")

    def mark_step_in_progress(self, step_number: int):
        """Mark a step as in progress and make it the current step."""
        step = self.get_step_data(step_number)
        if step is None:
            logger.error(f"Step {step_number} not found")
            return
        step.status = StepStatus.IN_PROGRESS
        self.current_step = step_number
        self.last_updated = datetime.now().isoformat()
        self.save_progress()
        logger.info(f"Step {step_number} marked as in progress")

    def mark_step_skipped(self, step_number: int):
        """Mark a step as skipped."""
        step = self.get_step_data(step_number)
        if step is None:
            logger.error(f"Step {step_number} not found")
            return
        step.status = StepStatus.SKIPPED
        step.completed_at = datetime.now().isoformat()
        self.last_updated = datetime.now().isoformat()
        self.save_progress()
        logger.info(f"Step {step_number} marked as skipped")

    def mark_step_failed(self, step_number: int, error_message: str):
        """Mark a step as failed and record ``error_message`` on it."""
        step = self.get_step_data(step_number)
        if step is None:
            logger.error(f"Step {step_number} not found")
            return
        step.status = StepStatus.FAILED
        step.validation_errors.append(error_message)
        self.last_updated = datetime.now().isoformat()
        self.save_progress()
        logger.error(f"Step {step_number} marked as failed: {error_message}")

    def _progress_percentage(self) -> float:
        """Return the share of completed-or-skipped steps as a 0-100 value.

        Shared by ``get_progress_summary`` and ``save_progress`` so the value
        stored in the database matches the one reported to callers.
        """
        if not self.steps:
            return 0.0
        finished = sum(1 for s in self.steps if s.status in self._FINISHED)
        return finished / len(self.steps) * 100

    def get_progress_summary(self) -> Dict[str, Any]:
        """Get current progress summary."""
        def count(status: StepStatus) -> int:
            return sum(1 for s in self.steps if s.status == status)

        return {
            "total_steps": len(self.steps),
            "completed_steps": count(StepStatus.COMPLETED),
            "skipped_steps": count(StepStatus.SKIPPED),
            "failed_steps": count(StepStatus.FAILED),
            "current_step": self.current_step,
            "is_completed": self.is_completed,
            "progress_percentage": self._progress_percentage(),
        }

    def get_next_step(self) -> Optional[StepData]:
        """Return the first step still PENDING, or ``None``."""
        return next((s for s in self.steps if s.status == StepStatus.PENDING), None)

    def get_completed_steps(self) -> List[StepData]:
        """Get all completed steps."""
        return [s for s in self.steps if s.status == StepStatus.COMPLETED]

    def get_failed_steps(self) -> List[StepData]:
        """Get all failed steps."""
        return [s for s in self.steps if s.status == StepStatus.FAILED]

    @staticmethod
    def _clear_step(step: StepData) -> None:
        """Return ``step`` to its pristine PENDING state."""
        step.status = StepStatus.PENDING
        step.completed_at = None
        step.data = None
        step.validation_errors = []

    def reset_step(self, step_number: int):
        """Reset a step to pending status."""
        step = self.get_step_data(step_number)
        if step is None:
            logger.error(f"Step {step_number} not found")
            return
        self._clear_step(step)
        self.last_updated = datetime.now().isoformat()
        self.save_progress()
        logger.info(f"Step {step_number} reset to pending")

    def reset_all_steps(self):
        """Reset all steps to pending status."""
        for step in self.steps:
            self._clear_step(step)

        self.current_step = 1
        self.is_completed = False
        self.completed_at = None
        self.last_updated = datetime.now().isoformat()
        self.save_progress()
        logger.info("All steps reset to pending")

    def complete_onboarding(self):
        """Mark onboarding as complete."""
        self.is_completed = True
        self.completed_at = datetime.now().isoformat()
        self.current_step = len(self.steps)
        self.last_updated = datetime.now().isoformat()
        self.save_progress()
        logger.info("Onboarding completed successfully")

    def _persist_api_keys(self, api_keys: Dict[str, Any], db) -> None:
        """Store step-1 API keys in the database and, locally, in the env."""
        # DEPLOY_ENV does not change while we loop, so read it once.
        is_local = os.getenv('DEPLOY_ENV', 'local') == 'local'
        for provider, key in api_keys.items():
            if not key:
                continue

            # Save to database (for user isolation in production)
            self.db_service.save_api_key(self.user_id, provider, key, db)

            if is_local:
                # Local developers also get the key through the API key manager.
                # NOTE(review): this imports services.api_key_manager rather than
                # the APIKeyManager defined in this module - confirm that module
                # still exists and is the one that really writes the .env file.
                try:
                    from services.api_key_manager import APIKeyManager
                    api_key_manager = APIKeyManager()
                    api_key_manager.save_api_key(provider, key)
                    logger.info(f"[LOCAL] API key for {provider} saved to .env file")
                except Exception as env_error:
                    logger.warning(f"[LOCAL] Failed to save {provider} API key to .env file: {env_error}")
            else:
                logger.info(f"[PRODUCTION] API key for {provider} saved to database only (user: {self.user_id})")

            # Log database save confirmation
            logger.info(f"✅ DATABASE: API key for {provider} saved to database for user {self.user_id}")

    def save_progress(self):
        """Save progress to database.

        Does nothing (beyond logging an error) when no database service or
        ``user_id`` is available.

        Raises:
            Exception: whatever the database layer raised, after logging it.
        """
        if not self.use_database or not self.db_service or not self.user_id:
            logger.error("Cannot save progress: database service not available or user_id not set")
            return

        try:
            from services.database import SessionLocal
            db = SessionLocal()
            try:
                # Update session progress
                self.db_service.update_step(self.user_id, self.current_step, db)

                # Skipped steps count as done, exactly as in get_progress_summary.
                self.db_service.update_progress(self.user_id, self._progress_percentage(), db)

                # Save step-specific data to appropriate tables
                for step in self.steps:
                    if step.status != StepStatus.COMPLETED or not step.data:
                        continue
                    if step.step_number == 1:  # API Keys
                        self._persist_api_keys(step.data.get('api_keys', {}), db)
                    elif step.step_number == 2:  # Website Analysis
                        self.db_service.save_website_analysis(self.user_id, step.data, db)
                        logger.info(f"✅ DATABASE: Website analysis saved to database for user {self.user_id}")
                    elif step.step_number == 3:  # Research Preferences
                        self.db_service.save_research_preferences(self.user_id, step.data, db)
                        logger.info(f"✅ DATABASE: Research preferences saved to database for user {self.user_id}")
                    elif step.step_number == 4:  # Persona Generation
                        self.db_service.save_persona_data(self.user_id, step.data, db)
                        logger.info(f"✅ DATABASE: Persona data saved to database for user {self.user_id}")

                logger.info(f"Progress saved to database for user {self.user_id}")
            finally:
                db.close()

        except Exception as e:
            logger.error(f"Error saving progress to database: {str(e)}")
            raise

    def load_progress_from_db(self):
        """Load progress from database.

        Best effort: any failure is logged and the in-memory defaults are kept.
        """
        if not self.use_database or not self.db_service or not self.user_id:
            logger.warning("Cannot load progress: database service not available or user_id not set")
            return

        try:
            from services.database import SessionLocal
            db = SessionLocal()
            try:
                # Get session data
                session = self.db_service.get_session_by_user(self.user_id, db)
                if not session:
                    logger.info(f"No existing onboarding session found for user {self.user_id}, starting fresh")
                    return

                # Restore session data
                self.current_step = session.current_step or 1
                if session.started_at:
                    self.started_at = session.started_at.isoformat()
                if session.last_updated:
                    self.last_updated = session.last_updated.isoformat()
                self.is_completed = session.is_completed or False
                self.completed_at = session.completed_at.isoformat() if session.completed_at else None

                # Load step-specific data from database
                self._load_step_data_from_db(db)

                # Fix any corrupted state
                self._fix_corrupted_state()

                logger.info(f"Progress loaded from database for user {self.user_id}")
            finally:
                db.close()
        except Exception as e:
            # Don't fail if database loading fails - start fresh
            logger.error(f"Error loading progress from database: {str(e)}")

    def _load_step_data_from_db(self, db):
        """Load step-specific data from database tables.

        Only steps 1-4 have backing tables; a step is restored as COMPLETED
        when its table returns data.
        """
        try:
            loaders = (
                (1, self.db_service.get_api_keys, lambda keys: {'api_keys': keys}),
                (2, self.db_service.get_website_analysis, None),
                (3, self.db_service.get_research_preferences, None),
                (4, self.db_service.get_persona_data, None),
            )
            for step_number, fetch, wrap in loaders:
                payload = fetch(self.user_id, db)
                if not payload:
                    continue
                step = self.get_step_data(step_number)
                if step is None:
                    continue
                step.status = StepStatus.COMPLETED
                step.data = wrap(payload) if wrap else payload
                # The real completion time is not read back here, so the load
                # time is used as a stand-in.
                step.completed_at = datetime.now().isoformat()

            logger.info("Step data loaded from database")
        except Exception as e:
            logger.error(f"Error loading step data from database: {str(e)}")

    def _fix_corrupted_state(self):
        """Reconcile ``current_step`` / ``is_completed`` with the step states."""
        first_open = next((s for s in self.steps if s.status not in self._FINISHED), None)

        if first_open is None:
            # Every step is completed or skipped.
            self.is_completed = True
            self.completed_at = self.completed_at or datetime.now().isoformat()
            self.current_step = len(self.steps)
        elif not self.is_completed:
            # Resume at the first step that is not done (pending, in progress
            # or failed). A session already recorded as completed is left
            # alone: steps 5-6 are never restored from the database, so they
            # always look open here and would otherwise rewind current_step.
            self.current_step = first_open.step_number


class APIKeyManager:
    """Manages API keys for different providers."""

    # Environment variables scanned at construction time.
    _ENV_VARS = (
        'OPENAI_API_KEY',
        'ANTHROPIC_API_KEY',
        'GEMINI_API_KEY',
        'MISTRAL_API_KEY',
        'TAVILY_API_KEY',
        'SERPER_API_KEY',
        'METAPHOR_API_KEY',
        'FIRECRAWL_API_KEY',
        'STABILITY_API_KEY',
    )

    def __init__(self):
        self.api_keys = {}
        self._load_from_env()

    def _load_from_env(self):
        """Load API keys from environment variables."""
        for env_var in self._ENV_VARS:
            key = os.getenv(env_var)
            if not key:
                continue
            # Keys are indexed by lowercase provider name, e.g. "openai".
            provider_name = env_var.replace('_API_KEY', '').lower()
            self.api_keys[provider_name] = key
            logger.info(f"Loaded {provider_name} API key from environment")

    def get_api_key(self, provider: str) -> Optional[str]:
        """Get API key for a provider (case-insensitive)."""
        return self.api_keys.get(provider.lower())

    def save_api_key(self, provider: str, api_key: str):
        """Save API key to memory and to this process's environment."""
        self.api_keys[provider.lower()] = api_key

        # Update environment variable
        os.environ[f"{provider.upper()}_API_KEY"] = api_key

        logger.info(f"Saved {provider} API key")

    def has_api_key(self, provider: str) -> bool:
        """Check if a non-empty API key exists for provider."""
        return bool(self.api_keys.get(provider.lower()))

    def get_all_keys(self) -> Dict[str, str]:
        """Get a copy of all API keys."""
        return self.api_keys.copy()

    def remove_api_key(self, provider: str):
        """Remove API key for provider from memory and the environment."""
        self.api_keys.pop(provider.lower(), None)

        # Remove from environment
        os.environ.pop(f"{provider.upper()}_API_KEY", None)

        logger.info(f"Removed {provider} API key")


# Global instances
_user_onboarding_progress_cache = {}


def get_user_onboarding_progress(user_id: str) -> OnboardingProgress:
    """Get user-specific onboarding progress instance.

    Instances are cached per exact ``user_id``. The id is used verbatim as the
    cache key: rewriting characters (e.g. ``.`` -> ``_``) would let two
    different users share one instance.
    """
    cache_key = str(user_id)
    instance = _user_onboarding_progress_cache.get(cache_key)
    if instance is None:
        # Pass user_id to enable database persistence
        instance = OnboardingProgress(user_id=user_id)
        _user_onboarding_progress_cache[cache_key] = instance
    return instance


def get_onboarding_progress_for_user(user_id: str) -> OnboardingProgress:
    """Get user-specific onboarding progress instance (alias for compatibility)."""
    return get_user_onboarding_progress(user_id)


def get_onboarding_progress():
    """Get the global (user-less) onboarding progress instance."""
    if not hasattr(get_onboarding_progress, '_instance'):
        get_onboarding_progress._instance = OnboardingProgress()
    return get_onboarding_progress._instance


def get_api_key_manager() -> APIKeyManager:
    """Get the global API key manager instance."""
    if not hasattr(get_api_key_manager, '_instance'):
        get_api_key_manager._instance = APIKeyManager()
    return get_api_key_manager._instance
try: - session = get_db_session() + session = self.db or get_db_session() # Find onboarding session for user onboarding_session = session.query(OnboardingSession).filter( @@ -287,4 +288,4 @@ class OnboardingDataService: "content_topics": ["Industry trends", "Expert insights"], "search_intent": {"intent": "practical", "focus": "implementation"} } - } \ No newline at end of file + } diff --git a/backend/services/onboarding_database_service.py b/backend/services/onboarding/database_service.py similarity index 100% rename from backend/services/onboarding_database_service.py rename to backend/services/onboarding/database_service.py diff --git a/backend/services/onboarding_progress_service.py b/backend/services/onboarding/progress_service.py similarity index 98% rename from backend/services/onboarding_progress_service.py rename to backend/services/onboarding/progress_service.py index 6afecfb0..ce051e36 100644 --- a/backend/services/onboarding_progress_service.py +++ b/backend/services/onboarding/progress_service.py @@ -10,7 +10,7 @@ from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError from services.database import SessionLocal -from services.onboarding_database_service import OnboardingDatabaseService +from .database_service import OnboardingDatabaseService class OnboardingProgressService: diff --git a/backend/services/progressive_setup_service.py b/backend/services/progressive_setup_service.py index 4451c885..afb3b4e4 100644 --- a/backend/services/progressive_setup_service.py +++ b/backend/services/progressive_setup_service.py @@ -12,7 +12,7 @@ from sqlalchemy.orm import Session from sqlalchemy import text from services.user_workspace_manager import UserWorkspaceManager -from services.api_key_manager import get_onboarding_progress_for_user +from services.onboarding.api_key_manager import get_onboarding_progress_for_user class ProgressiveSetupService: """Manages progressive backend setup based on user progress.""" diff --git 
class AnalyticsAggregator:
    """Aggregates analytics data from multiple platforms (GSC and Bing)."""

    def __init__(self):
        """Initialize the analytics aggregator (it holds no state)."""
        pass

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def combine_metrics(self, gsc_data: Dict[str, Any], bing_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Combine metrics from GSC and Bing data.

        Args:
            gsc_data: GSC analytics data
            bing_data: Bing analytics data

        Returns:
            Combined metrics dictionary; an all-zero dictionary of the same
            shape if anything goes wrong.
        """
        try:
            # Extract metrics from each platform
            gsc_metrics = self._extract_gsc_metrics(gsc_data)
            bing_metrics = self._extract_bing_metrics(bing_data)

            def total(key: str):
                return self._num(gsc_metrics, key) + self._num(bing_metrics, key)

            combined = {
                "clicks": total("clicks"),
                "impressions": total("impressions"),
                "ctr": self._calculate_combined_ctr(gsc_metrics, bing_metrics),
                "position": self._calculate_combined_position(gsc_metrics, bing_metrics),
                "queries": total("queries"),
                "pages": total("pages"),
                "countries": self._combine_countries(gsc_metrics.get("countries", []), bing_metrics.get("countries", [])),
                "devices": self._combine_devices(gsc_metrics.get("devices", []), bing_metrics.get("devices", [])),
                "sources": {
                    "gsc": gsc_metrics,
                    "bing": bing_metrics
                }
            }

            logger.debug(f"Combined metrics: {combined}")
            return combined

        except Exception as e:
            logger.error(f"Error combining metrics: {e}")
            return {
                "clicks": 0,
                "impressions": 0,
                "ctr": 0.0,
                "position": 0.0,
                "queries": 0,
                "pages": 0,
                "countries": [],
                "devices": [],
                "sources": {"gsc": {}, "bing": {}}
            }

    def normalize_timeseries(self, gsc_daily: List[Dict[str, Any]], bing_daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Normalize timeseries data from GSC and Bing to aligned date series.

        Rows without a ``date`` are skipped instead of discarding the whole
        series.

        Args:
            gsc_daily: GSC daily data
            bing_daily: Bing daily data

        Returns:
            Normalized timeseries data, one item per date in ascending order
        """
        try:
            # Convert to date-indexed dictionaries
            gsc_by_date = self._index_by_date(gsc_daily)
            bing_by_date = self._index_by_date(bing_daily)

            timeseries = []
            for date in sorted(gsc_by_date.keys() | bing_by_date.keys()):
                gsc_item = gsc_by_date.get(date, {})
                bing_item = bing_by_date.get(date, {})
                gsc_clicks = self._num(gsc_item, "clicks")
                gsc_impressions = self._num(gsc_item, "impressions")
                bing_clicks = self._num(bing_item, "clicks")
                bing_impressions = self._num(bing_item, "impressions")

                timeseries.append({
                    "date": date,
                    "clicks": gsc_clicks + bing_clicks,
                    "impressions": gsc_impressions + bing_impressions,
                    "ctr": self._calculate_daily_ctr(gsc_item, bing_item),
                    "position": self._calculate_daily_position(gsc_item, bing_item),
                    "gsc_clicks": gsc_clicks,
                    "gsc_impressions": gsc_impressions,
                    "bing_clicks": bing_clicks,
                    "bing_impressions": bing_impressions
                })

            logger.debug(f"Normalized timeseries with {len(timeseries)} data points")
            return timeseries

        except Exception as e:
            logger.error(f"Error normalizing timeseries: {e}")
            return []

    def top_queries_combined(self, gsc_data: Dict[str, Any], bing_data: Dict[str, Any], limit: int = 20) -> List[Dict[str, Any]]:
        """
        Get top queries combined from GSC and Bing data.

        Queries are matched case-insensitively and ranked by total clicks.

        Args:
            gsc_data: GSC data
            bing_data: Bing data
            limit: Maximum number of queries to return

        Returns:
            List of top queries with source tags
        """
        try:
            query_map: Dict[str, Dict[str, Any]] = {}
            self._merge_query_rows(query_map, self._extract_gsc_queries(gsc_data), "gsc")
            self._merge_query_rows(query_map, self._extract_bing_queries(bing_data), "bing")

            # Sort by total clicks and return top N
            sorted_queries = sorted(
                query_map.values(),
                key=lambda x: x["total_clicks"],
                reverse=True
            )

            logger.debug(f"Combined {len(sorted_queries)} unique queries, returning top {limit}")
            return sorted_queries[:limit]

        except Exception as e:
            logger.error(f"Error combining top queries: {e}")
            return []

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _num(row: Dict[str, Any], key: str):
        """Return ``row[key]``, treating a missing or ``None`` value as 0."""
        value = row.get(key) if row else None
        return 0 if value is None else value

    @staticmethod
    def _index_by_date(rows: List[Dict[str, Any]]) -> Dict[Any, Dict[str, Any]]:
        """Index daily rows by their ``date``; rows without one are dropped."""
        return {row["date"]: row for row in (rows or []) if row.get("date") is not None}

    @staticmethod
    def _platform_payload(platform_data: Dict[str, Any]):
        """Return the ``data`` payload, or ``None`` for missing/errored input."""
        if platform_data is None or "error" in platform_data:
            return None
        return platform_data.get("data") or {}

    @classmethod
    def _ratio_ctr(cls, gsc: Dict[str, Any], bing: Dict[str, Any]) -> float:
        """Return total clicks / total impressions, or 0.0 without impressions."""
        clicks = cls._num(gsc, "clicks") + cls._num(bing, "clicks")
        impressions = cls._num(gsc, "impressions") + cls._num(bing, "impressions")
        return clicks / impressions if impressions > 0 else 0.0

    @classmethod
    def _weighted_position(cls, gsc: Dict[str, Any], bing: Dict[str, Any]) -> float:
        """Average the platforms' positions, weighted by impressions.

        A position of 0 means "no data" for that platform and is ignored, so
        it cannot dilute the other platform's ranking. When none of the
        contributing platforms reports impressions, a plain mean is used.
        """
        samples = [
            (position, impressions)
            for position, impressions in (
                (cls._num(gsc, "position"), cls._num(gsc, "impressions")),
                (cls._num(bing, "position"), cls._num(bing, "impressions")),
            )
            if position > 0
        ]
        if not samples:
            return 0.0
        weight = sum(impressions for _, impressions in samples)
        if weight > 0:
            return sum(position * impressions for position, impressions in samples) / weight
        return sum(position for position, _ in samples) / len(samples)

    @classmethod
    def _merge_query_rows(cls, query_map: Dict[str, Dict[str, Any]], rows: List[Dict[str, Any]], source: str) -> None:
        """Fold one platform's query rows into ``query_map`` (keyed lowercase).

        Clicks and impressions accumulate, so case-variant duplicates from the
        same platform keep per-platform figures in step with the totals. CTR
        and position are taken from the most recent row for that platform.
        """
        for row in rows or []:
            text = row.get("query") or ""
            entry = query_map.get(text.lower())
            if entry is None:
                entry = query_map[text.lower()] = {
                    "query": text,
                    "gsc_clicks": 0,
                    "gsc_impressions": 0,
                    "gsc_ctr": 0,
                    "gsc_position": 0,
                    "bing_clicks": 0,
                    "bing_impressions": 0,
                    "bing_ctr": 0,
                    "bing_position": 0,
                    "total_clicks": 0,
                    "total_impressions": 0,
                    "sources": []
                }
            clicks = cls._num(row, "clicks")
            impressions = cls._num(row, "impressions")
            entry[f"{source}_clicks"] += clicks
            entry[f"{source}_impressions"] += impressions
            entry[f"{source}_ctr"] = row.get("ctr", 0)
            entry[f"{source}_position"] = row.get("position", 0)
            entry["total_clicks"] += clicks
            entry["total_impressions"] += impressions
            if source not in entry["sources"]:
                entry["sources"].append(source)

    @classmethod
    def _combine_dimension(cls, field: str, gsc_rows: List[Dict[str, Any]], bing_rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Merge per-``field`` rows (country/device) from both platforms.

        Returns one entry per distinct ``field`` value, sorted by total clicks
        in descending order.
        """
        merged: Dict[Any, Dict[str, Any]] = {}
        for source, rows in (("gsc", gsc_rows), ("bing", bing_rows)):
            for row in rows or []:
                name = row.get(field, "")
                entry = merged.get(name)
                if entry is None:
                    entry = merged[name] = {
                        field: name,
                        "gsc_clicks": 0,
                        "gsc_impressions": 0,
                        "bing_clicks": 0,
                        "bing_impressions": 0,
                        "total_clicks": 0,
                        "total_impressions": 0
                    }
                clicks = cls._num(row, "clicks")
                impressions = cls._num(row, "impressions")
                entry[f"{source}_clicks"] += clicks
                entry[f"{source}_impressions"] += impressions
                entry["total_clicks"] += clicks
                entry["total_impressions"] += impressions

        # Sort by total clicks
        return sorted(merged.values(), key=lambda x: x["total_clicks"], reverse=True)

    def _extract_platform_metrics(self, platform_data: Dict[str, Any], label: str) -> Dict[str, Any]:
        """Pull the headline metrics out of one platform's response."""
        try:
            data = self._platform_payload(platform_data)
            if data is None:
                return {}
            return {
                "clicks": data.get("clicks", 0),
                "impressions": data.get("impressions", 0),
                "ctr": data.get("ctr", 0.0),
                "position": data.get("position", 0.0),
                "queries": len(data.get("queries") or []),
                "pages": len(data.get("pages") or []),
                "countries": data.get("countries", []),
                "devices": data.get("devices", [])
            }
        except Exception as e:
            logger.error(f"Error extracting {label} metrics: {e}")
            return {}

    def _extract_platform_queries(self, platform_data: Dict[str, Any], label: str) -> List[Dict[str, Any]]:
        """Pull the query rows out of one platform's response."""
        try:
            data = self._platform_payload(platform_data)
            if data is None:
                return []
            return data.get("queries", [])
        except Exception as e:
            logger.error(f"Error extracting {label} queries: {e}")
            return []

    # ------------------------------------------------------------------
    # Original private entry points (signatures unchanged)
    # ------------------------------------------------------------------

    def _extract_gsc_metrics(self, gsc_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract metrics from GSC data."""
        return self._extract_platform_metrics(gsc_data, "GSC")

    def _extract_bing_metrics(self, bing_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract metrics from Bing data."""
        return self._extract_platform_metrics(bing_data, "Bing")

    def _extract_gsc_queries(self, gsc_data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract queries from GSC data."""
        return self._extract_platform_queries(gsc_data, "GSC")

    def _extract_bing_queries(self, bing_data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract queries from Bing data."""
        return self._extract_platform_queries(bing_data, "Bing")

    def _calculate_combined_ctr(self, gsc_metrics: Dict[str, Any], bing_metrics: Dict[str, Any]) -> float:
        """Calculate combined CTR from GSC and Bing metrics."""
        try:
            return self._ratio_ctr(gsc_metrics, bing_metrics)
        except Exception as e:
            logger.error(f"Error calculating combined CTR: {e}")
            return 0.0

    def _calculate_combined_position(self, gsc_metrics: Dict[str, Any], bing_metrics: Dict[str, Any]) -> float:
        """Calculate combined average position from GSC and Bing metrics."""
        try:
            return self._weighted_position(gsc_metrics, bing_metrics)
        except Exception as e:
            logger.error(f"Error calculating combined position: {e}")
            return 0.0

    def _calculate_daily_ctr(self, gsc_item: Dict[str, Any], bing_item: Dict[str, Any]) -> float:
        """Calculate CTR for a single day."""
        try:
            return self._ratio_ctr(gsc_item, bing_item)
        except Exception as e:
            logger.error(f"Error calculating daily CTR: {e}")
            return 0.0

    def _calculate_daily_position(self, gsc_item: Dict[str, Any], bing_item: Dict[str, Any]) -> float:
        """Calculate average position for a single day.

        Uses the same impression weighting as the combined position so the
        daily series and the headline figure agree.
        """
        try:
            return self._weighted_position(gsc_item, bing_item)
        except Exception as e:
            logger.error(f"Error calculating daily position: {e}")
            return 0.0

    def _combine_countries(self, gsc_countries: List[Dict[str, Any]], bing_countries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Combine country data from GSC and Bing."""
        try:
            return self._combine_dimension("country", gsc_countries, bing_countries)
        except Exception as e:
            logger.error(f"Error combining countries: {e}")
            return []

    def _combine_devices(self, gsc_devices: List[Dict[str, Any]], bing_devices: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Combine device data from GSC and Bing."""
        try:
            return self._combine_dimension("device", gsc_devices, bing_devices)
        except Exception as e:
            logger.error(f"Error combining devices: {e}")
            return []
+""" + +from typing import Dict, Any, List, Optional, Set, Tuple +from datetime import datetime, timedelta +from sqlalchemy.orm import Session +from loguru import logger + +from utils.logger_utils import get_service_logger +from services.onboarding.data_service import OnboardingDataService +from services.calendar_generation_datasource_framework.data_processing.comprehensive_user_data import ComprehensiveUserDataProcessor + +logger = get_service_logger("competitive_analyzer") + +class CompetitiveAnalyzer: + """Analyzes competitive landscape using onboarding research data and analytics.""" + + def __init__(self, db: Session): + """Initialize the competitive analyzer.""" + self.db = db + self.user_data_service = OnboardingDataService(db) + self.comprehensive_processor = ComprehensiveUserDataProcessor(db) + + async def get_competitive_insights(self, user_id: str) -> Dict[str, Any]: + """ + Get comprehensive competitive insights for a user. + + Args: + user_id: User ID + + Returns: + Dictionary containing competitive insights + """ + try: + # Get user's research preferences and competitor data + research_prefs = self.user_data_service.get_user_research_preferences(user_id) + competitors = research_prefs.get('competitors', []) if research_prefs else [] + + if not competitors: + logger.info(f"No competitors found for user {user_id}") + return { + "competitor_keywords": [], + "content_gaps": [], + "opportunity_score": 0, + "competitors_analyzed": 0, + "last_updated": datetime.now().isoformat() + } + + # Get comprehensive user data including competitor analysis + comprehensive_data = self.comprehensive_processor.get_comprehensive_user_data(user_id) + competitor_analysis = comprehensive_data.get('competitor_analysis', {}) + + # Extract competitor keywords and content topics + competitor_keywords = self._extract_competitor_keywords(competitor_analysis, competitors) + + # Get user's current keywords from GSC/Bing (would be passed in real implementation) + user_keywords = 
self._get_user_keywords(user_id) + + # Find content gaps + content_gaps = self._find_content_gaps(user_keywords, competitor_keywords) + + # Calculate opportunity score + opportunity_score = self._calculate_opportunity_score(content_gaps, competitor_keywords) + + # Generate actionable insights + insights = self._generate_insights(content_gaps, competitor_keywords, opportunity_score) + + return { + "competitor_keywords": competitor_keywords, + "content_gaps": content_gaps, + "opportunity_score": opportunity_score, + "competitors_analyzed": len(competitors), + "insights": insights, + "last_updated": datetime.now().isoformat() + } + + except Exception as e: + logger.error(f"Error getting competitive insights for user {user_id}: {e}") + return { + "competitor_keywords": [], + "content_gaps": [], + "opportunity_score": 0, + "competitors_analyzed": 0, + "insights": [], + "last_updated": datetime.now().isoformat() + } + + def _extract_competitor_keywords(self, competitor_analysis: Dict[str, Any], competitors: List[str]) -> List[Dict[str, Any]]: + """Extract keywords from competitor analysis.""" + try: + keywords = [] + + # Extract from competitor analysis data + for competitor_url in competitors: + competitor_data = competitor_analysis.get(competitor_url, {}) + + # Extract keywords from various sources + competitor_keywords = competitor_data.get('keywords', []) + content_topics = competitor_data.get('content_topics', []) + meta_keywords = competitor_data.get('meta_keywords', []) + + # Combine all keyword sources + all_keywords = set() + all_keywords.update(competitor_keywords) + all_keywords.update(content_topics) + all_keywords.update(meta_keywords) + + # Add to keywords list with competitor attribution + for keyword in all_keywords: + if keyword and len(keyword.strip()) > 0: + keywords.append({ + "keyword": keyword.strip(), + "competitor": competitor_url, + "source": "analysis", + "volume_estimate": competitor_data.get('keyword_volume', {}).get(keyword, 0), + 
"difficulty_estimate": competitor_data.get('keyword_difficulty', {}).get(keyword, 0), + "relevance_score": self._calculate_relevance_score(keyword, competitor_data) + }) + + # Remove duplicates and sort by relevance + unique_keywords = self._deduplicate_keywords(keywords) + sorted_keywords = sorted(unique_keywords, key=lambda x: x['relevance_score'], reverse=True) + + logger.debug(f"Extracted {len(sorted_keywords)} unique competitor keywords") + return sorted_keywords[:100] # Limit to top 100 + + except Exception as e: + logger.error(f"Error extracting competitor keywords: {e}") + return [] + + def _get_user_keywords(self, user_id: str) -> Set[str]: + """Get user's current keywords from GSC/Bing data.""" + try: + # In a real implementation, this would fetch from GSC/Bing APIs + # For now, return empty set as placeholder + # This would be called from the dashboard service with actual query data + return set() + except Exception as e: + logger.error(f"Error getting user keywords: {e}") + return set() + + def _find_content_gaps(self, user_keywords: Set[str], competitor_keywords: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Find content gaps between user and competitors.""" + try: + content_gaps = [] + user_keywords_lower = {kw.lower() for kw in user_keywords} + + for comp_keyword in competitor_keywords: + keyword = comp_keyword['keyword'].lower() + + # Check if user doesn't have this keyword + if keyword not in user_keywords_lower: + # Check for partial matches (related keywords) + is_related = any( + self._are_keywords_related(keyword, user_kw) + for user_kw in user_keywords_lower + ) + + if not is_related: + content_gaps.append({ + "keyword": comp_keyword['keyword'], + "competitor": comp_keyword['competitor'], + "volume_estimate": comp_keyword.get('volume_estimate', 0), + "difficulty_estimate": comp_keyword.get('difficulty_estimate', 0), + "relevance_score": comp_keyword['relevance_score'], + "opportunity_type": self._classify_opportunity_type(comp_keyword), 
+ "content_suggestion": self._generate_content_suggestion(comp_keyword) + }) + + # Sort by opportunity score (volume * relevance / difficulty) + sorted_gaps = sorted( + content_gaps, + key=lambda x: (x['volume_estimate'] * x['relevance_score']) / max(x['difficulty_estimate'], 1), + reverse=True + ) + + logger.debug(f"Found {len(sorted_gaps)} content gaps") + return sorted_gaps[:50] # Limit to top 50 + + except Exception as e: + logger.error(f"Error finding content gaps: {e}") + return [] + + def _calculate_opportunity_score(self, content_gaps: List[Dict[str, Any]], competitor_keywords: List[Dict[str, Any]]) -> int: + """Calculate overall opportunity score (0-100).""" + try: + if not content_gaps: + return 0 + + # Calculate average opportunity metrics + avg_volume = sum(gap['volume_estimate'] for gap in content_gaps) / len(content_gaps) + avg_relevance = sum(gap['relevance_score'] for gap in content_gaps) / len(content_gaps) + avg_difficulty = sum(gap['difficulty_estimate'] for gap in content_gaps) / len(content_gaps) + + # Calculate opportunity score + # Higher volume and relevance = higher score + # Lower difficulty = higher score + volume_score = min(avg_volume / 1000, 1.0) * 40 # Max 40 points for volume + relevance_score = avg_relevance * 30 # Max 30 points for relevance + difficulty_score = max(0, (10 - avg_difficulty) / 10) * 30 # Max 30 points for low difficulty + + total_score = volume_score + relevance_score + difficulty_score + opportunity_score = min(int(total_score), 100) + + logger.debug(f"Calculated opportunity score: {opportunity_score}") + return opportunity_score + + except Exception as e: + logger.error(f"Error calculating opportunity score: {e}") + return 0 + + def _generate_insights(self, content_gaps: List[Dict[str, Any]], competitor_keywords: List[Dict[str, Any]], opportunity_score: int) -> List[Dict[str, Any]]: + """Generate actionable insights from competitive analysis.""" + try: + insights = [] + + # High opportunity score insight + if 
opportunity_score > 70: + insights.append({ + "type": "opportunity", + "priority": "high", + "title": "High Competitive Opportunity", + "description": f"Your opportunity score is {opportunity_score}% - competitors are ranking for many keywords you're not targeting.", + "action": "Create content for the identified keyword gaps to capture more organic traffic." + }) + elif opportunity_score > 40: + insights.append({ + "type": "opportunity", + "priority": "medium", + "title": "Moderate Competitive Opportunity", + "description": f"Your opportunity score is {opportunity_score}% - there are some keyword gaps you could target.", + "action": "Review the content gaps and prioritize high-volume, low-difficulty keywords." + }) + + # Content gap insights + if content_gaps: + high_volume_gaps = [gap for gap in content_gaps if gap['volume_estimate'] > 500] + if high_volume_gaps: + insights.append({ + "type": "content", + "priority": "high", + "title": "High-Volume Content Gaps", + "description": f"Found {len(high_volume_gaps)} high-volume keywords that competitors rank for but you don't.", + "action": "Create comprehensive content targeting these high-volume keywords." + }) + + low_difficulty_gaps = [gap for gap in content_gaps if gap['difficulty_estimate'] < 3] + if low_difficulty_gaps: + insights.append({ + "type": "content", + "priority": "medium", + "title": "Low-Difficulty Content Gaps", + "description": f"Found {len(low_difficulty_gaps)} low-difficulty keywords that would be easy to rank for.", + "action": "Quick wins: Create content for these low-difficulty keywords first." 
+ }) + + # Competitor analysis insights + if competitor_keywords: + top_competitors = {} + for kw in competitor_keywords: + competitor = kw['competitor'] + if competitor not in top_competitors: + top_competitors[competitor] = 0 + top_competitors[competitor] += 1 + + top_competitor = max(top_competitors.items(), key=lambda x: x[1]) if top_competitors else None + if top_competitor: + insights.append({ + "type": "competitive", + "priority": "medium", + "title": "Top Competitor Analysis", + "description": f"{top_competitor[0]} has the most keyword overlap with your content strategy.", + "action": f"Analyze {top_competitor[0]}'s content strategy for additional keyword opportunities." + }) + + return insights + + except Exception as e: + logger.error(f"Error generating insights: {e}") + return [] + + def _deduplicate_keywords(self, keywords: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Remove duplicate keywords and merge data.""" + try: + keyword_map = {} + + for kw in keywords: + keyword = kw['keyword'].lower() + if keyword in keyword_map: + # Merge data from multiple competitors + existing = keyword_map[keyword] + existing['competitors'].append(kw['competitor']) + existing['volume_estimate'] = max(existing['volume_estimate'], kw['volume_estimate']) + existing['relevance_score'] = max(existing['relevance_score'], kw['relevance_score']) + else: + keyword_map[keyword] = { + 'keyword': kw['keyword'], + 'competitors': [kw['competitor']], + 'source': kw['source'], + 'volume_estimate': kw['volume_estimate'], + 'difficulty_estimate': kw['difficulty_estimate'], + 'relevance_score': kw['relevance_score'] + } + + return list(keyword_map.values()) + + except Exception as e: + logger.error(f"Error deduplicating keywords: {e}") + return [] + + def _calculate_relevance_score(self, keyword: str, competitor_data: Dict[str, Any]) -> float: + """Calculate relevance score for a keyword based on competitor data.""" + try: + # Base relevance score + relevance = 0.5 + + # Increase 
relevance based on keyword frequency in competitor content + content_frequency = competitor_data.get('content_frequency', {}) + if keyword in content_frequency: + relevance += min(content_frequency[keyword] / 10, 0.3) + + # Increase relevance based on meta keyword presence + meta_keywords = competitor_data.get('meta_keywords', []) + if keyword in meta_keywords: + relevance += 0.2 + + # Increase relevance based on title presence + titles = competitor_data.get('titles', []) + if any(keyword.lower() in title.lower() for title in titles): + relevance += 0.2 + + # Normalize to 0-1 range + return min(relevance, 1.0) + + except Exception as e: + logger.error(f"Error calculating relevance score: {e}") + return 0.5 + + def _are_keywords_related(self, keyword1: str, keyword2: str) -> bool: + """Check if two keywords are related.""" + try: + # Simple similarity check - can be enhanced with NLP + words1 = set(keyword1.lower().split()) + words2 = set(keyword2.lower().split()) + + # Check for word overlap + overlap = len(words1.intersection(words2)) + total_words = len(words1.union(words2)) + + if total_words == 0: + return False + + similarity = overlap / total_words + return similarity > 0.3 # 30% word overlap threshold + + except Exception as e: + logger.error(f"Error checking keyword relatedness: {e}") + return False + + def _classify_opportunity_type(self, keyword_data: Dict[str, Any]) -> str: + """Classify the type of opportunity for a keyword.""" + try: + volume = keyword_data.get('volume_estimate', 0) + difficulty = keyword_data.get('difficulty_estimate', 0) + relevance = keyword_data.get('relevance_score', 0) + + if volume > 1000 and difficulty < 5 and relevance > 0.7: + return "high_priority" + elif volume > 500 and difficulty < 7 and relevance > 0.5: + return "medium_priority" + elif volume > 100 and difficulty < 8: + return "low_priority" + else: + return "long_term" + + except Exception as e: + logger.error(f"Error classifying opportunity type: {e}") + return 
"unknown" + + def _generate_content_suggestion(self, keyword_data: Dict[str, Any]) -> str: + """Generate content suggestion for a keyword.""" + try: + keyword = keyword_data['keyword'] + opportunity_type = self._classify_opportunity_type(keyword_data) + + suggestions = { + "high_priority": f"Create comprehensive, in-depth content targeting '{keyword}' - high volume, low difficulty opportunity.", + "medium_priority": f"Consider creating content around '{keyword}' - good volume with moderate competition.", + "low_priority": f"'{keyword}' could be a good long-tail keyword to target in future content.", + "long_term": f"'{keyword}' might be worth monitoring for future content opportunities." + } + + return suggestions.get(opportunity_type, f"Consider creating content around '{keyword}'.") + + except Exception as e: + logger.error(f"Error generating content suggestion: {e}") + return f"Consider creating content around '{keyword_data.get('keyword', 'this keyword')}'." \ No newline at end of file diff --git a/backend/services/seo/dashboard_service.py b/backend/services/seo/dashboard_service.py new file mode 100644 index 00000000..e39e3ccf --- /dev/null +++ b/backend/services/seo/dashboard_service.py @@ -0,0 +1,397 @@ +""" +SEO Dashboard Service + +Main orchestration service that coordinates data fetching from GSC, Bing, +and other analytics sources for the SEO dashboard. Leverages existing +OAuth connections from onboarding step 5. 
+""" + +from typing import Dict, Any, Optional, List +from datetime import datetime, timedelta +from sqlalchemy.orm import Session +from loguru import logger + +from utils.logger_utils import get_service_logger +from services.gsc_service import GSCService +from services.integrations.bing_oauth import BingOAuthService +from services.bing_analytics_storage_service import BingAnalyticsStorageService +from services.analytics_cache_service import AnalyticsCacheService +from services.onboarding.data_service import OnboardingDataService +from .analytics_aggregator import AnalyticsAggregator +from .competitive_analyzer import CompetitiveAnalyzer + +logger = get_service_logger("seo_dashboard") + +class SEODashboardService: + """Main service for SEO dashboard data orchestration.""" + + def __init__(self, db: Session): + """Initialize the SEO dashboard service.""" + self.db = db + self.gsc_service = GSCService() + self.bing_oauth = BingOAuthService() + self.bing_storage = BingAnalyticsStorageService("sqlite:///alwrity.db") + self.analytics_cache = AnalyticsCacheService() + self.user_data_service = OnboardingDataService(db) + self.analytics_aggregator = AnalyticsAggregator() + self.competitive_analyzer = CompetitiveAnalyzer(db) + + async def get_platform_status(self, user_id: str) -> Dict[str, Any]: + """Get connection status for GSC and Bing platforms.""" + try: + # Check GSC connection + gsc_credentials = self.gsc_service.load_user_credentials(user_id) + gsc_connected = gsc_credentials is not None + + # Check Bing connection with detailed status + bing_token_status = self.bing_oauth.get_user_token_status(user_id) + bing_connected = bing_token_status.get('has_active_tokens', False) + + # Get cached data for last sync info + gsc_data = self.analytics_cache.get('gsc_analytics', user_id) + bing_data = self.analytics_cache.get('bing_analytics', user_id) + + return { + "gsc": { + "connected": gsc_connected, + "sites": self._get_gsc_sites(user_id) if gsc_connected else [], + 
"last_sync": gsc_data.get('last_updated') if gsc_data else None, + "status": "connected" if gsc_connected else "disconnected" + }, + "bing": { + "connected": bing_connected, + "sites": self._get_bing_sites(user_id) if bing_connected else [], + "last_sync": bing_data.get('last_updated') if bing_data else None, + "status": "connected" if bing_connected else ("expired" if bing_token_status.get('has_expired_tokens') else "disconnected"), + "has_expired_tokens": bing_token_status.get('has_expired_tokens', False), + "last_token_date": bing_token_status.get('last_token_date'), + "total_tokens": bing_token_status.get('total_tokens', 0) + } + } + + except Exception as e: + logger.error(f"Error getting platform status for user {user_id}: {e}") + return { + "gsc": {"connected": False, "sites": [], "last_sync": None, "status": "error"}, + "bing": {"connected": False, "sites": [], "last_sync": None, "status": "error"} + } + + async def get_dashboard_overview(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]: + """Get comprehensive dashboard overview with real GSC/Bing data.""" + try: + # Get user's website URL if not provided + if not site_url: + # Try to get from website analysis first + website_analysis = self.user_data_service.get_user_website_analysis(int(user_id)) + if website_analysis and website_analysis.get('website_url'): + site_url = website_analysis['website_url'] + else: + # Fallback: try to get from Bing sites + bing_sites = self._get_bing_sites(user_id) + if bing_sites: + site_url = bing_sites[0] # Use first Bing site + else: + site_url = 'https://alwrity.com' # Default fallback + + # Get platform status + platform_status = await self.get_platform_status(user_id) + + # Get analytics data + gsc_data = await self.get_gsc_data(user_id, site_url) + bing_data = await self.get_bing_data(user_id, site_url) + + # Aggregate metrics + summary = self.analytics_aggregator.combine_metrics(gsc_data, bing_data) + timeseries = 
self.analytics_aggregator.normalize_timeseries( + gsc_data.get("timeseries", []), + bing_data.get("timeseries", []) + ) + + # Get competitive insights + competitor_insights = await self.competitive_analyzer.get_competitive_insights(user_id) + + # Calculate health score + health_score = self._calculate_health_score(summary, platform_status) + + # Generate AI insights + ai_insights = await self._generate_ai_insights(summary, timeseries, competitor_insights) + + return { + "website_url": site_url, + "platforms": platform_status, + "summary": summary, + "timeseries": timeseries, + "competitor_insights": competitor_insights, + "health_score": health_score, + "ai_insights": ai_insights, + "last_updated": datetime.now().isoformat() + } + + except Exception as e: + logger.error(f"Error getting dashboard overview for user {user_id}: {e}") + raise + + async def get_gsc_data(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]: + """Get GSC data for the specified site.""" + try: + # Check if user has GSC credentials + credentials = self.gsc_service.load_user_credentials(user_id) + if not credentials: + return {"error": "GSC not connected", "data": [], "status": "disconnected"} + + # Try to get from cache first + cache_key = f"gsc_analytics:{user_id}:{site_url or 'default'}" + cached_data = self.analytics_cache.get('gsc_analytics', user_id, site_url=site_url or 'default') + if cached_data: + return cached_data + + # Fetch fresh data from GSC API + if site_url: + gsc_data = self.gsc_service.get_search_analytics(user_id, site_url) + else: + # Get all sites for user + sites = self._get_gsc_sites(user_id) + if sites: + gsc_data = self.gsc_service.get_search_analytics(user_id, sites[0]) + else: + return {"error": "No GSC sites found", "data": [], "status": "disconnected"} + + # Cache the data + self.analytics_cache.set('gsc_analytics', user_id, gsc_data, ttl_override=3600, site_url=site_url or 'default') # 1 hour cache + + return gsc_data + + except Exception as e: 
+ logger.error(f"Error getting GSC data for user {user_id}: {e}") + return {"error": str(e), "data": [], "status": "error"} + + async def get_bing_data(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]: + """Get Bing Webmaster Tools data for the specified site.""" + try: + # Check if user has Bing tokens + tokens = self.bing_oauth.get_user_tokens(user_id) + if not tokens: + return {"error": "Bing not connected", "data": [], "status": "disconnected"} + + # Try to get from cache first + cache_key = f"bing_analytics:{user_id}:{site_url or 'default'}" + cached_data = self.analytics_cache.get('bing_analytics', user_id, site_url=site_url or 'default') + if cached_data: + return cached_data + + # Get data from Bing storage service + if site_url: + bing_data = self.bing_storage.get_analytics_summary(user_id, site_url, days=30) + else: + # Get all sites for user + sites = self._get_bing_sites(user_id) + if sites: + logger.info(f"Using first Bing site for analysis: {sites[0]}") + bing_data = self.bing_storage.get_analytics_summary(user_id, sites[0], days=30) + else: + logger.warning(f"No Bing sites found for user {user_id}") + return {"error": "No Bing sites found", "data": [], "status": "disconnected"} + + # Cache the data + self.analytics_cache.set('bing_analytics', user_id, bing_data, ttl_override=3600, site_url=site_url or 'default') # 1 hour cache + + return bing_data + + except Exception as e: + logger.error(f"Error getting Bing data for user {user_id}: {e}") + return {"error": str(e), "data": [], "status": "error"} + + async def get_competitive_insights(self, user_id: str) -> Dict[str, Any]: + """Get competitive insights from onboarding step 3 data.""" + try: + return await self.competitive_analyzer.get_competitive_insights(user_id) + except Exception as e: + logger.error(f"Error getting competitive insights for user {user_id}: {e}") + return { + "competitor_keywords": [], + "content_gaps": [], + "opportunity_score": 0 + } + + async def 
refresh_analytics_data(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]: + """Refresh analytics data by invalidating cache and fetching fresh data.""" + try: + # Invalidate cache + cache_keys = [ + f"gsc_analytics:{user_id}", + f"bing_analytics:{user_id}", + f"gsc_analytics:{user_id}:{site_url or 'default'}", + f"bing_analytics:{user_id}:{site_url or 'default'}" + ] + + for key in cache_keys: + self.analytics_cache.delete(key) + + # Fetch fresh data + gsc_result = await self.get_gsc_data(user_id, site_url) + bing_result = await self.get_bing_data(user_id, site_url) + + return { + "status": "success", + "message": "Analytics data refreshed successfully", + "last_updated": datetime.now().isoformat(), + "platforms": { + "gsc": {"status": "success" if "error" not in gsc_result else "error"}, + "bing": {"status": "success" if "error" not in bing_result else "error"} + } + } + + except Exception as e: + logger.error(f"Error refreshing analytics data for user {user_id}: {e}") + return { + "status": "error", + "message": f"Failed to refresh analytics data: {str(e)}", + "last_updated": datetime.now().isoformat() + } + + def _get_gsc_sites(self, user_id: str) -> List[str]: + """Get GSC sites for user.""" + try: + credentials = self.gsc_service.load_user_credentials(user_id) + if not credentials: + return [] + + # This would need to be implemented in GSCService + # For now, return empty list + return [] + except Exception as e: + logger.error(f"Error getting GSC sites for user {user_id}: {e}") + return [] + + def _get_bing_sites(self, user_id: str) -> List[str]: + """Get Bing sites for user.""" + try: + # Use the existing get_user_sites method from BingOAuthService + sites = self.bing_oauth.get_user_sites(user_id) + if not sites: + logger.warning(f"No Bing sites found for user {user_id}") + return [] + + # Extract site URLs from the sites data + site_urls = [] + for site in sites: + if isinstance(site, dict) and site.get('url'): + 
site_urls.append(site['url']) + elif isinstance(site, str): + site_urls.append(site) + + logger.info(f"Found {len(site_urls)} Bing sites for user {user_id}: {site_urls}") + return site_urls + + except Exception as e: + logger.error(f"Error getting Bing sites for user {user_id}: {e}") + return [] + + def _calculate_health_score(self, summary: Dict[str, Any], platform_status: Dict[str, Any]) -> Dict[str, Any]: + """Calculate overall SEO health score.""" + try: + score = 0 + max_score = 100 + + # Base score for connected platforms + if platform_status.get("gsc", {}).get("connected"): + score += 30 + if platform_status.get("bing", {}).get("connected"): + score += 20 + + # Traffic score (0-30) + clicks = summary.get("clicks", 0) + if clicks > 1000: + score += 30 + elif clicks > 500: + score += 20 + elif clicks > 100: + score += 10 + + # CTR score (0-20) + ctr = summary.get("ctr", 0) + if ctr > 0.05: # 5% + score += 20 + elif ctr > 0.03: # 3% + score += 15 + elif ctr > 0.01: # 1% + score += 10 + + # Determine trend and color + if score >= 80: + trend = "up" + label = "EXCELLENT" + color = "#4CAF50" + elif score >= 60: + trend = "stable" + label = "GOOD" + color = "#2196F3" + elif score >= 40: + trend = "down" + label = "NEEDS IMPROVEMENT" + color = "#FF9800" + else: + trend = "down" + label = "POOR" + color = "#F44336" + + return { + "score": score, + "change": 0, # Would need historical data to calculate + "trend": trend, + "label": label, + "color": color + } + + except Exception as e: + logger.error(f"Error calculating health score: {e}") + return { + "score": 0, + "change": 0, + "trend": "unknown", + "label": "UNKNOWN", + "color": "#9E9E9E" + } + + async def _generate_ai_insights(self, summary: Dict[str, Any], timeseries: List[Dict[str, Any]], competitor_insights: Dict[str, Any]) -> List[Dict[str, Any]]: + """Generate AI insights from analytics data.""" + try: + insights = [] + + # Traffic insights + clicks = summary.get("clicks", 0) + ctr = summary.get("ctr", 0) + + 
if clicks > 0 and ctr < 0.02: # Low CTR + insights.append({ + "type": "opportunity", + "priority": "high", + "text": f"Your CTR is {ctr:.1%}, which is below average. Consider optimizing your meta descriptions and titles.", + "category": "performance" + }) + + # Competitive insights + opportunity_score = competitor_insights.get("opportunity_score", 0) + if opportunity_score > 70: + insights.append({ + "type": "opportunity", + "priority": "high", + "text": f"High opportunity score of {opportunity_score}% - competitors are ranking for keywords you're not targeting.", + "category": "competitive" + }) + + # Content gaps + content_gaps = competitor_insights.get("content_gaps", []) + if content_gaps: + insights.append({ + "type": "action", + "priority": "medium", + "text": f"Found {len(content_gaps)} content gaps. Consider creating content for these topics.", + "category": "content" + }) + + return insights + + except Exception as e: + logger.error(f"Error generating AI insights: {e}") + return [] \ No newline at end of file diff --git a/backend/services/seo_analyzer/utils.py b/backend/services/seo_analyzer/utils.py index 724d053b..af9bdf22 100644 --- a/backend/services/seo_analyzer/utils.py +++ b/backend/services/seo_analyzer/utils.py @@ -18,14 +18,43 @@ class HTMLFetcher: }) def fetch_html(self, url: str) -> Optional[str]: - """Fetch HTML content with error handling""" + """Fetch HTML content with retries and protocol fallback.""" + def _try_fetch(target_url: str, timeout_s: int = 30) -> Optional[str]: + try: + response = self.session.get( + target_url, + timeout=timeout_s, + allow_redirects=True, + ) + response.raise_for_status() + return response.text + except Exception as inner_e: + logger.error(f"Error fetching HTML from {target_url}: {inner_e}") + return None + + # First attempt + html = _try_fetch(url, timeout_s=30) + if html is not None: + return html + + # Retry once (shorter timeout) + html = _try_fetch(url, timeout_s=15) + if html is not None: + return html + 
+ # If https fails due to resets, try http fallback once try: - response = self.session.get(url, timeout=30) - response.raise_for_status() - return response.text - except Exception as e: - logger.error(f"Error fetching HTML from {url}: {e}") - return None + if url.startswith("https://"): + http_url = "http://" + url[len("https://"):] + logger.info(f"SEO Analyzer: Falling back to HTTP for {http_url}") + html = _try_fetch(http_url, timeout_s=15) + if html is not None: + return html + except Exception: + # Best-effort fallback; errors already logged in _try_fetch + pass + + return None class AIInsightGenerator: diff --git a/backend/services/strategy_copilot_service.py b/backend/services/strategy_copilot_service.py index bec0959d..c8086694 100644 --- a/backend/services/strategy_copilot_service.py +++ b/backend/services/strategy_copilot_service.py @@ -1,7 +1,7 @@ from typing import Dict, Any, List, Optional from sqlalchemy.orm import Session from loguru import logger -from services.onboarding_data_service import OnboardingDataService +from services.onboarding.data_service import OnboardingDataService from services.user_data_service import UserDataService from services.llm_providers.gemini_provider import gemini_text_response, gemini_structured_json_response diff --git a/backend/services/subscription/README.md b/backend/services/subscription/README.md new file mode 100644 index 00000000..1e690459 --- /dev/null +++ b/backend/services/subscription/README.md @@ -0,0 +1,185 @@ +# Subscription Services Package + +## Overview + +This package consolidates all subscription, billing, and usage tracking related services and middleware into a single, well-organized module. This follows the same architectural pattern as the onboarding package for consistency and maintainability. 
+ +## Package Structure + +``` +backend/services/subscription/ +├── __init__.py # Package exports +├── pricing_service.py # API pricing and cost calculations +├── usage_tracking_service.py # Usage tracking and limits +├── exception_handler.py # Exception handling +├── monitoring_middleware.py # API monitoring with usage tracking +└── README.md # This documentation +``` + +## Services + +### PricingService +- **File**: `pricing_service.py` +- **Purpose**: Manages API pricing, cost calculation, and subscription limits +- **Key Features**: + - Dynamic pricing based on API provider and model + - Cost calculation for input/output tokens + - Subscription limit enforcement + - Billing period management + +### UsageTrackingService +- **File**: `usage_tracking_service.py` +- **Purpose**: Comprehensive tracking of API usage, costs, and subscription limits +- **Key Features**: + - Real-time usage tracking + - Cost calculation and billing + - Usage limit enforcement with TTL caching + - Usage alerts and notifications + +### SubscriptionExceptionHandler +- **File**: `exception_handler.py` +- **Purpose**: Centralized exception handling for subscription-related errors +- **Key Features**: + - Custom exception types + - Error handling decorators + - Consistent error responses + +### Monitoring Middleware +- **File**: `monitoring_middleware.py` +- **Purpose**: FastAPI middleware for API monitoring and usage tracking +- **Key Features**: + - Request/response monitoring + - Usage tracking integration + - Performance metrics + - Database API monitoring + +## Usage + +### Import Pattern + +Always use the consolidated package for subscription-related imports: + +```python +# ✅ Correct - Use consolidated package +from services.subscription import PricingService, UsageTrackingService +from services.subscription import SubscriptionExceptionHandler +from services.subscription import check_usage_limits_middleware + +# ❌ Incorrect - Old scattered imports +from 
services.pricing_service import PricingService +from services.usage_tracking_service import UsageTrackingService +from middleware.monitoring_middleware import check_usage_limits_middleware +``` + +### Service Initialization + +```python +from services.subscription import PricingService, UsageTrackingService +from services.database import get_db + +# Get database session +db = next(get_db()) + +# Initialize services +pricing_service = PricingService(db) +usage_service = UsageTrackingService(db) +``` + +### Middleware Registration + +```python +from services.subscription import monitoring_middleware + +# Register middleware in FastAPI app +app.middleware("http")(monitoring_middleware) +``` + +## Database Models + +The subscription services use the following database models (defined in `backend/models/subscription_models.py`): + +- `APIProvider` - API provider enumeration +- `SubscriptionPlan` - Subscription plan definitions +- `UserSubscription` - User subscription records +- `UsageSummary` - Usage summary by billing period +- `APIUsageLog` - Individual API usage logs +- `APIProviderPricing` - Pricing configuration +- `UsageAlert` - Usage limit alerts +- `SubscriptionTier` - Subscription tier definitions +- `BillingCycle` - Billing cycle enumeration +- `UsageStatus` - Usage status enumeration + +## Key Features + +### 1. Database-Only Persistence +- All data stored in database tables +- No file-based storage +- User-isolated data access + +### 2. TTL Caching +- In-memory caching for performance +- 30-second TTL for usage limit checks +- 10-minute TTL for dashboard data + +### 3. Real-time Monitoring +- Live API usage tracking +- Performance metrics collection +- Error rate monitoring + +### 4. 
Flexible Pricing +- Per-provider pricing configuration +- Model-specific pricing +- Dynamic cost calculation + +## Error Handling + +The package provides comprehensive error handling: + +```python +from services.subscription import ( + SubscriptionException, + UsageLimitExceededException, + PricingException, + TrackingException +) + +try: + # Subscription operation + pass +except UsageLimitExceededException as e: + # Handle usage limit exceeded + pass +except PricingException as e: + # Handle pricing error + pass +``` + +## Configuration + +The services use environment variables for configuration: + +- `SUBSCRIPTION_DASHBOARD_NOCACHE` - Bypass dashboard cache +- `ENABLE_ALPHA` - Enable alpha features (default: false) + +## Migration from Old Structure + +This package consolidates the following previously scattered files: + +- `backend/services/pricing_service.py` → `subscription/pricing_service.py` +- `backend/services/usage_tracking_service.py` → `subscription/usage_tracking_service.py` +- `backend/services/subscription_exception_handler.py` → `subscription/exception_handler.py` +- `backend/middleware/monitoring_middleware.py` → `subscription/monitoring_middleware.py` + +## Benefits + +1. **Single Package**: All subscription logic in one location +2. **Clear Ownership**: Easy to find subscription-related code +3. **Better Organization**: Follows same pattern as onboarding +4. **Easier Maintenance**: Single source of truth for billing logic +5. 
**Consistent Architecture**: Matches onboarding consolidation + +## Related Packages + +- `services.onboarding` - Onboarding and user setup +- `models.subscription_models` - Database models +- `api.subscription_api` - API endpoints diff --git a/backend/services/subscription/__init__.py b/backend/services/subscription/__init__.py new file mode 100644 index 00000000..51c98463 --- /dev/null +++ b/backend/services/subscription/__init__.py @@ -0,0 +1,40 @@ +# Subscription Services Package +# Consolidated subscription-related services and middleware + +from .pricing_service import PricingService +from .usage_tracking_service import UsageTrackingService +from .exception_handler import ( + SubscriptionException, + SubscriptionExceptionHandler, + UsageLimitExceededException, + PricingException, + TrackingException, + handle_usage_limit_error, + handle_pricing_error, + handle_tracking_error, +) +from .monitoring_middleware import ( + DatabaseAPIMonitor, + check_usage_limits_middleware, + monitoring_middleware, + get_monitoring_stats, + get_lightweight_stats, +) + +__all__ = [ + "PricingService", + "UsageTrackingService", + "SubscriptionException", + "SubscriptionExceptionHandler", + "UsageLimitExceededException", + "PricingException", + "TrackingException", + "handle_usage_limit_error", + "handle_pricing_error", + "handle_tracking_error", + "DatabaseAPIMonitor", + "check_usage_limits_middleware", + "monitoring_middleware", + "get_monitoring_stats", + "get_lightweight_stats", +] diff --git a/backend/services/subscription_exception_handler.py b/backend/services/subscription/exception_handler.py similarity index 95% rename from backend/services/subscription_exception_handler.py rename to backend/services/subscription/exception_handler.py index e156d008..a2a06082 100644 --- a/backend/services/subscription_exception_handler.py +++ b/backend/services/subscription/exception_handler.py @@ -152,24 +152,8 @@ class SubscriptionExceptionHandler: def _setup_logging(self): """Setup 
structured logging for subscription errors.""" - # Configure loguru for subscription-specific logging - logger.add( - "logs/subscription_errors.log", - rotation="1 day", - retention="30 days", - level="ERROR", - format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} | {message}", - filter=lambda record: "subscription" in record["name"].lower() - ) - - logger.add( - "logs/usage_tracking.log", - rotation="1 day", - retention="90 days", - level="INFO", - format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}", - filter=lambda record: "usage_tracking" in str(record["message"]).lower() - ) + from utils.logger_utils import get_service_logger + return get_service_logger("subscription_exception_handler") def handle_exception( self, @@ -425,4 +409,4 @@ def handle_subscription_errors(db: Session = None): return handler.handle_exception(e) return wrapper - return decorator \ No newline at end of file + return decorator diff --git a/backend/services/subscription/monitoring_middleware.py b/backend/services/subscription/monitoring_middleware.py new file mode 100644 index 00000000..edd8409c --- /dev/null +++ b/backend/services/subscription/monitoring_middleware.py @@ -0,0 +1,373 @@ +""" +Enhanced FastAPI Monitoring Middleware +Database-backed monitoring for API calls, errors, performance metrics, and usage tracking. +Includes comprehensive subscription-based usage monitoring and cost tracking. 
+""" + +from fastapi import Request, Response +from fastapi.responses import JSONResponse +import time +import json +from datetime import datetime, timedelta +from typing import Dict, List, Any, Optional +from collections import defaultdict, deque +import asyncio +from loguru import logger +from sqlalchemy.orm import Session +from sqlalchemy import and_, func +import re + +from models.api_monitoring import APIRequest, APIEndpointStats, SystemHealth, CachePerformance +from models.subscription_models import APIProvider +from services.database import get_db +from .usage_tracking_service import UsageTrackingService +from .pricing_service import PricingService + +class DatabaseAPIMonitor: + """Database-backed API monitoring with usage tracking and subscription management.""" + + def __init__(self): + self.cache_stats = { + 'hits': 0, + 'misses': 0, + 'hit_rate': 0.0 + } + # API provider detection patterns - Updated to match actual endpoints + self.provider_patterns = { + APIProvider.GEMINI: [ + r'gemini', r'google.*ai' + ], + APIProvider.OPENAI: [r'openai', r'gpt', r'chatgpt'], + APIProvider.ANTHROPIC: [r'anthropic', r'claude'], + APIProvider.MISTRAL: [r'mistral'], + APIProvider.TAVILY: [r'tavily'], + APIProvider.SERPER: [r'serper'], + APIProvider.METAPHOR: [r'metaphor', r'/exa'], + APIProvider.FIRECRAWL: [r'firecrawl'] + } + + def detect_api_provider(self, path: str, user_agent: str = None) -> Optional[APIProvider]: + """Detect which API provider is being used based on request details.""" + path_lower = path.lower() + user_agent_lower = (user_agent or '').lower() + + # Permanently ignore internal route families that must not accrue or check provider usage + if path_lower.startswith('/api/onboarding/') or path_lower.startswith('/api/subscription/'): + return None + + for provider, patterns in self.provider_patterns.items(): + for pattern in patterns: + if re.search(pattern, path_lower) or re.search(pattern, user_agent_lower): + return provider + + return None + + def 
extract_usage_metrics(self, request_body: str = None, response_body: str = None) -> Dict[str, Any]: + """Extract usage metrics from request/response bodies.""" + metrics = { + 'tokens_input': 0, + 'tokens_output': 0, + 'model_used': None, + 'search_count': 0, + 'image_count': 0, + 'page_count': 0 + } + + try: + # Try to parse request body for input tokens/content + if request_body: + request_data = json.loads(request_body) if isinstance(request_body, str) else request_body + + # Extract model information + if 'model' in request_data: + metrics['model_used'] = request_data['model'] + + # Estimate input tokens from prompt/content + if 'prompt' in request_data: + metrics['tokens_input'] = self._estimate_tokens(request_data['prompt']) + elif 'messages' in request_data: + total_content = ' '.join([msg.get('content', '') for msg in request_data['messages']]) + metrics['tokens_input'] = self._estimate_tokens(total_content) + elif 'input' in request_data: + metrics['tokens_input'] = self._estimate_tokens(str(request_data['input'])) + + # Count specific request types + if 'query' in request_data or 'search' in request_data: + metrics['search_count'] = 1 + if 'image' in request_data or 'generate_image' in request_data: + metrics['image_count'] = 1 + if 'url' in request_data or 'crawl' in request_data: + metrics['page_count'] = 1 + + # Try to parse response body for output tokens + if response_body: + response_data = json.loads(response_body) if isinstance(response_body, str) else response_body + + # Extract output content and estimate tokens + if 'text' in response_data: + metrics['tokens_output'] = self._estimate_tokens(response_data['text']) + elif 'content' in response_data: + metrics['tokens_output'] = self._estimate_tokens(str(response_data['content'])) + elif 'choices' in response_data and response_data['choices']: + choice = response_data['choices'][0] + if 'message' in choice and 'content' in choice['message']: + metrics['tokens_output'] = 
self._estimate_tokens(choice['message']['content']) + + # Extract actual token usage if provided by API + if 'usage' in response_data: + usage = response_data['usage'] + if 'prompt_tokens' in usage: + metrics['tokens_input'] = usage['prompt_tokens'] + if 'completion_tokens' in usage: + metrics['tokens_output'] = usage['completion_tokens'] + + except (json.JSONDecodeError, KeyError, TypeError) as e: + logger.debug(f"Could not extract usage metrics: {e}") + + return metrics + + def _estimate_tokens(self, text: str) -> int: + """Estimate token count for text (rough approximation).""" + if not text: + return 0 + # Rough estimation: 1.3 tokens per word on average + word_count = len(str(text).split()) + return int(word_count * 1.3) + +async def check_usage_limits_middleware(request: Request, user_id: str, request_body: str = None) -> Optional[JSONResponse]: + """Check usage limits before processing request.""" + if not user_id: + return None + + # No special whitelist; onboarding/subscription are ignored by provider detection + try: + path = request.url.path + except Exception: + pass + + try: + db = next(get_db()) + api_monitor = DatabaseAPIMonitor() + + # Detect if this is an API call that should be rate limited + api_provider = api_monitor.detect_api_provider(request.url.path, request.headers.get('user-agent')) + if not api_provider: + return None + + # Use provided request body or read it if not provided + if request_body is None: + try: + if hasattr(request, '_body'): + request_body = request._body + else: + # Try to read body (this might not work in all cases) + body = await request.body() + request_body = body.decode('utf-8') if body else None + except: + pass + + # Estimate tokens needed + tokens_requested = 0 + if request_body: + usage_metrics = api_monitor.extract_usage_metrics(request_body) + tokens_requested = usage_metrics.get('tokens_input', 0) + + # Check limits + usage_service = UsageTrackingService(db) + can_proceed, message, usage_info = await 
usage_service.enforce_usage_limits( + user_id=user_id, + provider=api_provider, + tokens_requested=tokens_requested + ) + + if not can_proceed: + logger.warning(f"Usage limit exceeded for {user_id}: {message}") + return JSONResponse( + status_code=429, + content={ + "error": "Usage limit exceeded", + "message": message, + "usage_info": usage_info, + "provider": api_provider.value + } + ) + + # Warn if approaching limits + if usage_info.get('call_usage_percentage', 0) >= 80 or usage_info.get('cost_usage_percentage', 0) >= 80: + logger.warning(f"User {user_id} approaching usage limits: {usage_info}") + + return None + + except Exception as e: + logger.error(f"Error checking usage limits: {e}") + # Don't block requests if usage checking fails + return None + finally: + db.close() + +async def monitoring_middleware(request: Request, call_next): + """Enhanced FastAPI middleware for monitoring API calls with usage tracking.""" + start_time = time.time() + + # Get database session + db = next(get_db()) + + # Extract request details - Enhanced user identification + user_id = None + try: + # PRIORITY 1: Check request.state.user_id (set by API key injection middleware) + if hasattr(request.state, 'user_id') and request.state.user_id: + user_id = request.state.user_id + logger.debug(f"Monitoring: Using user_id from request.state: {user_id}") + + # PRIORITY 2: Check query parameters + elif hasattr(request, 'query_params') and 'user_id' in request.query_params: + user_id = request.query_params['user_id'] + elif hasattr(request, 'path_params') and 'user_id' in request.path_params: + user_id = request.path_params['user_id'] + + # PRIORITY 3: Check headers for user identification + elif 'x-user-id' in request.headers: + user_id = request.headers['x-user-id'] + elif 'x-user-email' in request.headers: + user_id = request.headers['x-user-email'] # Use email as user identifier + elif 'x-session-id' in request.headers: + user_id = request.headers['x-session-id'] # Use session as 
fallback + + # Check for authorization header with user info + elif 'authorization' in request.headers: + # Auth middleware should have set request.state.user_id + # If not, this indicates an authentication failure that should be logged + user_id = None + logger.warning("Monitoring: Auth header present but no user_id in state - authentication may have failed") + + # Final fallback: None (skip usage limits for truly anonymous/unauthenticated) + else: + user_id = None + + except Exception as e: + logger.debug(f"Error extracting user ID: {e}") + user_id = None # On error, skip usage limits + + # Capture request body for usage tracking (read once, safely) + request_body = None + try: + # Only read body for POST/PUT/PATCH requests to avoid issues + if request.method in ['POST', 'PUT', 'PATCH']: + if hasattr(request, '_body') and request._body: + request_body = request._body.decode('utf-8') + else: + # Read body only if it hasn't been read yet + try: + body = await request.body() + request_body = body.decode('utf-8') if body else None + except Exception as body_error: + logger.debug(f"Could not read request body: {body_error}") + request_body = None + except Exception as e: + logger.debug(f"Error capturing request body: {e}") + request_body = None + + # Check usage limits before processing + limit_response = await check_usage_limits_middleware(request, user_id, request_body) + if limit_response: + return limit_response + + try: + response = await call_next(request) + status_code = response.status_code + duration = time.time() - start_time + + # Capture response body for usage tracking + response_body = None + try: + if hasattr(response, 'body'): + response_body = response.body.decode('utf-8') if response.body else None + elif hasattr(response, '_content'): + response_body = response._content.decode('utf-8') if response._content else None + except: + pass + + # Track API usage if this is an API call to external providers + api_monitor = DatabaseAPIMonitor() + api_provider 
= api_monitor.detect_api_provider(request.url.path, request.headers.get('user-agent')) + if api_provider and user_id: + logger.info(f"Detected API call: {request.url.path} -> {api_provider.value} for user: {user_id}") + try: + # Extract usage metrics + usage_metrics = api_monitor.extract_usage_metrics(request_body, response_body) + + # Track usage with the usage tracking service + usage_service = UsageTrackingService(db) + await usage_service.track_api_usage( + user_id=user_id, + provider=api_provider, + endpoint=request.url.path, + method=request.method, + model_used=usage_metrics.get('model_used'), + tokens_input=usage_metrics.get('tokens_input', 0), + tokens_output=usage_metrics.get('tokens_output', 0), + response_time=duration, + status_code=status_code, + request_size=len(request_body) if request_body else None, + response_size=len(response_body) if response_body else None, + user_agent=request.headers.get('user-agent'), + ip_address=request.client.host if request.client else None, + search_count=usage_metrics.get('search_count', 0), + image_count=usage_metrics.get('image_count', 0), + page_count=usage_metrics.get('page_count', 0) + ) + except Exception as usage_error: + logger.error(f"Error tracking API usage: {usage_error}") + # Don't fail the main request if usage tracking fails + + return response + + except Exception as e: + duration = time.time() - start_time + status_code = 500 + + # Store minimal error info + logger.error(f"API Error: {request.method} {request.url.path} - {str(e)}") + + return JSONResponse( + status_code=500, + content={"error": "Internal server error"} + ) + finally: + db.close() + +async def get_monitoring_stats(minutes: int = 5) -> Dict[str, Any]: + """Get current monitoring statistics.""" + db = next(get_db()) + try: + # Placeholder to match old API; heavy stats handled elsewhere + return { + 'timestamp': datetime.utcnow().isoformat(), + 'overview': { + 'recent_requests': 0, + 'recent_errors': 0, + }, + 'cache_performance': 
{'hits': 0, 'misses': 0, 'hit_rate': 0.0}, + 'recent_errors': [], + 'system_health': {'status': 'healthy', 'error_rate': 0.0} + } + finally: + db.close() + +async def get_lightweight_stats() -> Dict[str, Any]: + """Get lightweight stats for dashboard header.""" + db = next(get_db()) + try: + # Minimal viable placeholder values + now = datetime.utcnow() + return { + 'status': 'healthy', + 'icon': '🟒', + 'recent_requests': 0, + 'recent_errors': 0, + 'error_rate': 0.0, + 'timestamp': now.isoformat() + } + finally: + db.close() diff --git a/backend/services/pricing_service.py b/backend/services/subscription/pricing_service.py similarity index 85% rename from backend/services/pricing_service.py rename to backend/services/subscription/pricing_service.py index 8da06d76..40a2ab09 100644 --- a/backend/services/pricing_service.py +++ b/backend/services/subscription/pricing_service.py @@ -21,6 +21,53 @@ class PricingService: self.db = db self._pricing_cache = {} self._plans_cache = {} + # Lightweight in-process cache for limit checks + # key: f"{user_id}:{provider}", value: { 'result': (bool, str, dict), 'expires_at': datetime } + self._limits_cache: Dict[str, Dict[str, Any]] = {} + + # ------------------- Billing period helpers ------------------- + def _compute_next_period_end(self, start: datetime, cycle: str) -> datetime: + """Compute the next period end given a start and billing cycle.""" + try: + cycle_value = cycle.value if hasattr(cycle, 'value') else str(cycle) + except Exception: + cycle_value = str(cycle) + if cycle_value == 'yearly': + return start + timedelta(days=365) + return start + timedelta(days=30) + + def _ensure_subscription_current(self, subscription) -> bool: + """Auto-advance subscription period if expired and auto_renew is enabled.""" + if not subscription: + return False + now = datetime.utcnow() + try: + if subscription.current_period_end and subscription.current_period_end < now: + if getattr(subscription, 'auto_renew', False): + 
subscription.current_period_start = now + subscription.current_period_end = self._compute_next_period_end(now, subscription.billing_cycle) + # Keep status active if model enum else string + try: + subscription.status = subscription.status.ACTIVE # type: ignore[attr-defined] + except Exception: + setattr(subscription, 'status', 'active') + self.db.commit() + else: + return False + except Exception: + self.db.rollback() + return True + + def get_current_billing_period(self, user_id: str) -> Optional[str]: + """Return current billing period key (YYYY-MM) after ensuring subscription is current.""" + subscription = self.db.query(UserSubscription).filter( + UserSubscription.user_id == user_id, + UserSubscription.is_active == True + ).first() + # Ensure subscription is current (advance if auto_renew) + self._ensure_subscription_current(subscription) + # Continue to use YYYY-MM for summaries + return datetime.now().strftime("%Y-%m") def initialize_default_pricing(self): """Initialize default pricing for all API providers.""" @@ -374,7 +421,9 @@ class PricingService: if free_plan: return self._plan_to_limits_dict(free_plan) return None - + + # Ensure current period before returning limits + self._ensure_subscription_current(subscription) return self._plan_to_limits_dict(subscription.plan) def _plan_to_limits_dict(self, plan: SubscriptionPlan) -> Dict[str, Any]: @@ -404,14 +453,20 @@ class PricingService: def check_usage_limits(self, user_id: str, provider: APIProvider, tokens_requested: int = 0) -> Tuple[bool, str, Dict[str, Any]]: """Check if user can make an API call within their limits.""" - + # Short TTL cache to reduce DB reads under sustained traffic + cache_key = f"{user_id}:{provider.value}" + now = datetime.utcnow() + cached = self._limits_cache.get(cache_key) + if cached and cached.get('expires_at') and cached['expires_at'] > now: + return tuple(cached['result']) # type: ignore + # Get user limits limits = self.get_user_limits(user_id) if not limits: return False, 
"No subscription plan found", {} # Get current usage for this billing period - current_period = datetime.now().strftime("%Y-%m") + current_period = self.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m") usage = self.db.query(UsageSummary).filter( UsageSummary.user_id == user_id, UsageSummary.billing_period == current_period @@ -432,11 +487,16 @@ class PricingService: call_limit = limits['limits'].get(f"{provider_name}_calls", 0) if call_limit > 0 and current_calls >= call_limit: - return False, f"API call limit reached for {provider_name}", { + result = (False, f"API call limit reached for {provider_name}", { 'current_calls': current_calls, 'limit': call_limit, 'usage_percentage': 100.0 + }) + self._limits_cache[cache_key] = { + 'result': result, + 'expires_at': now + timedelta(seconds=30) } + return result # Check token limits for LLM providers if provider in [APIProvider.GEMINI, APIProvider.OPENAI, APIProvider.ANTHROPIC, APIProvider.MISTRAL]: @@ -444,34 +504,48 @@ class PricingService: token_limit = limits['limits'].get(f"{provider_name}_tokens", 0) if token_limit > 0 and (current_tokens + tokens_requested) > token_limit: - return False, f"Token limit would be exceeded for {provider_name}", { + result = (False, f"Token limit would be exceeded for {provider_name}", { 'current_tokens': current_tokens, 'requested_tokens': tokens_requested, 'limit': token_limit, 'usage_percentage': ((current_tokens + tokens_requested) / token_limit) * 100 + }) + self._limits_cache[cache_key] = { + 'result': result, + 'expires_at': now + timedelta(seconds=30) } + return result # Check cost limits cost_limit = limits['limits'].get('monthly_cost', 0) if cost_limit > 0 and usage.total_cost >= cost_limit: - return False, "Monthly cost limit reached", { + result = (False, "Monthly cost limit reached", { 'current_cost': usage.total_cost, 'limit': cost_limit, 'usage_percentage': 100.0 + }) + self._limits_cache[cache_key] = { + 'result': result, + 'expires_at': now + 
timedelta(seconds=30) } + return result # Calculate usage percentages for warnings call_usage_pct = (current_calls / max(call_limit, 1)) * 100 if call_limit > 0 else 0 cost_usage_pct = (usage.total_cost / max(cost_limit, 1)) * 100 if cost_limit > 0 else 0 - - return True, "Within limits", { + result = (True, "Within limits", { 'current_calls': current_calls, 'call_limit': call_limit, 'call_usage_percentage': call_usage_pct, 'current_cost': usage.total_cost, 'cost_limit': cost_limit, 'cost_usage_percentage': cost_usage_pct + }) + self._limits_cache[cache_key] = { + 'result': result, + 'expires_at': now + timedelta(seconds=30) } + return result def estimate_tokens(self, text: str, provider: APIProvider) -> int: """Estimate token count for text based on provider.""" @@ -517,4 +591,4 @@ class PricingService: 'cost_per_image': pricing.cost_per_image, 'cost_per_page': pricing.cost_per_page, 'description': pricing.description - } \ No newline at end of file + } diff --git a/backend/services/usage_tracking_service.py b/backend/services/subscription/usage_tracking_service.py similarity index 94% rename from backend/services/usage_tracking_service.py rename to backend/services/subscription/usage_tracking_service.py index 503cd395..de8e64f3 100644 --- a/backend/services/usage_tracking_service.py +++ b/backend/services/subscription/usage_tracking_service.py @@ -14,7 +14,7 @@ from models.subscription_models import ( APIUsageLog, UsageSummary, APIProvider, UsageAlert, UserSubscription, UsageStatus ) -from services.pricing_service import PricingService +from .pricing_service import PricingService class UsageTrackingService: """Service for tracking API usage and managing subscription limits.""" @@ -22,6 +22,9 @@ class UsageTrackingService: def __init__(self, db: Session): self.db = db self.pricing_service = PricingService(db) + # TTL cache (30s) for enforcement results to cut DB chatter + # key: f"{user_id}:{provider}", value: { 'result': (bool,str,dict), 'expires_at': datetime } 
+ self._enforce_cache: Dict[str, Dict[str, Any]] = {} async def track_api_usage(self, user_id: str, provider: APIProvider, endpoint: str, method: str, model_used: str = None, @@ -54,7 +57,7 @@ class UsageTrackingService: ) # Create usage log entry - billing_period = datetime.now().strftime("%Y-%m") + billing_period = self.pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m") usage_log = APIUsageLog( user_id=user_id, provider=provider, @@ -294,7 +297,7 @@ class UsageTrackingService: """Get comprehensive usage statistics for a user.""" if not billing_period: - billing_period = datetime.now().strftime("%Y-%m") + billing_period = self.pricing_service.get_current_billing_period(user_id) or datetime.now().strftime("%Y-%m") # Get usage summary summary = self.db.query(UsageSummary).filter( @@ -480,13 +483,24 @@ class UsageTrackingService: async def enforce_usage_limits(self, user_id: str, provider: APIProvider, tokens_requested: int = 0) -> Tuple[bool, str, Dict[str, Any]]: """Enforce usage limits before making an API call.""" - - return self.pricing_service.check_usage_limits( + # Check short-lived cache first (30s) + cache_key = f"{user_id}:{provider.value}" + now = datetime.utcnow() + cached = self._enforce_cache.get(cache_key) + if cached and cached.get('expires_at') and cached['expires_at'] > now: + return tuple(cached['result']) # type: ignore + + result = self.pricing_service.check_usage_limits( user_id=user_id, provider=provider, tokens_requested=tokens_requested ) - + self._enforce_cache[cache_key] = { + 'result': result, + 'expires_at': now + timedelta(seconds=30) + } + return result + async def reset_current_billing_period(self, user_id: str) -> Dict[str, Any]: """Reset usage status for the current billing period (after plan change).""" try: @@ -508,4 +522,4 @@ class UsageTrackingService: except Exception as e: self.db.rollback() logger.error(f"Error resetting usage status: {e}") - return {"reset": False, "error": str(e)} \ No 
newline at end of file + return {"reset": False, "error": str(e)} diff --git a/backend/services/user_api_key_context.py b/backend/services/user_api_key_context.py index 238d5e19..63968b9e 100644 --- a/backend/services/user_api_key_context.py +++ b/backend/services/user_api_key_context.py @@ -70,7 +70,7 @@ class UserAPIKeyContext: def _load_from_database(self, user_id: str) -> Dict[str, str]: """Load API keys from database for specific user.""" try: - from services.onboarding_database_service import OnboardingDatabaseService + from services.onboarding.database_service import OnboardingDatabaseService from services.database import SessionLocal db_service = OnboardingDatabaseService() diff --git a/backend/utils/logger_utils.py b/backend/utils/logger_utils.py new file mode 100644 index 00000000..04752820 --- /dev/null +++ b/backend/utils/logger_utils.py @@ -0,0 +1,53 @@ +""" +Logger utilities to prevent conflicts between different logging configurations. +""" + +from loguru import logger +import sys + + +def safe_logger_config(format_string: str, level: str = "INFO"): + """ + Safely configure logger without removing existing handlers. + This prevents conflicts with the main logging configuration. 
+ + Args: + format_string: Log format string + level: Log level + """ + try: + # Only add a new handler if we don't already have one with this format + existing_handlers = logger._core.handlers + for handler in existing_handlers: + if hasattr(handler, '_sink') and handler._sink == sys.stdout: + # Check if format is similar to avoid duplicates + if hasattr(handler, '_format') and handler._format == format_string: + return # Handler already exists with this format + + # Add new handler only if needed + logger.add( + sys.stdout, + level=level, + format=format_string, + colorize=True + ) + except Exception as e: + # If there's any error, just use the existing logger configuration + pass + + +def get_service_logger(service_name: str, format_string: str = None): + """ + Get a logger for a specific service without conflicting with main configuration. + + Args: + service_name: Name of the service + format_string: Optional custom format string + + Returns: + Logger instance + """ + if format_string: + safe_logger_config(format_string) + + return logger.bind(service=service_name) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 3ae89bf0..463f39bb 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -9,8 +9,9 @@ "version": "1.0.0", "dependencies": { "@clerk/clerk-react": "^5.46.1", - "@copilotkit/react-core": "^1.10.3", - "@copilotkit/react-ui": "^1.10.3", + "@copilotkit/react-core": "^1.10.6", + "@copilotkit/react-textarea": "^1.10.6", + "@copilotkit/react-ui": "^1.10.6", "@copilotkit/shared": "^1.10.3", "@emotion/react": "^11.11.0", "@emotion/styled": "^11.11.0", @@ -53,9 +54,9 @@ } }, "node_modules/@ag-ui/core": { - "version": "0.0.36", - "resolved": "https://registry.npmjs.org/@ag-ui/core/-/core-0.0.36.tgz", - "integrity": "sha512-uYUrzw6uxuw4qVQ61mdSeiG0mFh2n/VAWmWsWzwETDuhqJZT7rFmd07IajcFWcyItMr1wjqxFDdlklucAyEYNA==", + "version": "0.0.37", + "resolved": "https://registry.npmjs.org/@ag-ui/core/-/core-0.0.37.tgz", + 
"integrity": "sha512-7bmjPn1Ol0Zo00F+MrPr0eOwH4AFZbhmq/ZMhCsrMILtVYBiBLcLU9QFBpBL3Zm9MCHha8b79N7JE2FzwcMaVA==", "dependencies": { "rxjs": "7.8.1", "zod": "^3.22.4" @@ -2197,13 +2198,13 @@ } }, "node_modules/@copilotkit/react-core": { - "version": "1.10.3", - "resolved": "https://registry.npmjs.org/@copilotkit/react-core/-/react-core-1.10.3.tgz", - "integrity": "sha512-m/R/cUENBlXP7+E7TUImVPqmgrHtMeYd3/qhOK3hQY4LqCtbEG5ju5HkEy/QbmVX5tNn/Wo8ti0kwK9tXX6lzA==", + "version": "1.10.6", + "resolved": "https://registry.npmjs.org/@copilotkit/react-core/-/react-core-1.10.6.tgz", + "integrity": "sha512-sdojpntwgOxP8lWRzaFEiWr0g2wDefjQHtve5GPPie+otseFonV88FZjSqIq5LN+q5BIwDOEhCmDjALsGjXvuQ==", "license": "MIT", "dependencies": { - "@copilotkit/runtime-client-gql": "1.10.3", - "@copilotkit/shared": "1.10.3", + "@copilotkit/runtime-client-gql": "1.10.6", + "@copilotkit/shared": "1.10.6", "@scarf/scarf": "^1.3.0", "react-markdown": "^8.0.7", "untruncate-json": "^0.0.1" @@ -2213,15 +2214,66 @@ "react-dom": "^18 || ^19 || ^19.0.0-rc" } }, - "node_modules/@copilotkit/react-ui": { - "version": "1.10.3", - "resolved": "https://registry.npmjs.org/@copilotkit/react-ui/-/react-ui-1.10.3.tgz", - "integrity": "sha512-/MvKuVLor+372yKKs7Us3AmU2A/5+zWTE0Z0rEcMOxRNOPbjLt9zj5e86aJ0alz1hCspKG4UWCAGiphf+I19ig==", + "node_modules/@copilotkit/react-textarea": { + "version": "1.10.6", + "resolved": "https://registry.npmjs.org/@copilotkit/react-textarea/-/react-textarea-1.10.6.tgz", + "integrity": "sha512-04totNGPtBkfVdYy5rCBqn47HDbdd9cqHk49At0CD9DFmGOaL7kwMbywHj4Dqq6UpDKuJqnS9aYyLI073vuZwA==", "license": "MIT", "dependencies": { - "@copilotkit/react-core": "1.10.3", - "@copilotkit/runtime-client-gql": "1.10.3", - "@copilotkit/shared": "1.10.3", + "@copilotkit/react-core": "1.10.6", + "@copilotkit/runtime-client-gql": "1.10.6", + "@copilotkit/shared": "1.10.6", + "@emotion/css": "^11.11.2", + "@emotion/react": "^11.11.1", + "@emotion/styled": "^11.11.0", + "@mui/material": "^5.14.11", + 
"@radix-ui/react-dialog": "^1.1.1", + "@radix-ui/react-label": "^2.0.2", + "@radix-ui/react-separator": "^1.0.3", + "@radix-ui/react-slot": "^1.0.2", + "class-variance-authority": "^0.6.1", + "clsx": "^1.2.1", + "cmdk": "^0.2.0", + "lodash.merge": "^4.6.2", + "lucide-react": "^0.274.0", + "material-icons": "^1.13.10", + "slate": "^0.94.1", + "slate-history": "^0.93.0", + "slate-react": "^0.98.1", + "tailwind-merge": "^1.13.2" + }, + "peerDependencies": { + "react": "^18 || ^19 || ^19.0.0-rc", + "react-dom": "^18 || ^19 || ^19.0.0-rc" + } + }, + "node_modules/@copilotkit/react-textarea/node_modules/clsx": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/clsx/-/clsx-1.2.1.tgz", + "integrity": "sha512-EcR6r5a8bj6pu3ycsa/E/cKVGuTgZJZdsyUYHOksG/UHIiKfjxzRxYJpyVBwYaQeOvghal9fcc4PidlgzugAQg==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/@copilotkit/react-textarea/node_modules/lucide-react": { + "version": "0.274.0", + "resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.274.0.tgz", + "integrity": "sha512-qiWcojRXEwDiSimMX1+arnxha+ROJzZjJaVvCC0rsG6a9pUPjZePXSq7em4ZKMp0NDm1hyzPNkM7UaWC3LU2AA==", + "license": "ISC", + "peerDependencies": { + "react": "^16.5.1 || ^17.0.0 || ^18.0.0" + } + }, + "node_modules/@copilotkit/react-ui": { + "version": "1.10.6", + "resolved": "https://registry.npmjs.org/@copilotkit/react-ui/-/react-ui-1.10.6.tgz", + "integrity": "sha512-eNIbZKMvBVZqlAR4fqkmZRIYIt8WhwZOxfVJVwMD9nfmWdtatmxrOLecyDiPk/hkq2o/8s2/rubaZSMK6m+GHQ==", + "license": "MIT", + "dependencies": { + "@copilotkit/react-core": "1.10.6", + "@copilotkit/runtime-client-gql": "1.10.6", + "@copilotkit/shared": "1.10.6", "@headlessui/react": "^2.1.3", "react-markdown": "^10.1.0", "react-syntax-highlighter": "^15.6.1", @@ -2511,12 +2563,12 @@ "license": "MIT" }, "node_modules/@copilotkit/runtime-client-gql": { - "version": "1.10.3", - "resolved": 
"https://registry.npmjs.org/@copilotkit/runtime-client-gql/-/runtime-client-gql-1.10.3.tgz", - "integrity": "sha512-c0pmm9vyK1gy7hYP8F7Me97CpfxUY7OBdWvI2JQh7oll4abL3w5IbpKIEr/UNOGGJFdgz0NJE8eDOwnadQ51ww==", + "version": "1.10.6", + "resolved": "https://registry.npmjs.org/@copilotkit/runtime-client-gql/-/runtime-client-gql-1.10.6.tgz", + "integrity": "sha512-oLX8mjppVvQCWfquW9A0500hYVNxM4X/mtt76SEvfGUb2KsNQ4j2HOCzpmtm85MeLproC+f9738wLwRueLliZg==", "license": "MIT", "dependencies": { - "@copilotkit/shared": "1.10.3", + "@copilotkit/shared": "1.10.6", "@urql/core": "^5.0.3", "untruncate-json": "^0.0.1", "urql": "^4.1.0" @@ -2526,12 +2578,12 @@ } }, "node_modules/@copilotkit/shared": { - "version": "1.10.3", - "resolved": "https://registry.npmjs.org/@copilotkit/shared/-/shared-1.10.3.tgz", - "integrity": "sha512-LCgqfWfIfC97jCS6AMXVsuCXHK4CUqet7XXKJ9SB8gXR/kiDciP543gtoXznQK6L5ZP5FGDEH0KtPdNd/2Mcgg==", + "version": "1.10.6", + "resolved": "https://registry.npmjs.org/@copilotkit/shared/-/shared-1.10.6.tgz", + "integrity": "sha512-56Rltf4fDBqCpl1ZXARypt5NdE4LTg3tGPPLurZpgPmm31Lv5EAHpfjC7I55vt9A0mXWlTCHtCrpiaAlTyzGJw==", "license": "MIT", "dependencies": { - "@ag-ui/core": "^0.0.36", + "@ag-ui/core": "^0.0.37", "@segment/analytics-node": "^2.1.2", "chalk": "4.1.2", "graphql": "^16.8.1", @@ -2871,6 +2923,19 @@ "stylis": "4.2.0" } }, + "node_modules/@emotion/css": { + "version": "11.13.5", + "resolved": "https://registry.npmjs.org/@emotion/css/-/css-11.13.5.tgz", + "integrity": "sha512-wQdD0Xhkn3Qy2VNcIzbLP9MR8TafI0MJb7BEAXKp+w4+XqErksWR4OXomuDzPsN4InLdGhVe6EYcn2ZIUCpB8w==", + "license": "MIT", + "dependencies": { + "@emotion/babel-plugin": "^11.13.5", + "@emotion/cache": "^11.13.5", + "@emotion/serialize": "^1.3.3", + "@emotion/sheet": "^1.4.0", + "@emotion/utils": "^1.4.2" + } + }, "node_modules/@emotion/hash": { "version": "0.9.2", "resolved": "https://registry.npmjs.org/@emotion/hash/-/hash-0.9.2.tgz", @@ -3592,6 +3657,12 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, 
+ "node_modules/@juggle/resize-observer": { + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/@juggle/resize-observer/-/resize-observer-3.4.0.tgz", + "integrity": "sha512-dfLbk+PwWvFzSxwk3n5ySL0hfBog779o8h68wK/7/APo/7cgyWp5jcXockbxdk5kFRkbeXWm4Fbi9FrdN381sA==", + "license": "Apache-2.0" + }, "node_modules/@leichtgewicht/ip-codec": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/@leichtgewicht/ip-codec/-/ip-codec-2.0.5.tgz", @@ -4025,8 +4096,7 @@ "version": "1.1.3", "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==", - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@radix-ui/react-arrow": { "version": "1.1.7", @@ -4099,7 +4169,6 @@ "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.2.tgz", "integrity": "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==", "license": "MIT", - "peer": true, "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" @@ -4110,6 +4179,42 @@ } } }, + "node_modules/@radix-ui/react-dialog": { + "version": "1.1.15", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.1.15.tgz", + "integrity": "sha512-TCglVRtzlffRNxRMEyR36DGBLJpeusFcgMVD9PZEzAKnUs1lKCgX5u9BmC2Yg+LL9MgZDugFFs1Vl+Jp4t/PGw==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-focus-guards": "1.1.3", + "@radix-ui/react-focus-scope": "1.1.7", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-controllable-state": 
"1.2.2", + "aria-hidden": "^1.2.4", + "react-remove-scroll": "^2.6.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-direction": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.1.tgz", @@ -4131,7 +4236,6 @@ "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz", "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==", "license": "MIT", - "peer": true, "dependencies": { "@radix-ui/primitive": "1.1.3", "@radix-ui/react-compose-refs": "1.1.2", @@ -4159,7 +4263,6 @@ "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz", "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==", "license": "MIT", - "peer": true, "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" @@ -4175,7 +4278,6 @@ "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", "license": "MIT", - "peer": true, "dependencies": { "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-primitive": "2.1.3", @@ -4201,7 +4303,6 @@ "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz", "integrity": "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==", "license": "MIT", - "peer": true, "dependencies": { "@radix-ui/react-use-layout-effect": "1.1.1" }, @@ 
-4215,6 +4316,29 @@ } } }, + "node_modules/@radix-ui/react-label": { + "version": "2.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-label/-/react-label-2.1.7.tgz", + "integrity": "sha512-YT1GqPSL8kJn20djelMX7/cTRp/Y9w5IZHvfxQTVHrOqa2yMl7i/UfMqKRU5V7mEyKTrUVgJXhNQPVCG8PBLoQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-popper": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", @@ -4253,7 +4377,6 @@ "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", "license": "MIT", - "peer": true, "dependencies": { "@radix-ui/react-primitive": "2.1.3", "@radix-ui/react-use-layout-effect": "1.1.1" @@ -4273,12 +4396,35 @@ } } }, + "node_modules/@radix-ui/react-presence": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", + "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, 
"node_modules/@radix-ui/react-primitive": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", "license": "MIT", - "peer": true, "dependencies": { "@radix-ui/react-slot": "1.2.3" }, @@ -4373,6 +4519,29 @@ } } }, + "node_modules/@radix-ui/react-separator": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.7.tgz", + "integrity": "sha512-0HEb8R9E8A+jZjvmFCy/J4xhbXy3TV+9XSnGJ3KvTtjlIUy/YQ/p6UYZvi7YbeoeXdyU9+Y3scizK6hkY37baA==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-slider": { "version": "1.3.6", "resolved": "https://registry.npmjs.org/@radix-ui/react-slider/-/react-slider-1.3.6.tgz", @@ -4486,7 +4655,6 @@ "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz", "integrity": "sha512-FkBMwD+qbGQeMu1cOHnuGB6x4yzPjho8ap5WtbEJ26umhgqVXbhekKUQO+hZEL1vU92a3wHwdp0HAcqAUF5iDg==", "license": "MIT", - "peer": true, "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" @@ -4502,7 +4670,6 @@ "resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.2.2.tgz", "integrity": "sha512-BjasUjixPFdS+NKkypcyyN5Pmg83Olst0+c6vGov0diwTEo6mgdqVR6hxcEgFuh4QrAs7Rc+9KuGJ9TVCj0Zzg==", "license": "MIT", - "peer": true, "dependencies": { "@radix-ui/react-use-effect-event": "0.0.2", 
"@radix-ui/react-use-layout-effect": "1.1.1" @@ -4522,7 +4689,6 @@ "resolved": "https://registry.npmjs.org/@radix-ui/react-use-effect-event/-/react-use-effect-event-0.0.2.tgz", "integrity": "sha512-Qp8WbZOBe+blgpuUT+lw2xheLP8q0oatc9UpmiemEICxGvFLYmHm9QowVZGHtJlGbS6A6yJ3iViad/2cVjnOiA==", "license": "MIT", - "peer": true, "dependencies": { "@radix-ui/react-use-layout-effect": "1.1.1" }, @@ -4541,7 +4707,6 @@ "resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.1.1.tgz", "integrity": "sha512-Il0+boE7w/XebUHyBjroE+DbByORGR9KKmITzbR7MyQ4akpORYP/ZmbhAr0DG7RmmBqoOnZdy2QlvajJ2QA59g==", "license": "MIT", - "peer": true, "dependencies": { "@radix-ui/react-use-callback-ref": "1.1.1" }, @@ -4560,7 +4725,6 @@ "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.1.tgz", "integrity": "sha512-RbJRS4UWQFkzHTTwVymMTUv8EqYhOp8dOOviLj2ugtTiXRaRQS7GLGxZTLL1jWhMeoSCf5zmcZkqTl9IiYfXcQ==", "license": "MIT", - "peer": true, "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" @@ -5482,6 +5646,12 @@ "@types/node": "*" } }, + "node_modules/@types/is-hotkey": { + "version": "0.1.10", + "resolved": "https://registry.npmjs.org/@types/is-hotkey/-/is-hotkey-0.1.10.tgz", + "integrity": "sha512-RvC8KMw5BCac1NvRRyaHgMMEtBaZ6wh0pyPTBu7izn4Sj/AX9Y4aXU5c7rX8PnM/knsuUpC1IeoBkANtxBypsQ==", + "license": "MIT" + }, "node_modules/@types/istanbul-lib-coverage": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz", @@ -5524,6 +5694,12 @@ "integrity": "sha512-HMwFiRujE5PjrgwHQ25+bsLJgowjGjm5Z8FVSf0N6PwgJrwxH0QxzHYDcKsTfV3wva0vzrpqMTJS2jXPr5BMEQ==", "license": "MIT" }, + "node_modules/@types/lodash": { + "version": "4.17.20", + "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.20.tgz", + "integrity": 
"sha512-H3MHACvFUEiujabxhaI/ImO6gUrd8oOurg7LQtS7mbwIXA/cUqWrvBsaeJ23aZEPk1TAYkurjfMbSELfoCXlGA==", + "license": "MIT" + }, "node_modules/@types/mdast": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz", @@ -7277,7 +7453,6 @@ "resolved": "https://registry.npmjs.org/aria-hidden/-/aria-hidden-1.2.6.tgz", "integrity": "sha512-ik3ZgC9dY/lYVVM++OISsaYDeg1tb0VtP5uL3ouh1koGOaUMDPpbFIei4JkFimWUFPn90sbMNMXQAIVOlnYKJA==", "license": "MIT", - "peer": true, "dependencies": { "tslib": "^2.0.0" }, @@ -8407,6 +8582,27 @@ "integrity": "sha512-9z8TZaGM1pfswYeXrUpzPrkx8UnWYdhJclsiYMm6x/w5+nN+8Tf/LnAgfLGQCm59qAOxU8WwHEq2vNwF6i4j+Q==", "license": "MIT" }, + "node_modules/class-variance-authority": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/class-variance-authority/-/class-variance-authority-0.6.1.tgz", + "integrity": "sha512-eurOEGc7YVx3majOrOb099PNKgO3KnKSApOprXI4BTq6bcfbqbQXPN2u+rPPmIJ2di23bMwhk0SxCCthBmszEQ==", + "license": "Apache-2.0", + "dependencies": { + "clsx": "1.2.1" + }, + "funding": { + "url": "https://joebell.co.uk" + } + }, + "node_modules/class-variance-authority/node_modules/clsx": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/clsx/-/clsx-1.2.1.tgz", + "integrity": "sha512-EcR6r5a8bj6pu3ycsa/E/cKVGuTgZJZdsyUYHOksG/UHIiKfjxzRxYJpyVBwYaQeOvghal9fcc4PidlgzugAQg==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/clean-css": { "version": "5.3.3", "resolved": "https://registry.npmjs.org/clean-css/-/clean-css-5.3.3.tgz", @@ -8448,6 +8644,269 @@ "node": ">=6" } }, + "node_modules/cmdk": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/cmdk/-/cmdk-0.2.1.tgz", + "integrity": "sha512-U6//9lQ6JvT47+6OF6Gi8BvkxYQ8SCRRSKIJkthIMsFsLZRG0cKvTtuTaefyIKMQb8rvvXy0wGdpTNq/jPtm+g==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-dialog": "1.0.0" + }, + "peerDependencies": { + "react": "^18.0.0", + "react-dom": "^18.0.0" + } + }, + 
"node_modules/cmdk/node_modules/@radix-ui/primitive": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.0.0.tgz", + "integrity": "sha512-3e7rn8FDMin4CgeL7Z/49smCA3rFYY3Ha2rUQ7HRWFadS5iCRw08ZgVT1LaNTCNqgvrUiyczLflrVrF0SRQtNA==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-compose-refs": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.0.0.tgz", + "integrity": "sha512-0KaSv6sx787/hK3eF53iOkiSLwAGlFMx5lotrqD2pTjB18KbybKoEIgkNZTKC60YECDQTKGTRcDBILwZVqVKvA==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-context": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.0.0.tgz", + "integrity": "sha512-1pVM9RfOQ+n/N5PJK33kRSKsr1glNxomxONs5c49MliinBY6Yw2Q995qfBUUo0/Mbg05B/sGA0gkgPI7kmSHBg==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-dialog": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.0.0.tgz", + "integrity": "sha512-Yn9YU+QlHYLWwV1XfKiqnGVpWYWk6MeBVM6x/bcoyPvxgjQGoeT35482viLPctTMWoMw0PoHgqfSox7Ig+957Q==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/primitive": "1.0.0", + "@radix-ui/react-compose-refs": "1.0.0", + "@radix-ui/react-context": "1.0.0", + "@radix-ui/react-dismissable-layer": "1.0.0", + "@radix-ui/react-focus-guards": "1.0.0", + "@radix-ui/react-focus-scope": "1.0.0", + "@radix-ui/react-id": "1.0.0", + "@radix-ui/react-portal": "1.0.0", + "@radix-ui/react-presence": "1.0.0", + "@radix-ui/react-primitive": 
"1.0.0", + "@radix-ui/react-slot": "1.0.0", + "@radix-ui/react-use-controllable-state": "1.0.0", + "aria-hidden": "^1.1.1", + "react-remove-scroll": "2.5.4" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-dismissable-layer": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.0.0.tgz", + "integrity": "sha512-n7kDRfx+LB1zLueRDvZ1Pd0bxdJWDUZNQ/GWoxDn2prnuJKRdxsjulejX/ePkOsLi2tTm6P24mDqlMSgQpsT6g==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/primitive": "1.0.0", + "@radix-ui/react-compose-refs": "1.0.0", + "@radix-ui/react-primitive": "1.0.0", + "@radix-ui/react-use-callback-ref": "1.0.0", + "@radix-ui/react-use-escape-keydown": "1.0.0" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-focus-guards": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.0.0.tgz", + "integrity": "sha512-UagjDk4ijOAnGu4WMUPj9ahi7/zJJqNZ9ZAiGPp7waUWJO0O1aWXi/udPphI0IUjvrhBsZJGSN66dR2dsueLWQ==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-focus-scope": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.0.0.tgz", + "integrity": "sha512-C4SWtsULLGf/2L4oGeIHlvWQx7Rf+7cX/vKOAD2dXW0A1b5QXwi3wWeaEgW+wn+SEVrraMUk05vLU9fZZz5HbQ==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-compose-refs": "1.0.0", + "@radix-ui/react-primitive": "1.0.0", + "@radix-ui/react-use-callback-ref": "1.0.0" + }, + "peerDependencies": { + "react": 
"^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-id": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.0.0.tgz", + "integrity": "sha512-Q6iAB/U7Tq3NTolBBQbHTgclPmGWE3OlktGGqrClPozSw4vkQ1DfQAOtzgRPecKsMdJINE05iaoDUG8tRzCBjw==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-use-layout-effect": "1.0.0" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-portal": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.0.0.tgz", + "integrity": "sha512-a8qyFO/Xb99d8wQdu4o7qnigNjTPG123uADNecz0eX4usnQEj7o+cG4ZX4zkqq98NYekT7UoEQIjxBNWIFuqTA==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-primitive": "1.0.0" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-presence": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.0.0.tgz", + "integrity": "sha512-A+6XEvN01NfVWiKu38ybawfHsBjWum42MRPnEuqPsBZ4eV7e/7K321B5VgYMPv3Xx5An6o1/l9ZuDBgmcmWK3w==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-compose-refs": "1.0.0", + "@radix-ui/react-use-layout-effect": "1.0.0" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-primitive": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-1.0.0.tgz", + "integrity": "sha512-EyXe6mnRlHZ8b6f4ilTDrXmkLShICIuOTTj0GX4w1rp+wSxf3+TD05u1UOITC8VsJ2a9nwHvdXtOXEOl0Cw/zQ==", + "license": "MIT", + "dependencies": { + "@babel/runtime": 
"^7.13.10", + "@radix-ui/react-slot": "1.0.0" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-slot": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.0.0.tgz", + "integrity": "sha512-3mrKauI/tWXo1Ll+gN5dHcxDPdm/Df1ufcDLCecn+pnCIVcdWE7CujXo8QaXOWRJyZyQWWbpB8eFwHzWXlv5mQ==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-compose-refs": "1.0.0" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-use-callback-ref": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.0.0.tgz", + "integrity": "sha512-GZtyzoHz95Rhs6S63D2t/eqvdFCm7I+yHMLVQheKM7nBD8mbZIt+ct1jz4536MDnaOGKIxynJ8eHTkVGVVkoTg==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-use-controllable-state": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.0.0.tgz", + "integrity": "sha512-FohDoZvk3mEXh9AWAVyRTYR4Sq7/gavuofglmiXB2g1aKyboUD4YtgWxKj8O5n+Uak52gXQ4wKz5IFST4vtJHg==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-use-callback-ref": "1.0.0" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-use-escape-keydown": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.0.0.tgz", + "integrity": "sha512-JwfBCUIfhXRxKExgIqGa4CQsiMemo1Xt0W/B4ei3fpzpvPENKpMKQ8mZSB6Acj3ebrAEgi2xiQvcI1PAAodvyg==", + "license": "MIT", + "dependencies": { + 
"@babel/runtime": "^7.13.10", + "@radix-ui/react-use-callback-ref": "1.0.0" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/@radix-ui/react-use-layout-effect": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.0.0.tgz", + "integrity": "sha512-6Tpkq+R6LOlmQb1R5NNETLG0B4YP0wc+klfXafpUCj6JGyaUc8il7/kUZ7m59rGbXGczE9Bs+iz2qloqsZBduQ==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0" + } + }, + "node_modules/cmdk/node_modules/react-remove-scroll": { + "version": "2.5.4", + "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.5.4.tgz", + "integrity": "sha512-xGVKJJr0SJGQVirVFAUZ2k1QLyO6m+2fy0l8Qawbp5Jgrv3DeLalrfMNBFSlmz5kriGGzsVBtGVnf4pTKIhhWA==", + "license": "MIT", + "dependencies": { + "react-remove-scroll-bar": "^2.3.3", + "react-style-singleton": "^2.2.1", + "tslib": "^2.1.0", + "use-callback-ref": "^1.3.0", + "use-sidecar": "^1.1.2" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, "node_modules/co": { "version": "4.6.0", "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", @@ -8670,6 +9129,12 @@ "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", "license": "MIT" }, + "node_modules/compute-scroll-into-view": { + "version": "1.0.20", + "resolved": "https://registry.npmjs.org/compute-scroll-into-view/-/compute-scroll-into-view-1.0.20.tgz", + "integrity": "sha512-UCB0ioiyj8CRjtrvaceBLqqhZCVP+1B8+NWQhmdsm0VXOJtobBCf1dBQmebCCo34qZmUwZfIH2MZLqNHazrfjg==", + "license": "MIT" + }, "node_modules/concat-map": { "version": "0.0.1", "resolved": 
"https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -9564,8 +10029,7 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/detect-node-es/-/detect-node-es-1.1.0.tgz", "integrity": "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ==", - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/detect-port-alt": { "version": "1.1.6", @@ -9648,6 +10112,19 @@ "node": ">=8" } }, + "node_modules/direction": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/direction/-/direction-1.0.4.tgz", + "integrity": "sha512-GYqKi1aH7PJXxdhTeZBFrg8vUBeKXi+cNprXsC1kpJcbcVnV9wBsrOu1cQEdG0WeQwlfHiy3XvnKfIrJ2R0NzQ==", + "license": "MIT", + "bin": { + "direction": "cli.js" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/dlv": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz", @@ -11595,7 +12072,6 @@ "resolved": "https://registry.npmjs.org/get-nonce/-/get-nonce-1.0.1.tgz", "integrity": "sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=6" } @@ -13128,6 +13604,12 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/is-hotkey": { + "version": "0.1.8", + "resolved": "https://registry.npmjs.org/is-hotkey/-/is-hotkey-0.1.8.tgz", + "integrity": "sha512-qs3NZ1INIS+H+yeo7cD9pDfwYV/jqRh1JG9S9zYrNudkoUQg7OL7ziXqRKu+InFjUIDoP2o6HIkLYMh1pcWgyQ==", + "license": "MIT" + }, "node_modules/is-map": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/is-map/-/is-map-2.0.3.tgz", @@ -13213,6 +13695,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/is-plain-object": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-5.0.0.tgz", + "integrity": 
"sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/is-potential-custom-element-name": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz", @@ -14984,6 +15475,12 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/material-icons": { + "version": "1.13.14", + "resolved": "https://registry.npmjs.org/material-icons/-/material-icons-1.13.14.tgz", + "integrity": "sha512-kZOfc7xCC0rAT8Q3DQixYAeT+tBqZnxkseQtp2bxBxz7q5pMAC+wmit7vJn1g/l7wRU+HEPq23gER4iPjGs5Cg==", + "license": "Apache-2.0" + }, "node_modules/math-intrinsics": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", @@ -18945,7 +19442,6 @@ "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.7.1.tgz", "integrity": "sha512-HpMh8+oahmIdOuS5aFKKY6Pyog+FNaZV/XyJOq7b4YFwsFHe5yYfdbIalI4k3vU2nSDql7YskmUseHsRrJqIPA==", "license": "MIT", - "peer": true, "dependencies": { "react-remove-scroll-bar": "^2.3.7", "react-style-singleton": "^2.2.3", @@ -18971,7 +19467,6 @@ "resolved": "https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.8.tgz", "integrity": "sha512-9r+yi9+mgU33AKcj6IbT9oRCO78WriSj6t/cF8DWBZJ9aOGPOTEDvdUDz1FwKim7QXWwmHqtdHnRJfhAxEG46Q==", "license": "MIT", - "peer": true, "dependencies": { "react-style-singleton": "^2.2.2", "tslib": "^2.0.0" @@ -19099,7 +19594,6 @@ "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.3.tgz", "integrity": "sha512-b6jSvxvVnyptAiLjbkWLE/lOnR4lfTtDAl+eUC7RZy+QQWc6wRzIV2CE6xBuMmDxc2qIihtDCZD5NPOFl7fRBQ==", "license": "MIT", - "peer": true, "dependencies": { "get-nonce": "^1.0.0", "tslib": "^2.0.0" @@ -20885,6 +21379,15 @@ "integrity": 
"sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", "license": "MIT" }, + "node_modules/scroll-into-view-if-needed": { + "version": "2.2.31", + "resolved": "https://registry.npmjs.org/scroll-into-view-if-needed/-/scroll-into-view-if-needed-2.2.31.tgz", + "integrity": "sha512-dGCXy99wZQivjmjIqihaBQNjryrz5rueJY7eHfTdyWEiR4ttYpsajb14rn9s5d4DY4EcY6+4+U/maARBXJedkA==", + "license": "MIT", + "dependencies": { + "compute-scroll-into-view": "^1.0.20" + } + }, "node_modules/select-hose": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/select-hose/-/select-hose-2.0.0.tgz", @@ -21244,6 +21747,57 @@ "node": ">=8" } }, + "node_modules/slate": { + "version": "0.94.1", + "resolved": "https://registry.npmjs.org/slate/-/slate-0.94.1.tgz", + "integrity": "sha512-GH/yizXr1ceBoZ9P9uebIaHe3dC/g6Plpf9nlUwnvoyf6V1UOYrRwkabtOCd3ZfIGxomY4P7lfgLr7FPH8/BKA==", + "license": "MIT", + "dependencies": { + "immer": "^9.0.6", + "is-plain-object": "^5.0.0", + "tiny-warning": "^1.0.3" + } + }, + "node_modules/slate-history": { + "version": "0.93.0", + "resolved": "https://registry.npmjs.org/slate-history/-/slate-history-0.93.0.tgz", + "integrity": "sha512-Gr1GMGPipRuxIz41jD2/rbvzPj8eyar56TVMyJBvBeIpQSSjNISssvGNDYfJlSWM8eaRqf6DAcxMKzsLCYeX6g==", + "license": "MIT", + "dependencies": { + "is-plain-object": "^5.0.0" + }, + "peerDependencies": { + "slate": ">=0.65.3" + } + }, + "node_modules/slate-react": { + "version": "0.98.4", + "resolved": "https://registry.npmjs.org/slate-react/-/slate-react-0.98.4.tgz", + "integrity": "sha512-8Of3v9hFuX8rIRc86LuuBhU9t8ps+9ARKL4yyhCrKQYZ93Ep/LFA3GvPGvtf3zYuVadZ8tkhRH8tbHOGNAndLw==", + "license": "MIT", + "dependencies": { + "@juggle/resize-observer": "^3.4.0", + "@types/is-hotkey": "^0.1.1", + "@types/lodash": "^4.14.149", + "direction": "^1.0.3", + "is-hotkey": "^0.1.6", + "is-plain-object": "^5.0.0", + "lodash": "^4.17.4", + "scroll-into-view-if-needed": "^2.2.20", + "tiny-invariant": "1.0.6" + }, + 
"peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0", + "slate": ">=0.65.3" + } + }, + "node_modules/slate-react/node_modules/tiny-invariant": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.0.6.tgz", + "integrity": "sha512-FOyLWWVjG+aC0UqG76V53yAWdXfH8bO6FNmyZOuUrzDzK8DI3/JRY25UD7+g49JWM1LXwymsKERB+DzI0dTEQA==", + "license": "MIT" + }, "node_modules/sockjs": { "version": "0.3.24", "resolved": "https://registry.npmjs.org/sockjs/-/sockjs-0.3.24.tgz", @@ -22184,6 +22738,16 @@ "integrity": "sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew==", "license": "MIT" }, + "node_modules/tailwind-merge": { + "version": "1.14.0", + "resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-1.14.0.tgz", + "integrity": "sha512-3mFKyCo/MBcgyOTlrY8T7odzZFx+w+qKSMAmdFzRvqBfLlSigU6TZnlFHK0lkMwj9Bj8OYU+9yW9lmGuS0QEnQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/dcastil" + } + }, "node_modules/tailwindcss": { "version": "3.4.17", "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.17.tgz", @@ -22418,6 +22982,12 @@ "integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==", "license": "MIT" }, + "node_modules/tiny-warning": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/tiny-warning/-/tiny-warning-1.0.3.tgz", + "integrity": "sha512-lBN9zLN/oAf68o3zNXYrdCt1kP8WsiGW8Oo2ka41b2IM5JL/S1CTyX1rW0mb/zSuJun0ZUrDxx4sqvYS2FWzPA==", + "license": "MIT" + }, "node_modules/tmpl": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", @@ -23100,7 +23670,6 @@ "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.3.tgz", "integrity": "sha512-jQL3lRnocaFtu3V00JToYz/4QkNWswxijDaCVNZRiRTO3HQDLsdu1ZtmIUvV4yPp+rvWm5j0y0TG/S61cuijTg==", "license": "MIT", - "peer": true, "dependencies": { 
"tslib": "^2.0.0" }, @@ -23122,7 +23691,6 @@ "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.3.tgz", "integrity": "sha512-Fedw0aZvkhynoPYlA5WXrMCAMm+nSWdZt6lzJQ7Ok8S6Q+VsHmHpRWndVRJ8Be0ZbkfPc5LRYH+5XrzXcEeLRQ==", "license": "MIT", - "peer": true, "dependencies": { "detect-node-es": "^1.1.0", "tslib": "^2.0.0" diff --git a/frontend/package.json b/frontend/package.json index c1f8a9d3..f85b4d4e 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -5,8 +5,9 @@ "private": true, "dependencies": { "@clerk/clerk-react": "^5.46.1", - "@copilotkit/react-core": "^1.10.3", - "@copilotkit/react-ui": "^1.10.3", + "@copilotkit/react-core": "^1.10.6", + "@copilotkit/react-textarea": "^1.10.6", + "@copilotkit/react-ui": "^1.10.6", "@copilotkit/shared": "^1.10.3", "@emotion/react": "^11.11.0", "@emotion/styled": "^11.11.0", diff --git a/frontend/src/api/cachedAnalytics.ts b/frontend/src/api/cachedAnalytics.ts index 15f11491..f3c4d94f 100644 --- a/frontend/src/api/cachedAnalytics.ts +++ b/frontend/src/api/cachedAnalytics.ts @@ -74,15 +74,15 @@ class CachedAnalyticsAPI { * Get analytics data with caching */ async getAnalyticsData(platforms?: string[], bypassCache: boolean = false): Promise { - const params = platforms ? { platforms: platforms.join(',') } : undefined; + const baseParams: any = platforms ? { platforms: platforms.join(',') } : {}; const endpoint = '/api/analytics/data'; // If bypassing cache, add timestamp to force fresh request - const requestParams = bypassCache ? { ...params, _t: Date.now() } : params; + const requestParams = bypassCache ? 
{ ...baseParams, _t: Date.now() } : baseParams; // Try to get from cache first (unless bypassing) if (!bypassCache) { - const cached = analyticsCache.get(endpoint, params); + const cached = analyticsCache.get(endpoint, baseParams); if (cached) { console.log('πŸ“¦ Analytics Cache HIT: Analytics data (cached for 60 minutes)'); return cached; @@ -95,7 +95,7 @@ class CachedAnalyticsAPI { // Cache the result with extended TTL (unless bypassing) if (!bypassCache) { - analyticsCache.set(endpoint, params, response.data, this.CACHE_TTL.ANALYTICS_DATA); + analyticsCache.set(endpoint, baseParams, response.data, this.CACHE_TTL.ANALYTICS_DATA); } return response.data; diff --git a/frontend/src/api/gsc.ts b/frontend/src/api/gsc.ts index 5d094eb6..ef9a07be 100644 --- a/frontend/src/api/gsc.ts +++ b/frontend/src/api/gsc.ts @@ -199,4 +199,4 @@ class GSCAPI { } } -export const gscAPI = new GSCAPI(); +export const gscAPI = new GSCAPI(); \ No newline at end of file diff --git a/frontend/src/api/seoDashboard.ts b/frontend/src/api/seoDashboard.ts index 670a04bc..36df9218 100644 --- a/frontend/src/api/seoDashboard.ts +++ b/frontend/src/api/seoDashboard.ts @@ -21,6 +21,10 @@ export interface PlatformStatus { connected: boolean; last_sync?: string; data_points?: number; + // Additional Bing-specific properties + has_expired_tokens?: boolean; + last_token_date?: string; + total_tokens?: number; } export interface AIInsight { @@ -40,6 +44,19 @@ export interface SEODashboardData { ai_insights: AIInsight[]; last_updated: string; website_url?: string; // User's website URL from onboarding + // Real data from backend + summary?: { + clicks: number; + impressions: number; + ctr: number; + position: number; + }; + timeseries?: any[]; + competitor_insights?: { + competitor_keywords: any[]; + content_gaps: any[]; + opportunity_score: number; + }; } // SEO Dashboard API functions diff --git a/frontend/src/components/SEODashboard/SEOCopilotContext.tsx 
b/frontend/src/components/SEODashboard/SEOCopilotContext.tsx index 0e53513f..dab3365d 100644 --- a/frontend/src/components/SEODashboard/SEOCopilotContext.tsx +++ b/frontend/src/components/SEODashboard/SEOCopilotContext.tsx @@ -1,7 +1,7 @@ // SEO CopilotKit Context Component // Provides real-time context and instructions to CopilotKit -import React, { useEffect, useRef } from 'react'; +import React, { useEffect, useRef, useMemo } from 'react'; import { useCopilotReadable } from '@copilotkit/react-core'; import { useSEOCopilotStore } from '../../stores/seoCopilotStore'; @@ -27,25 +27,29 @@ const SEOCopilotContext: React.FC<{ children: React.ReactNode }> = ({ children } } }, [personalizationData]); + // Memoize values to prevent unnecessary re-renders + const websiteUrl = useMemo(() => analysisData?.url || '', [analysisData?.url]); + const statusData = useMemo(() => ({ + isLoading, + isAnalyzing, + isGenerating, + error + }), [isLoading, isAnalyzing, isGenerating, error]); + const suggestionsCount = useMemo(() => Array.isArray(suggestions) ? 
suggestions.length : 0, [suggestions]); + // Register SEO analysis data with CopilotKit useCopilotReadable({ description: "Current SEO analysis data and insights", value: analysisData, categories: ["seo", "analysis"] }); - if (process.env.NODE_ENV === 'development') { - console.log('[CopilotContext] Registered analysis data', !!analysisData); - } // Provide a flat, explicit website URL for the LLM useCopilotReadable({ description: "Current website URL the user is working on", - value: analysisData?.url || '', + value: websiteUrl, categories: ["seo", "context"] }); - if (process.env.NODE_ENV === 'development') { - console.log('[CopilotContext] Registered website URL', analysisData?.url); - } // Register personalization data with CopilotKit useCopilotReadable({ @@ -53,9 +57,6 @@ const SEOCopilotContext: React.FC<{ children: React.ReactNode }> = ({ children } value: personalizationData, categories: ["user", "preferences"] }); - if (process.env.NODE_ENV === 'development') { - console.log('[CopilotContext] Registered personalization', !!personalizationData); - } // Register dashboard layout with CopilotKit useCopilotReadable({ @@ -63,9 +64,6 @@ const SEOCopilotContext: React.FC<{ children: React.ReactNode }> = ({ children } value: dashboardLayout, categories: ["ui", "layout"] }); - if (process.env.NODE_ENV === 'development') { - console.log('[CopilotContext] Registered layout', !!dashboardLayout); - } // Register suggestions with CopilotKit useCopilotReadable({ @@ -73,24 +71,25 @@ const SEOCopilotContext: React.FC<{ children: React.ReactNode }> = ({ children } value: suggestions, categories: ["actions", "suggestions"] }); - if (process.env.NODE_ENV === 'development') { - console.log('[CopilotContext] Registered suggestions', Array.isArray(suggestions) ? 
suggestions.length : 0); - } // Register loading states with CopilotKit useCopilotReadable({ description: "Current loading and processing states", - value: { - isLoading, - isAnalyzing, - isGenerating, - error - }, + value: statusData, categories: ["status", "loading"] }); - if (process.env.NODE_ENV === 'development') { - console.log('[CopilotContext] Registered status', { isLoading, isAnalyzing, isGenerating, hasError: !!error }); - } + + // Debug logging only in development and only when values actually change + useEffect(() => { + if (process.env.NODE_ENV === 'development') { + console.log('[CopilotContext] Registered analysis data', !!analysisData); + console.log('[CopilotContext] Registered website URL', websiteUrl); + console.log('[CopilotContext] Registered personalization', !!personalizationData); + console.log('[CopilotContext] Registered layout', !!dashboardLayout); + console.log('[CopilotContext] Registered suggestions', suggestionsCount); + console.log('[CopilotContext] Registered status', { isLoading, isAnalyzing, isGenerating, hasError: !!error }); + } + }, [analysisData, websiteUrl, personalizationData, dashboardLayout, suggestionsCount, statusData]); return <>{children}; }; diff --git a/frontend/src/components/SEODashboard/SEODashboard.tsx b/frontend/src/components/SEODashboard/SEODashboard.tsx index 96eb407c..0b24e355 100644 --- a/frontend/src/components/SEODashboard/SEODashboard.tsx +++ b/frontend/src/components/SEODashboard/SEODashboard.tsx @@ -1,4 +1,4 @@ -import React, { useEffect } from 'react'; +import React, { useEffect, useRef, useState } from 'react'; import { Box, Container, @@ -7,10 +7,28 @@ import { Alert, Skeleton, Chip, - Button + Button, + IconButton, + Tooltip, + Menu, + MenuItem, + Divider, + Avatar } from '@mui/material'; import { motion, AnimatePresence } from 'framer-motion'; import { useAuth, useUser, SignInButton, SignOutButton } from '@clerk/clerk-react'; +import { apiClient } from '../../api/client'; +import { + Search as 
SearchIcon, + Refresh as RefreshIcon, + Person as PersonIcon, + ExitToApp as ExitIcon, + ArrowBack as ArrowBackIcon, + MoreVert as MoreVertIcon, + CheckCircle as CheckCircleIcon, + Schedule as ScheduleIcon, + Info as InfoIcon +} from '@mui/icons-material'; // Shared components import { DashboardContainer, GlassCard } from '../shared/styled'; @@ -28,6 +46,14 @@ import { useSEODashboardStore } from '../../stores/seoDashboardStore'; // API import { userDataAPI } from '../../api/userData'; +// Shared components +import PlatformAnalytics from '../shared/PlatformAnalytics'; +import { cachedAnalyticsAPI } from '../../api/cachedAnalytics'; + +// OAuth hooks +import { useBingOAuth } from '../../hooks/useBingOAuth'; +import { useGSCConnection } from '../OnboardingWizard/common/useGSCConnection'; + // SEO Dashboard component const SEODashboard: React.FC = () => { // Clerk authentication hooks @@ -51,6 +77,35 @@ const SEODashboard: React.FC = () => { getAnalysisFreshness, } = useSEODashboardStore(); + // OAuth hooks + const { connect: connectBing } = useBingOAuth(); + const { handleGSCConnect } = useGSCConnection(); + + // Platform status state + const [platformStatus, setPlatformStatus] = useState({ + gsc: { connected: false, sites: [], last_sync: null, status: 'disconnected' }, + bing: { + connected: false, + sites: [], + last_sync: null, + status: 'disconnected', + has_expired_tokens: false, + last_token_date: undefined, + total_tokens: 0 + } + }); + + // Menu state + const [userMenuAnchor, setUserMenuAnchor] = useState(null); + const [statusMenuAnchor, setStatusMenuAnchor] = useState(null); + const [lastRefresh, setLastRefresh] = useState(null); + + // Competitor analysis data from onboarding step 3 + const [competitorAnalysisData, setCompetitorAnalysisData] = useState(null); + + // PlatformAnalytics refresh handle + const platformRefreshRef = useRef<(() => Promise) | null>(null); + // Sync dashboard analysis to Copilot store so readables have URL/context const 
setCopilotAnalysisData = useSEOCopilotStore(state => state.setAnalysisData); useEffect(() => { @@ -62,17 +117,112 @@ const SEODashboard: React.FC = () => { } }, [analysisData, setCopilotAnalysisData]); + // Load competitor analysis data on component mount useEffect(() => { - // Simulate fetching dashboard data - const fetchData = async () => { + loadCompetitorAnalysisData(); + }, []); + + // Reconnect handlers using existing OAuth hooks + const handleGSCReconnect = async () => { + try { + console.log('Initiating GSC reconnect...'); + await handleGSCConnect(); + } catch (error) { + console.error('Error reconnecting GSC:', error); + } + }; + + const handleBingReconnect = async () => { + try { + console.log('Initiating Bing reconnect...'); + // Purge expired tokens before reconnecting to avoid refresh loops + try { + await apiClient.post('/bing/purge-expired'); + console.log('Purged expired Bing tokens before reconnect'); + } catch (purgeError) { + console.warn('Failed to purge expired tokens (non-critical):', purgeError); + } + await connectBing(); + // After successful reconnect, refresh platform status and run analysis + try { + // Invalidate backend analytics cache for Bing + try { + await apiClient.post('/api/analytics/cache/clear', null, { params: { platform: 'bing' } }); + console.log('Cleared backend analytics cache for Bing'); + } catch (cacheErr) { + console.warn('Failed to clear backend analytics cache (non-critical):', cacheErr); + } + + // Invalidate frontend cached analytics + try { + cachedAnalyticsAPI.invalidatePlatformStatus(); + // Optional: clear all analytics cache if available + // @ts-ignore - method may not exist in older builds + cachedAnalyticsAPI.clearCache?.(); + console.log('Cleared frontend analytics cache'); + } catch (feCacheErr) { + console.warn('Failed to clear frontend analytics cache (non-critical):', feCacheErr); + } + + await fetchPlatformStatus(); + } catch (e) { + console.warn('Post-reconnect platform status refresh failed:', e); 
+ } + try { + await useSEODashboardStore.getState().refreshSEOAnalysis(); + } catch (e) { + console.warn('Post-reconnect analysis refresh failed:', e); + } + + // Force PlatformAnalytics to refresh (bypass cache) + try { + await platformRefreshRef.current?.(); + } catch (e) { + console.warn('Platform analytics forced refresh failed (non-critical):', e); + } + } catch (error) { + console.error('Error reconnecting Bing:', error); + } + }; + + // One-run guard to avoid duplicate fetches under StrictMode + const dataFetchedRef = useRef(false); + + // Consolidated data fetching effect + useEffect(() => { + if (dataFetchedRef.current || !isSignedIn) return; + dataFetchedRef.current = true; + + const fetchAllData = async () => { + let websiteUrl = 'https://alwrity.com'; // Default fallback + try { setLoading(true); - // Get user's website URL from user data - const userData = await userDataAPI.getUserData(); - const websiteUrl = userData?.website_url || 'https://alwrity.com'; + // Fetch platform status and user data in parallel + const [platformResponse, userData] = await Promise.all([ + apiClient.get('/api/seo-dashboard/platforms'), + userDataAPI.getUserData() + ]); - // Mock data for demonstration + console.log('Platform status response:', platformResponse.status, platformResponse.statusText); + console.log('Platform status data:', platformResponse.data); + setPlatformStatus(platformResponse.data); + + websiteUrl = userData?.website_url || 'https://alwrity.com'; + + // Fetch real data from backend using authenticated API client + console.log('Fetching SEO dashboard overview...'); + const response = await apiClient.get('/api/seo-dashboard/overview', { + params: { site_url: websiteUrl } + }); + + console.log('SEO overview response:', response.status, response.statusText); + console.log('Real SEO data received:', response.data); + setData(response.data); + } catch (error) { + console.error('Error fetching SEO dashboard data:', error); + // Fallback to mock data on error 
const mockData = { health_score: { score: 84, @@ -118,26 +268,107 @@ const SEODashboard: React.FC = () => { last_updated: new Date().toISOString(), website_url: websiteUrl || undefined // Convert null to undefined for TypeScript }; - setData(mockData); - setLoading(false); - } catch (err) { - setError('Failed to load dashboard data'); + } finally { setLoading(false); } }; - fetchData(); - }, []); + fetchAllData(); + }, [isSignedIn, setLoading, setData]); useEffect(() => { // Run initial SEO analysis if no data exists if (!loading && !error && data) { - // Call via store to avoid changing function identity in deps - useSEODashboardStore.getState().checkAndRunInitialAnalysis(); + // Check if we have cached analysis data first + const store = useSEODashboardStore.getState(); + store.checkAndRunInitialAnalysis(); + + // If no cached analysis data and we have a website URL, run initial analysis + if (!store.analysisData && data.website_url) { + console.log('No cached analysis data found, running initial SEO analysis...'); + store.runSEOAnalysis(); + } } }, [loading, error, data]); + // Menu handlers + const handleUserMenuOpen = (event: React.MouseEvent) => { + setUserMenuAnchor(event.currentTarget); + }; + + const handleUserMenuClose = () => { + setUserMenuAnchor(null); + }; + + const handleStatusMenuOpen = (event: React.MouseEvent) => { + setStatusMenuAnchor(event.currentTarget); + }; + + const handleStatusMenuClose = () => { + setStatusMenuAnchor(null); + }; + + const handleBackToDashboard = () => { + window.location.href = '/seo-dashboard'; + }; + + const handleRefreshData = async () => { + try { + setLoading(true); + await refreshSEOAnalysis(); + await fetchPlatformStatus(); + setLastRefresh(new Date()); + } catch (error) { + console.error('Error refreshing data:', error); + } finally { + setLoading(false); + } + }; + + // Background jobs visibility (user-triggered) + const [showBackgroundJobs, setShowBackgroundJobs] = useState(false); + + // Platform status 
fetching function + const fetchPlatformStatus = async () => { + try { + console.log('Fetching platform status...'); + const response = await apiClient.get('/api/seo-dashboard/platforms'); + console.log('Platform status response:', response.status, response.statusText); + console.log('Platform status data:', response.data); + setPlatformStatus(response.data); + } catch (error) { + console.error('Error fetching platform status:', error); + } + }; + + // Load competitor analysis data from onboarding step 3 + const loadCompetitorAnalysisData = () => { + try { + const cachedData = localStorage.getItem('competitor_analysis_data'); + const cachedUrl = localStorage.getItem('competitor_analysis_url'); + const cachedTimestamp = localStorage.getItem('competitor_analysis_timestamp'); + + if (cachedData && cachedUrl && cachedTimestamp) { + const analysisData = JSON.parse(cachedData); + const timestamp = parseInt(cachedTimestamp); + const isRecent = (Date.now() - timestamp) < (7 * 24 * 60 * 60 * 1000); // 7 days + + if (isRecent) { + console.log('Loading competitor analysis data from onboarding step 3:', analysisData); + setCompetitorAnalysisData(analysisData); + } else { + console.log('Competitor analysis data is too old, not loading'); + } + } else { + console.log('No competitor analysis data found in localStorage'); + } + } catch (error) { + console.error('Error loading competitor analysis data:', error); + } + }; + + if (loading) { return ; } @@ -202,137 +433,445 @@ const SEODashboard: React.FC = () => { animate={{ opacity: 1, y: 0 }} transition={{ duration: 0.6 }} > - {/* Header */} - - - - πŸ” SEO Dashboard - - - AI-powered insights and actionable recommendations - - + {/* Professional Compact Header */} + + {/* Left Section - Navigation & Title */} - {/* User Info */} - - - - - - - - {/* Freshness Indicator */} - {(() => { - const freshness = getAnalysisFreshness(); - const chipColor = freshness.isStale ? 
'rgba(255, 193, 7, 0.25)' : 'rgba(76, 175, 80, 0.25)'; - const chipBorder = freshness.isStale ? 'rgba(255, 193, 7, 0.45)' : 'rgba(76, 175, 80, 0.45)'; - return ( - - ); - })()} - + + + + + + SEO Dashboard + + + AI-powered insights and recommendations + + + + {/* Center Section - Status Overview */} + + + + + + + + + } + label={(() => { + const freshness = getAnalysisFreshness(); + return freshness.label; + })()} + size="small" + sx={{ + bgcolor: 'rgba(255, 255, 255, 0.1)', + color: 'white', + border: '1px solid rgba(255, 255, 255, 0.2)', + fontSize: '0.75rem' + }} + /> + + + + {/* Right Section - User Menu */} + + + + + + + + + + + {/* Status Menu */} + + + + Platform Status + + + + {/* GSC Status */} + + + + + + Google Search Console: {platformStatus.gsc.connected ? 'Connected' : 'Disconnected'} + + + {!platformStatus.gsc.connected && ( + + )} + + + + {/* Bing Status */} + + + + + + + Bing Webmaster: {platformStatus.bing.connected ? 'Connected' : + platformStatus.bing.status === 'expired' ? 
'Expired' : 'Disconnected'} + + {platformStatus.bing.status === 'expired' && platformStatus.bing.last_token_date && ( + + Last connected: {new Date(platformStatus.bing.last_token_date).toLocaleDateString()} + + )} + + + {!platformStatus.bing.connected && ( + + )} + + + + + {/* User Menu */} + + + + {user?.primaryEmailAddress?.emailAddress || 'User'} + + + + + + Refresh Data + + + + + + Sign Out + + + - {/* GSC Connection Section */} - - - {/* CopilotKit Test Panel removed */} - {/* Executive Summary */} + {/* Search Performance Overview */} - - πŸ“Š Performance Overview - - - - - - Organic Traffic - - - {data.metrics.traffic.value} - - + + + πŸ“Š Search Performance Overview + + + + + + + + + { + console.log('Real analytics data loaded:', analyticsData); + }} + onRefreshReady={(fn) => { platformRefreshRef.current = fn; }} + onReconnect={(platform) => { + if (platform === 'gsc') { + handleGSCReconnect(); + } else if (platform === 'bing') { + handleBingReconnect(); + } + }} + showBackgroundJobs={showBackgroundJobs} + /> + + {/* Enhanced Metrics with Tooltips */} + + + + + + + Connected Platforms + + + {(platformStatus.gsc.connected ? 1 : 0) + (platformStatus.bing.connected ? 
1 : 0)} + + + of 2 platforms + + + + + + + + + + Total Clicks + + + {data.metrics?.traffic?.value || data.summary?.clicks || 0} + + + from search results + + + + + + + + + + Total Impressions + + + {data.metrics?.impressions?.value || data.summary?.impressions || 0} + + + search appearances + + + + + + + + + + Overall CTR + + + {data.metrics?.ctr?.value || data.summary?.ctr || 0}% + + + click-through rate + + + + - - - - Average Ranking - - - {data.metrics.rankings.value} - - - - - - - Mobile Speed - - - {data.metrics.mobile.value} - - - - - - - Keywords Tracked - - - {data.metrics.keywords.value} - - - - + + {/* Competitive Analysis from Onboarding Step 3 */} + {competitorAnalysisData && ( + + + + 🎯 Competitive Analysis + + + + + + + + + + + + Competitors Found + + + {competitorAnalysisData.competitors?.length || 0} + + + in your market + + + + + + + + + + Social Media Accounts + + + {Object.keys(competitorAnalysisData.social_media_accounts || {}).length} + + + competitor accounts + + + + + + + + + + Social Citations + + + {competitorAnalysisData.social_media_citations?.length || 0} + + + mentions found + + + + + + + {/* Competitor List */} + {competitorAnalysisData.competitors && competitorAnalysisData.competitors.length > 0 && ( + + + Top Competitors + + + {competitorAnalysisData.competitors.slice(0, 6).map((competitor: any, index: number) => ( + + + + {competitor.name || competitor.domain || `Competitor ${index + 1}`} + + + {competitor.domain || competitor.url || 'No domain available'} + + {competitor.description && ( + + {competitor.description.length > 100 + ? 
`${competitor.description.substring(0, 100)}...` + : competitor.description} + + )} + + + ))} + + + )} + + {/* Research Summary */} + {competitorAnalysisData.research_summary && ( + + + Research Summary + + + + {competitorAnalysisData.research_summary} + + + + )} + + )} + {/* SEO Analyzer Panel */} void; @@ -69,17 +70,28 @@ const GSCLoginButton: React.FC = ({ onStatusChange }) => { setLoading(true); setError(null); - const statusResponse = await gscAPI.getStatus(); - setStatus(statusResponse); + // Use backend API to check GSC status + const response = await apiClient.get('/api/seo-dashboard/platforms'); + const platformData = response.data; + + const gscStatus = { + connected: platformData.gsc?.connected || false, + sites: platformData.gsc?.sites || [], + last_sync: platformData.gsc?.last_sync || undefined + }; + + setStatus(gscStatus); if (onStatusChange) { - onStatusChange(statusResponse.connected); + onStatusChange(gscStatus.connected); } - console.log('GSC Login Button: Status checked, connected:', statusResponse.connected); + console.log('GSC Login Button: Status checked, connected:', gscStatus.connected); } catch (err) { console.error('GSC Login Button: Error checking status:', err); setError('Failed to check GSC connection status'); + // Set disconnected status on error + setStatus({ connected: false, sites: [], last_sync: undefined }); } finally { setLoading(false); } diff --git a/frontend/src/components/shared/BackgroundJobManager.tsx b/frontend/src/components/shared/BackgroundJobManager.tsx index 9f472e4d..9ba0e149 100644 --- a/frontend/src/components/shared/BackgroundJobManager.tsx +++ b/frontend/src/components/shared/BackgroundJobManager.tsx @@ -1,4 +1,4 @@ -import React, { useState, useEffect, useCallback } from 'react'; +import React, { useState, useEffect, useCallback, useRef } from 'react'; import { Box, Button, @@ -206,8 +206,14 @@ const BackgroundJobManager: React.FC = ({ } }; + // One-run guard to prevent duplicate calls in StrictMode + const 
jobsFetchedRef = useRef(false); + // Poll for job updates useEffect(() => { + if (jobsFetchedRef.current) return; + jobsFetchedRef.current = true; + fetchJobs(); // Only start polling if there are running jobs diff --git a/frontend/src/components/shared/PlatformAnalytics.tsx b/frontend/src/components/shared/PlatformAnalytics.tsx index db22c5ab..375f1a20 100644 --- a/frontend/src/components/shared/PlatformAnalytics.tsx +++ b/frontend/src/components/shared/PlatformAnalytics.tsx @@ -1,4 +1,4 @@ -import React, { useState, useEffect, useCallback } from 'react'; +import React, { useState, useEffect, useCallback, useRef } from 'react'; import { Box, Card, @@ -26,6 +26,7 @@ import { Error as ErrorIcon, Warning, } from '@mui/icons-material'; +import { Button } from '@mui/material'; import { PlatformAnalytics as PlatformAnalyticsType, AnalyticsSummary, PlatformConnectionStatus } from '../../api/analytics'; import { cachedAnalyticsAPI } from '../../api/cachedAnalytics'; import BingInsightsCard from './BingInsightsCard'; @@ -37,6 +38,8 @@ interface PlatformAnalyticsComponentProps { refreshInterval?: number; // in milliseconds, 0 = no auto-refresh onDataLoaded?: (data: any) => void; onRefreshReady?: (refreshFn: () => Promise) => void; // Expose refresh function to parent + onReconnect?: (platform: string) => void; // Reconnect handler for individual platforms + showBackgroundJobs?: boolean; // Only render background jobs when user triggers } const PlatformAnalytics: React.FC = ({ @@ -45,6 +48,8 @@ const PlatformAnalytics: React.FC = ({ refreshInterval = 0, onDataLoaded, onRefreshReady, + onReconnect, + showBackgroundJobs = false, }) => { const [loading, setLoading] = useState(true); const [error, setError] = useState(null); @@ -111,7 +116,13 @@ const PlatformAnalytics: React.FC = ({ } }, [platforms, loadData]); + // One-run guard to prevent duplicate calls in StrictMode + const dataLoadedRef = useRef(false); + useEffect(() => { + if (dataLoadedRef.current) return; + 
dataLoadedRef.current = true; + loadData(); // Set up auto-refresh if interval is specified @@ -300,9 +311,31 @@ const PlatformAnalytics: React.FC = ({ )} {data.status === 'error' && ( - - {data.error_message || 'Failed to load analytics data'} - + + + {data.error_message || 'Failed to load analytics data'} + + {onReconnect && ( + + )} + )} {data.status === 'partial' && ( @@ -423,18 +456,20 @@ const PlatformAnalytics: React.FC = ({ ))} - {/* Background Job Manager */} - - { - console.log('πŸŽ‰ Background job completed:', job); - // Refresh analytics data when job completes - forceRefresh(); - }} - /> - + {/* Background Job Manager - render only when explicitly enabled */} + {showBackgroundJobs && ( + + { + console.log('πŸŽ‰ Background job completed:', job); + // Refresh analytics data when job completes + forceRefresh(); + }} + /> + + )} {/* Debug Section - Show data structure for all platforms */} diff --git a/frontend/src/hooks/useBingOAuth.ts b/frontend/src/hooks/useBingOAuth.ts index a07e2981..f16de2b0 100644 --- a/frontend/src/hooks/useBingOAuth.ts +++ b/frontend/src/hooks/useBingOAuth.ts @@ -111,6 +111,9 @@ export const useBingOAuth = (): UseBingOAuthReturn => { throw new Error('Failed to open Bing OAuth popup. 
Please allow popups for this site.'); } + // Track if we've already handled success/error to avoid duplicate processing + let messageHandled = false; + // Listen for popup completion and messages const messageHandler = (event: MessageEvent) => { console.log('Bing OAuth: Message received from any source:', { @@ -139,6 +142,7 @@ export const useBingOAuth = (): UseBingOAuthReturn => { if (event.data?.type === 'BING_OAUTH_SUCCESS') { console.log('Bing OAuth: Success message received:', event.data); + messageHandled = true; popup.close(); window.removeEventListener('message', messageHandler); @@ -148,6 +152,7 @@ export const useBingOAuth = (): UseBingOAuthReturn => { }, 1000); } else if (event.data?.type === 'BING_OAUTH_ERROR') { console.error('Bing OAuth: Error message received:', event.data); + messageHandled = true; popup.close(); window.removeEventListener('message', messageHandler); setError(event.data.error || 'Bing OAuth connection failed'); @@ -170,7 +175,13 @@ export const useBingOAuth = (): UseBingOAuthReturn => { clearInterval(checkClosed); window.removeEventListener('message', messageHandler); console.log('Bing OAuth: Popup closed, refreshing status...'); - console.log('Bing OAuth: Popup closed without receiving success/error message'); + + if (!messageHandled) { + console.log('Bing OAuth: Popup closed without receiving success/error message'); + } else { + console.log('Bing OAuth: Popup closed after successful message handling'); + } + // Refresh status after OAuth completion setTimeout(() => { checkStatus(); @@ -217,10 +228,7 @@ export const useBingOAuth = (): UseBingOAuthReturn => { setError(null); }, []); - // Check status on mount - useEffect(() => { - checkStatus(); - }, [checkStatus]); + // Note: Status check is now handled by the parent component to avoid duplicate API calls return { connected,