Recovered state: integrated TrendSurferAgent, restored frontend/backend files, and cleaned up recovery scripts

2026-02-08 13:56:57 +05:30
parent 1db10ccd0f
commit e404a86502
333 changed files with 42223 additions and 10875 deletions
--- a/backend/api/onboarding_utils/api_key_management_service.py
+++ b/backend/api/onboarding_utils/api_key_management_service.py
@@ -20,14 +20,8 @@ class APIKeyManagementService:
        # Ensure database service is available
        if not hasattr(self.api_key_manager, 'use_database'):
            self.api_key_manager.use_database = True
-            try:
-                from services.onboarding.database_service import OnboardingDatabaseService
-                self.api_key_manager.db_service = OnboardingDatabaseService()
-                logger.info("Database service initialized for APIKeyManager")
-            except Exception as e:
-                logger.warning(f"Database service not available: {e}")
-                self.api_key_manager.use_database = False
-                self.api_key_manager.db_service = None
+            # Legacy service removed - using direct DB access
+            self.api_key_manager.db_service = None
        
        # Simple cache for API keys
        self._api_keys_cache = None
@@ -77,18 +71,28 @@ class APIKeyManagementService:
        """
        try:
            # Prefer DB per-user keys when user_id is provided and DB is available
-            if user_id and getattr(self.api_key_manager, 'use_database', False) and getattr(self.api_key_manager, 'db_service', None):
+            if user_id and getattr(self.api_key_manager, 'use_database', False):
                try:
                    from services.database import SessionLocal
+                    from models.onboarding import APIKey
+                    
                    db = SessionLocal()
                    try:
-                        api_keys = self.api_key_manager.db_service.get_api_keys(user_id, db) or {}
-                        logger.info(f"Loaded {len(api_keys)} API keys from database for user {user_id}")
-                        return {
-                            "api_keys": api_keys,
-                            "total_providers": len(api_keys),
-                            "configured_providers": [k for k, v in api_keys.items() if v]
-                        }
+                        # Direct DB query instead of legacy service
+                        api_keys_records = db.query(APIKey).filter(
+                            APIKey.user_id == user_id,
+                            APIKey.is_active == True
+                        ).all()
+                        
+                        api_keys = {k.provider: k.api_key for k in api_keys_records}
+                        
+                        if api_keys:
+                            logger.info(f"Loaded {len(api_keys)} API keys from database for user {user_id}")
+                            return {
+                                "api_keys": api_keys,
+                                "total_providers": len(api_keys),
+                                "configured_providers": [k for k, v in api_keys.items() if v]
+                            }
                    finally:
                        db.close()
                except Exception as db_err:
--- a/backend/api/onboarding_utils/business_info_service.py
+++ b/backend/api/onboarding_utils/business_info_service.py
@@ -19,9 +19,10 @@ class BusinessInfoService:
            from models.business_info_request import BusinessInfoRequest
            from services.business_info_service import business_info_service
            
-            logger.info(f"🔄 Saving business info for user_id: {business_info.user_id}")
-            result = business_info_service.save_business_info(business_info)
-            logger.success(f"✅ Business info saved successfully for user_id: {business_info.user_id}")
+            request_model = BusinessInfoRequest(**business_info)
+            logger.info(f"🔄 Saving business info for user_id: {request_model.user_id}")
+            result = business_info_service.save_business_info(request_model)
+            logger.success(f"✅ Business info saved successfully for user_id: {request_model.user_id}")
            return result
        except Exception as e:
            logger.error(f"❌ Error saving business info: {str(e)}")
@@ -46,7 +47,7 @@ class BusinessInfoService:
            logger.error(f"❌ Error getting business info: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Failed to get business info: {str(e)}")
    
-    async def get_business_info_by_user(self, user_id: int) -> Dict[str, Any]:
+    async def get_business_info_by_user(self, user_id: str) -> Dict[str, Any]:
        """Get business information by user ID."""
        try:
            from services.business_info_service import business_info_service
--- a/backend/api/onboarding_utils/endpoints_config_data.py
+++ b/backend/api/onboarding_utils/endpoints_config_data.py
@@ -162,7 +162,7 @@ async def generate_persona_preview(user_id: int = 1):
        raise HTTPException(status_code=500, detail="Internal server error")


-async def generate_writing_persona(user_id: int = 1):
+async def generate_writing_persona(user_id: str):
    try:
        from api.onboarding_utils.persona_management_service import PersonaManagementService
        persona_service = PersonaManagementService()
@@ -202,7 +202,7 @@ async def get_business_info(business_info_id: int):
        raise HTTPException(status_code=500, detail=f"Failed to get business info: {str(e)}")


-async def get_business_info_by_user(user_id: int):
+async def get_business_info_by_user(user_id: str):
    try:
        from api.onboarding_utils.business_info_service import BusinessInfoService
        business_service = BusinessInfoService()
--- a/backend/api/onboarding_utils/endpoints_core.py
+++ b/backend/api/onboarding_utils/endpoints_core.py
@@ -5,7 +5,7 @@ from fastapi import HTTPException, Depends

 from middleware.auth_middleware import get_current_user

-from services.onboarding.progress_service import get_onboarding_progress_service
+from services.onboarding.progress_service import OnboardingProgressService


 def health_check():
@@ -14,12 +14,15 @@ def health_check():

 async def initialize_onboarding(current_user: Dict[str, Any] = Depends(get_current_user)):
    try:
+        if not current_user or not current_user.get('id'):
+            logger.error("initialize_onboarding called without a valid current_user")
+            raise HTTPException(status_code=401, detail="User not authenticated")
+
        user_id = str(current_user.get('id'))
-        progress_service = get_onboarding_progress_service()
+        progress_service = OnboardingProgressService()
        status = progress_service.get_onboarding_status(user_id)

-        # Get completion data for step validation
-        completion_data = progress_service.get_completion_data(user_id)
+        completion_data = progress_service.get_completion_data(user_id) or {}
        
        # Build steps data based on database state
        steps_data = []
@@ -29,20 +32,20 @@ async def initialize_onboarding(current_user: Dict[str, Any] = Depends(get_curre
            
            # Check if step is completed based on database data
            if step_num == 1:  # API Keys
-                api_keys = completion_data.get('api_keys', {})
+                api_keys = completion_data.get('api_keys') or {}
                step_completed = any(v for v in api_keys.values() if v)
            elif step_num == 2:  # Website Analysis
-                website = completion_data.get('website_analysis', {})
+                website = completion_data.get('website_analysis') or {}
                step_completed = bool(website.get('website_url') or website.get('writing_style'))
                if step_completed:
                    step_data = website
            elif step_num == 3:  # Research Preferences
-                research = completion_data.get('research_preferences', {})
+                research = completion_data.get('research_preferences') or {}
                step_completed = bool(research.get('research_depth') or research.get('content_types'))
                if step_completed:
                    step_data = research
            elif step_num == 4:  # Persona Generation
-                persona = completion_data.get('persona_data', {})
+                persona = completion_data.get('persona_data') or {}
                step_completed = bool(persona.get('corePersona') or persona.get('platformPersonas'))
                if step_completed:
                    step_data = persona
@@ -65,7 +68,7 @@ async def initialize_onboarding(current_user: Dict[str, Any] = Depends(get_curre
        try:
            if not status['is_completed']:
                all_have = (
-                    any(v for v in completion_data.get('api_keys', {}).values() if v) and
+                    any(v for v in (completion_data.get('api_keys') or {}).values() if v) and
                    bool((completion_data.get('website_analysis') or {}).get('website_url') or (completion_data.get('website_analysis') or {}).get('writing_style')) and
                    bool((completion_data.get('research_preferences') or {}).get('research_depth') or (completion_data.get('research_preferences') or {}).get('content_types')) and
                    bool((completion_data.get('persona_data') or {}).get('corePersona') or (completion_data.get('persona_data') or {}).get('platformPersonas'))
--- a/backend/api/onboarding_utils/onboarding_completion_service.py
+++ b/backend/api/onboarding_utils/onboarding_completion_service.py
@@ -4,17 +4,15 @@ Handles the complex logic for completing the onboarding process.
 """

 from typing import Dict, Any, List
-from datetime import datetime
+from datetime import datetime, timedelta
 from fastapi import HTTPException
 from loguru import logger

-from services.onboarding.progress_service import get_onboarding_progress_service
-from services.onboarding.database_service import OnboardingDatabaseService
-from services.database import get_db
+from api.content_planning.services.content_strategy.onboarding import OnboardingDataIntegrationService
+from services.database import get_session_for_user
 from services.persona_analysis_service import PersonaAnalysisService
 from services.research.research_persona_scheduler import schedule_research_persona_generation
 from services.persona.facebook.facebook_persona_scheduler import schedule_facebook_persona_generation
-from services.oauth_token_monitoring_service import create_oauth_monitoring_tasks

 class OnboardingCompletionService:
    """Service for handling onboarding completion logic."""
@@ -26,11 +24,12 @@ class OnboardingCompletionService:
    async def complete_onboarding(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
        """Complete the onboarding process with full validation."""
        try:
+            from services.onboarding.progress_service import OnboardingProgressService
            user_id = str(current_user.get('id'))
-            progress_service = get_onboarding_progress_service()
+            progress_service = OnboardingProgressService()
            
            # Strict DB-only validation now that step persistence is solid
-            missing_steps = self._validate_required_steps_database(user_id)
+            missing_steps = await self._validate_required_steps_database(user_id)
            if missing_steps:
                missing_steps_str = ", ".join(missing_steps)
                raise HTTPException(
@@ -39,7 +38,7 @@ class OnboardingCompletionService:
                )

            # Require API keys in DB for completion
-            self._validate_api_keys(user_id)
+            await self._validate_api_keys(user_id)
            
            # Generate writing persona from onboarding data only if not already present
            persona_generated = await self._generate_persona_from_onboarding(user_id)
@@ -67,9 +66,18 @@ class OnboardingCompletionService:
            
            # Create OAuth token monitoring tasks for connected platforms
            try:
-                from services.database import SessionLocal
-                db = SessionLocal()
+                from services.progressive_setup_service import ProgressiveSetupService
+                
+                db = get_session_for_user(user_id)
                try:
+                    # Initialize user environment (create workspace, setup features)
+                    try:
+                        setup_service = ProgressiveSetupService(db)
+                        setup_service.initialize_user_environment(user_id)
+                        logger.info(f"Initialized user environment for {user_id} on onboarding completion")
+                    except Exception as e:
+                        logger.warning(f"Failed to initialize user environment for {user_id}: {e}")
+
                    monitoring_tasks = create_oauth_monitoring_tasks(user_id, db)
                    logger.info(
                        f"Created {len(monitoring_tasks)} OAuth token monitoring tasks for user {user_id} "
@@ -81,29 +89,200 @@ class OnboardingCompletionService:
                # Non-critical: log but don't fail onboarding completion
                logger.warning(f"Failed to create OAuth token monitoring tasks for user {user_id}: {e}")
            
-            # Create website analysis tasks for user's website and competitors
+            # Schedule website analysis task creation 5 minutes after onboarding completion
+            try:
+                from services.website_analysis_monitoring_service import schedule_website_analysis_task_creation
+                schedule_website_analysis_task_creation(user_id=user_id, delay_minutes=5)
+                logger.info(
+                    f"Scheduled website analysis task creation for user {user_id} "
+                    f"(5 minutes after onboarding completion)"
+                )
+            except Exception as e:
+                logger.warning(f"Failed to schedule website analysis task creation for user {user_id}: {e}")
+
+            # Schedule onboarding full-site SEO audit (non-blocking) 5 minutes after completion
            try:
                from services.database import SessionLocal
-                from services.website_analysis_monitoring_service import create_website_analysis_tasks
+                from models.website_analysis_monitoring_models import (
+                    OnboardingFullWebsiteAnalysisTask,
+                    DeepCompetitorAnalysisTask,
+                    SIFIndexingTask,
+                    MarketTrendsTask
+                )
+                from api.content_planning.services.content_strategy.onboarding import OnboardingDataIntegrationService
+
                db = SessionLocal()
                try:
-                    result = create_website_analysis_tasks(user_id=user_id, db=db)
-                    if result.get('success'):
-                        tasks_count = result.get('tasks_created', 0)
+                    integration_service = OnboardingDataIntegrationService()
+                    integrated_data = integration_service.get_integrated_data_sync(user_id, db)
+                    website_analysis = integrated_data.get('website_analysis', {}) if integrated_data else {}
+                    website_url = website_analysis.get('website_url')
+
+                    if not website_url:
+                        try:
+                            from services.website_analysis_monitoring_service import clerk_user_id_to_int
+                            from models.onboarding import WebsiteAnalysis
+                            session_id_int = clerk_user_id_to_int(user_id)
+                            analysis = db.query(WebsiteAnalysis).filter(
+                                WebsiteAnalysis.session_id == session_id_int
+                            ).order_by(WebsiteAnalysis.created_at.desc()).first()
+                            if analysis and analysis.website_url:
+                                website_url = analysis.website_url
+                        except Exception:
+                            website_url = None
+
+                    if website_url:
+                        # 1. Schedule Full Site SEO Audit
+                        next_execution = datetime.utcnow() + timedelta(minutes=5)
+                        existing = db.query(OnboardingFullWebsiteAnalysisTask).filter(
+                            OnboardingFullWebsiteAnalysisTask.user_id == user_id,
+                            OnboardingFullWebsiteAnalysisTask.website_url == website_url
+                        ).first()
+
+                        payload = {
+                            'website_url': website_url,
+                            'max_urls': 500,
+                            'created_from': 'onboarding_completion'
+                        }
+
+                        if existing:
+                            existing.status = 'active'
+                            existing.next_execution = next_execution
+                            existing.payload = payload
+                            db.add(existing)
+                        else:
+                            db.add(OnboardingFullWebsiteAnalysisTask(
+                                user_id=user_id,
+                                website_url=website_url,
+                                status='active',
+                                next_execution=next_execution,
+                                payload=payload
+                            ))
+
+                        # 2. Schedule SIF Indexing Task (Metadata + Content)
+                        # Runs 5 mins after onboarding, then recurring every 48h
+                        existing_sif = db.query(SIFIndexingTask).filter(
+                            SIFIndexingTask.user_id == user_id,
+                            SIFIndexingTask.website_url == website_url
+                        ).first()
+                        
+                        payload_sif = {
+                            'website_url': website_url,
+                            'mode': 'initial_indexing',
+                            'created_from': 'onboarding_completion'
+                        }
+                        
+                        if existing_sif:
+                            existing_sif.status = 'active'
+                            existing_sif.next_execution = next_execution
+                            existing_sif.frequency_hours = 48
+                            existing_sif.payload = payload_sif
+                            db.add(existing_sif)
+                        else:
+                            db.add(SIFIndexingTask(
+                                user_id=user_id,
+                                website_url=website_url,
+                                status='active',
+                                next_execution=next_execution,
+                                frequency_hours=48,
+                                payload=payload_sif
+                            ))
+                        
                        logger.info(
-                            f"Created {tasks_count} website analysis tasks for user {user_id} "
-                            f"on onboarding completion"
+                            f"Scheduled SIF indexing task for user {user_id} "
+                            f"({website_url}) at {next_execution.isoformat()}"
                        )
+
+                        # 3. Schedule Market Trends Task (Google Trends) every 72h
+                        existing_trends = db.query(MarketTrendsTask).filter(
+                            MarketTrendsTask.user_id == user_id,
+                            MarketTrendsTask.website_url == website_url
+                        ).first()
+
+                        payload_trends = {
+                            "website_url": website_url,
+                            "geo": "US",
+                            "timeframe": "today 12-m",
+                            "created_from": "onboarding_completion"
+                        }
+
+                        if existing_trends:
+                            existing_trends.status = "active"
+                            existing_trends.next_execution = next_execution
+                            existing_trends.frequency_hours = 72
+                            existing_trends.payload = payload_trends
+                            db.add(existing_trends)
+                        else:
+                            db.add(MarketTrendsTask(
+                                user_id=user_id,
+                                website_url=website_url,
+                                status="active",
+                                next_execution=next_execution,
+                                frequency_hours=72,
+                                payload=payload_trends
+                            ))
+
+                        db.commit()
+                        logger.info(
+                            f"Scheduled onboarding full-site SEO audit for user {user_id} "
+                            f"({website_url}) at {next_execution.isoformat()}"
+                        )
+
+                        try:
+                            research_prefs = integrated_data.get("research_preferences", {}) if isinstance(integrated_data, dict) else {}
+                            competitors = research_prefs.get("competitors") if isinstance(research_prefs, dict) else None
+
+                            if isinstance(competitors, list) and len(competitors) > 0:
+                                existing_deep = db.query(DeepCompetitorAnalysisTask).filter(
+                                    DeepCompetitorAnalysisTask.user_id == user_id,
+                                    DeepCompetitorAnalysisTask.website_url == website_url
+                                ).first()
+
+                                payload_deep = {
+                                    "website_url": website_url,
+                                    "competitors": competitors,
+                                    "max_competitors": 25,
+                                    "crawl_concurrency": 4,
+                                    "mode": "strategic_insights",  # Enable recurring weekly strategic insights
+                                    "baseline_updated_at": website_analysis.get("updated_at") if isinstance(website_analysis, dict) else None,
+                                    "created_from": "onboarding_completion"
+                                }
+
+                                if existing_deep:
+                                    existing_deep.status = "active"
+                                    existing_deep.next_execution = next_execution
+                                    existing_deep.payload = payload_deep
+                                    db.add(existing_deep)
+                                else:
+                                    db.add(DeepCompetitorAnalysisTask(
+                                        user_id=user_id,
+                                        website_url=website_url,
+                                        status="active",
+                                        next_execution=next_execution,
+                                        payload=payload_deep
+                                    ))
+
+                                db.commit()
+                                logger.info(
+                                    f"Scheduled deep competitor analysis for user {user_id} "
+                                    f"({website_url}) at {next_execution.isoformat()} with {len(competitors)} competitors"
+                                )
+                            else:
+                                logger.warning(
+                                    f"Deep competitor analysis not scheduled for user {user_id}: "
+                                    f"no Step 3 competitors available"
+                                )
+                        except Exception as e:
+                            logger.warning(f"Failed to schedule deep competitor analysis for user {user_id}: {e}")
                    else:
-                        error = result.get('error', 'Unknown error')
                        logger.warning(
-                            f"Failed to create website analysis tasks for user {user_id}: {error}"
+                            f"Could not schedule onboarding full-site SEO audit for user {user_id}: "
+                            f"website_url missing"
                        )
                finally:
                    db.close()
            except Exception as e:
-                # Non-critical: log but don't fail onboarding completion
-                logger.warning(f"Failed to create website analysis tasks for user {user_id}: {e}")
+                logger.warning(f"Failed to schedule onboarding full-site SEO audit for user {user_id}: {e}")
            
            return {
                "message": "Onboarding completed successfully",
@@ -118,37 +297,45 @@ class OnboardingCompletionService:
            logger.error(f"Error completing onboarding: {str(e)}")
            raise HTTPException(status_code=500, detail="Internal server error")
    
-    def _validate_required_steps_database(self, user_id: str) -> List[str]:
-        """Validate that all required steps are completed using database only."""
+    async def _validate_required_steps_database(self, user_id: str) -> List[str]:
+        """Validate that all required steps are completed using SSOT integration service."""
        missing_steps = []
        try:
-            db = next(get_db())
-            db_service = OnboardingDatabaseService()
+            db = get_session_for_user(user_id)
+            integration_service = OnboardingDataIntegrationService()
            
            # Debug logging
            logger.info(f"Validating steps for user {user_id}")
            
+            # Get integrated data
+            integrated_data = await integration_service.process_onboarding_data(user_id, db)
+            db.close()
+            
            # Check each required step
            for step_num in self.required_steps:
                step_completed = False
                
                if step_num == 1:  # API Keys
-                    api_keys = db_service.get_api_keys(user_id, db)
-                    logger.info(f"Step 1 - API Keys: {api_keys}")
-                    step_completed = any(v for v in api_keys.values() if v)
+                    api_keys_data = integrated_data.get('api_keys_data', {})
+                    logger.info(f"Step 1 - API Keys: {api_keys_data}")
+                    step_completed = bool(
+                        api_keys_data.get('openai_api_key') or 
+                        api_keys_data.get('anthropic_api_key') or 
+                        api_keys_data.get('google_api_key')
+                    )
                    logger.info(f"Step 1 completed: {step_completed}")
                elif step_num == 2:  # Website Analysis
-                    website = db_service.get_website_analysis(user_id, db)
+                    website = integrated_data.get('website_analysis', {})
                    logger.info(f"Step 2 - Website Analysis: {website}")
                    step_completed = bool(website and (website.get('website_url') or website.get('writing_style')))
                    logger.info(f"Step 2 completed: {step_completed}")
                elif step_num == 3:  # Research Preferences
-                    research = db_service.get_research_preferences(user_id, db)
+                    research = integrated_data.get('research_preferences', {})
                    logger.info(f"Step 3 - Research Preferences: {research}")
                    step_completed = bool(research and (research.get('research_depth') or research.get('content_types')))
                    logger.info(f"Step 3 completed: {step_completed}")
                elif step_num == 4:  # Persona Generation
-                    persona = db_service.get_persona_data(user_id, db)
+                    persona = integrated_data.get('persona_data', {})
                    logger.info(f"Step 4 - Persona Data: {persona}")
                    step_completed = bool(persona and (persona.get('corePersona') or persona.get('platformPersonas')))
                    logger.info(f"Step 4 completed: {step_completed}")
@@ -167,125 +354,23 @@ class OnboardingCompletionService:
            logger.error(f"Error validating required steps: {e}")
            return ["Validation error"]
    
-    def _validate_required_steps(self, user_id: str, progress) -> List[str]:
-        """Validate that all required steps are completed.
-
-        This method trusts the progress tracker, but also falls back to
-        database presence for Steps 2 and 3 so migration from file→DB
-        does not block completion.
-        """
-        missing_steps = []
-        db = None
-        db_service = None
+    async def _validate_api_keys(self, user_id: str):
+        """Validate that at least one AI provider API key (OpenAI, Anthropic, or Google) is configured for the user, using the integrated onboarding data (SSOT)."""
        try:
-            db = next(get_db())
-            db_service = OnboardingDatabaseService(db)
-        except Exception:
-            db = None
-            db_service = None
-
-        logger.info(f"OnboardingCompletionService: Validating steps for user {user_id}")
-        logger.info(f"OnboardingCompletionService: Current step: {progress.current_step}")
-        logger.info(f"OnboardingCompletionService: Required steps: {self.required_steps}")
-
-        for step_num in self.required_steps:
-            step = progress.get_step_data(step_num)
-            logger.info(f"OnboardingCompletionService: Step {step_num} - status: {step.status if step else 'None'}")
-            if step and step.status in [StepStatus.COMPLETED, StepStatus.SKIPPED]:
-                logger.info(f"OnboardingCompletionService: Step {step_num} already completed/skipped")
-                continue
-
-            # DB-aware fallbacks for migration period
-            try:
-                if db_service:
-                    if step_num == 1:
-                        # Treat as completed if user has any API key in DB
-                        keys = db_service.get_api_keys(user_id, db)
-                        if keys and any(v for v in keys.values()):
-                            try:
-                                progress.mark_step_completed(1, {'source': 'db-fallback'})
-                            except Exception:
-                                pass
-                            continue
-                    if step_num == 2:
-                        # Treat as completed if website analysis exists in DB
-                        website = db_service.get_website_analysis(user_id, db)
-                        if website and (website.get('website_url') or website.get('writing_style')):
-                            # Optionally mark as completed in progress to keep state consistent
-                            try:
-                                progress.mark_step_completed(2, {'source': 'db-fallback'})
-                            except Exception:
-                                pass
-                            continue
-                        # Secondary fallback: research preferences captured style data
-                        prefs = db_service.get_research_preferences(user_id, db)
-                        if prefs and (prefs.get('writing_style') or prefs.get('content_characteristics')):
-                            try:
-                                progress.mark_step_completed(2, {'source': 'research-prefs-fallback'})
-                            except Exception:
-                                pass
-                            continue
-                        # Tertiary fallback: persona data created implies earlier steps done
-                        persona = None
-                        try:
-                            persona = db_service.get_persona_data(user_id, db)
-                        except Exception:
-                            persona = None
-                        if persona and persona.get('corePersona'):
-                            try:
-                                progress.mark_step_completed(2, {'source': 'persona-fallback'})
-                            except Exception:
-                                pass
-                            continue
-                    if step_num == 3:
-                        # Treat as completed if research preferences exist in DB
-                        prefs = db_service.get_research_preferences(user_id, db)
-                        if prefs and prefs.get('research_depth'):
-                            try:
-                                progress.mark_step_completed(3, {'source': 'db-fallback'})
-                            except Exception:
-                                pass
-                            continue
-                    if step_num == 4:
-                        # Treat as completed if persona data exists in DB
-                        persona = None
-                        try:
-                            persona = db_service.get_persona_data(user_id, db)
-                        except Exception:
-                            persona = None
-                        if persona and persona.get('corePersona'):
-                            try:
-                                progress.mark_step_completed(4, {'source': 'db-fallback'})
-                            except Exception:
-                                pass
-                            continue
-                    if step_num == 5:
-                        # Treat as completed if integrations data exists in DB
-                        # For now, we'll consider step 5 completed if the user has reached the final step
-                        # This is a simplified approach - in the future, we could check for specific integration data
-                        try:
-                            # Check if user has completed previous steps and is on final step
-                            if progress.current_step >= 6:  # FinalStep is step 6
-                                progress.mark_step_completed(5, {'source': 'final-step-fallback'})
-                                continue
-                        except Exception:
-                            pass
-            except Exception:
-                # If DB check fails, fall back to progress status only
-                pass
-
-            if step:
-                missing_steps.append(step.title)
-        
-        return missing_steps
-    
-    def _validate_api_keys(self, user_id: str):
-        """Validate that API keys are configured for the current user (DB-only)."""
-        try:
-            db = next(get_db())
-            db_service = OnboardingDatabaseService()
-            user_keys = db_service.get_api_keys(user_id, db)
-            if not user_keys or not any(v for v in user_keys.values()):
+            db = get_session_for_user(user_id)
+            integration_service = OnboardingDataIntegrationService()
+            integrated_data = await integration_service.process_onboarding_data(user_id, db)
+            db.close()
+            
+            api_keys_data = integrated_data.get('api_keys_data', {})
+            
+            has_keys = bool(
+                api_keys_data.get('openai_api_key') or 
+                api_keys_data.get('anthropic_api_key') or 
+                api_keys_data.get('google_api_key')
+            )
+            
+            if not has_keys:
                raise HTTPException(
                    status_code=400,
                    detail="Cannot complete onboarding. At least one AI provider API key must be configured in your account."
@@ -303,9 +388,8 @@ class OnboardingCompletionService:
        try:
            persona_service = PersonaAnalysisService()
            
-            # If a persona already exists for this user, skip regeneration
            try:
-                existing = persona_service.get_user_personas(int(user_id))
+                existing = persona_service.get_user_personas(user_id)
                if existing and len(existing) > 0:
                    logger.info("Persona already exists for user %s; skipping regeneration during completion", user_id)
                    return False
@@ -313,8 +397,7 @@ class OnboardingCompletionService:
                # Non-fatal; proceed to attempt generation
                pass

-            # Generate persona for this user
-            persona_result = persona_service.generate_persona_from_onboarding(int(user_id))
+            persona_result = persona_service.generate_persona_from_onboarding(user_id)
            
            if "error" not in persona_result:
                logger.info(f"✅ Writing persona generated during onboarding completion: {persona_result.get('persona_id')}")
--- a/backend/api/onboarding_utils/onboarding_control_service.py
+++ b/backend/api/onboarding_utils/onboarding_control_service.py
@@ -8,6 +8,8 @@ from fastapi import HTTPException
 from loguru import logger

 from services.onboarding.api_key_manager import get_onboarding_progress, get_onboarding_progress_for_user
+from services.database import get_db
+from services.user_workspace_manager import UserWorkspaceManager

 class OnboardingControlService:
    """Service for handling onboarding control operations."""
@@ -17,8 +19,21 @@ class OnboardingControlService:
    
    async def start_onboarding(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
        """Start a new onboarding session."""
+        db_gen = get_db()
+        db = next(db_gen)
        try:
            user_id = str(current_user.get('id'))
+            
+            # Ensure user workspace exists when starting onboarding
+            try:
+                workspace_manager = UserWorkspaceManager(db)
+                workspace_manager.create_user_workspace(user_id)
+                logger.info(f"Verified/Created workspace for user {user_id} at start of onboarding")
+            except Exception as e:
+                logger.error(f"Failed to create workspace for user {user_id}: {e}")
+                # Workspace creation is best-effort: log the failure but let onboarding
+                # continue. The workspace may already exist, or this may be a permission issue.
+            
            progress = get_onboarding_progress_for_user(user_id)
            progress.reset_progress()
            
@@ -30,13 +45,16 @@ class OnboardingControlService:
        except Exception as e:
            logger.error(f"Error starting onboarding: {str(e)}")
            raise HTTPException(status_code=500, detail="Internal server error")
+        finally:
+            if 'db' in locals():
+                db.close()
    
    async def reset_onboarding(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
        """Reset the onboarding progress for a specific user."""
        try:
-            from services.onboarding.progress_service import get_onboarding_progress_service
+            from services.onboarding.progress_service import OnboardingProgressService
            user_id = str(current_user.get('id'))
-            progress_service = get_onboarding_progress_service()
+            progress_service = OnboardingProgressService()
            success = progress_service.reset_onboarding(user_id)

            if success:
--- a/backend/api/onboarding_utils/onboarding_summary_service.py
+++ b/backend/api/onboarding_utils/onboarding_summary_service.py
@@ -9,10 +9,10 @@ from loguru import logger

 from services.onboarding.api_key_manager import get_api_key_manager
 from services.database import get_db
-from services.onboarding.database_service import OnboardingDatabaseService
 from services.website_analysis_service import WebsiteAnalysisService
 from services.research_preferences_service import ResearchPreferencesService
 from services.persona_analysis_service import PersonaAnalysisService
+from api.content_planning.services.content_strategy.onboarding import OnboardingDataIntegrationService

 class OnboardingSummaryService:
    """Service for handling onboarding summary generation with user isolation."""
@@ -25,21 +25,27 @@ class OnboardingSummaryService:
            user_id: Clerk user ID from authenticated request
        """
        self.user_id = user_id  # Store Clerk user ID (string)
-        self.db_service = OnboardingDatabaseService()
+        self.integration_service = OnboardingDataIntegrationService()
        
-        logger.info(f"OnboardingSummaryService initialized for user {user_id} (database mode)")
+        logger.info(f"OnboardingSummaryService initialized for user {user_id} (SSOT mode)")
    
    async def get_onboarding_summary(self) -> Dict[str, Any]:
        """Get comprehensive onboarding summary for FinalStep."""
        try:
+            # Load the integrated onboarding data in a single call (SSOT) instead of separate per-section queries
+            db = next(get_db())
+            integrated_data = await self.integration_service.process_onboarding_data(self.user_id, db)
+            db.close()
+            
+            # Extract components from integrated data
+            website_analysis = integrated_data.get('website_analysis', {})
+            research_preferences = integrated_data.get('research_preferences', {})
+            persona_data = integrated_data.get('persona_data', {})
+            canonical_profile = integrated_data.get('canonical_profile', {})
+            api_keys_data = integrated_data.get('api_keys_data', {})
+            
            # Get API keys
-            api_keys = self._get_api_keys()
-            
-            # Get website analysis data
-            website_analysis = self._get_website_analysis()
-            
-            # Get research preferences
-            research_preferences = self._get_research_preferences()
+            api_keys = self._get_api_keys(api_keys_data)
            
            # Get personalization settings
            personalization_settings = self._get_personalization_settings(research_preferences)
@@ -57,22 +63,19 @@ class OnboardingSummaryService:
                "research_preferences": research_preferences,
                "personalization_settings": personalization_settings,
                "persona_readiness": persona_readiness,
-                "integrations": {},  # TODO: Implement integrations data
-                "capabilities": capabilities
+                "integrations": {},
+                "capabilities": capabilities,
+                "canonical_profile": canonical_profile
            }
            
        except Exception as e:
            logger.error(f"Error getting onboarding summary: {str(e)}")
            raise HTTPException(status_code=500, detail="Internal server error")
    
-    def _get_api_keys(self) -> Dict[str, Any]:
-        """Get configured API keys from database."""
+    def _get_api_keys(self, api_keys_data: Dict[str, Any]) -> Dict[str, Any]:
+        """Report which provider API keys are configured in the integrated data, exposing only the first 8 characters of each key."""
        try:
-            db = next(get_db())
-            api_keys = self.db_service.get_api_keys(self.user_id, db)
-            db.close()
-            
-            if not api_keys:
+            if not api_keys_data:
                return {
                    "openai": {"configured": False, "value": None},
                    "anthropic": {"configured": False, "value": None},
@@ -81,16 +84,16 @@ class OnboardingSummaryService:
            
            return {
                "openai": {
-                    "configured": bool(api_keys.get('openai_api_key')),
-                    "value": api_keys.get('openai_api_key')[:8] + "..." if api_keys.get('openai_api_key') else None
+                    "configured": bool(api_keys_data.get('openai_api_key')),
+                    "value": api_keys_data.get('openai_api_key')[:8] + "..." if api_keys_data.get('openai_api_key') else None
                },
                "anthropic": {
-                    "configured": bool(api_keys.get('anthropic_api_key')),
-                    "value": api_keys.get('anthropic_api_key')[:8] + "..." if api_keys.get('anthropic_api_key') else None
+                    "configured": bool(api_keys_data.get('anthropic_api_key')),
+                    "value": api_keys_data.get('anthropic_api_key')[:8] + "..." if api_keys_data.get('anthropic_api_key') else None
                },
                "google": {
-                    "configured": bool(api_keys.get('google_api_key')),
-                    "value": api_keys.get('google_api_key')[:8] + "..." if api_keys.get('google_api_key') else None
+                    "configured": bool(api_keys_data.get('google_api_key')),
+                    "value": api_keys_data.get('google_api_key')[:8] + "..." if api_keys_data.get('google_api_key') else None
                }
            }
        except Exception as e:
@@ -101,40 +104,6 @@ class OnboardingSummaryService:
                "google": {"configured": False, "value": None}
            }
    
-    def _get_website_analysis(self) -> Optional[Dict[str, Any]]:
-        """Get website analysis data from database."""
-        try:
-            db = next(get_db())
-            website_data = self.db_service.get_website_analysis(self.user_id, db)
-            db.close()
-            return website_data
-        except Exception as e:
-            logger.error(f"Error getting website analysis: {str(e)}")
-            return None
-    
-    async def get_website_analysis_data(self) -> Dict[str, Any]:
-        """Get website analysis data for API endpoint."""
-        try:
-            website_analysis = self._get_website_analysis()
-            return {
-                "website_analysis": website_analysis,
-                "status": "success" if website_analysis else "no_data"
-            }
-        except Exception as e:
-            logger.error(f"Error in get_website_analysis_data: {str(e)}")
-            raise e
-    
-    def _get_research_preferences(self) -> Optional[Dict[str, Any]]:
-        """Get research preferences from database."""
-        try:
-            db = next(get_db())
-            preferences = self.db_service.get_research_preferences(self.user_id, db)
-            db.close()
-            return preferences
-        except Exception as e:
-            logger.error(f"Error getting research preferences: {str(e)}")
-            return None
-    
    def _get_personalization_settings(self, research_preferences: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Get personalization settings based on research preferences."""
        if not research_preferences:
@@ -194,4 +163,4 @@ class OnboardingSummaryService:
            return result
        except Exception as e:
            logger.error(f"Error getting research preferences data: {e}")
-            raise
+            raise
--- a/backend/api/onboarding_utils/persona_management_service.py
+++ b/backend/api/onboarding_utils/persona_management_service.py
@@ -13,7 +13,7 @@ class PersonaManagementService:
    def __init__(self):
        pass
    
-    async def check_persona_generation_readiness(self, user_id: int = 1) -> Dict[str, Any]:
+    async def check_persona_generation_readiness(self, user_id: str) -> Dict[str, Any]:
        """Check if user has sufficient data for persona generation."""
        try:
            from api.persona import validate_persona_generation_readiness
@@ -22,7 +22,7 @@ class PersonaManagementService:
            logger.error(f"Error checking persona readiness: {str(e)}")
            raise HTTPException(status_code=500, detail="Internal server error")
    
-    async def generate_persona_preview(self, user_id: int = 1) -> Dict[str, Any]:
+    async def generate_persona_preview(self, user_id: str) -> Dict[str, Any]:
        """Generate a preview of the writing persona without saving."""
        try:
            from api.persona import generate_persona_preview
@@ -31,7 +31,7 @@ class PersonaManagementService:
            logger.error(f"Error generating persona preview: {str(e)}")
            raise HTTPException(status_code=500, detail="Internal server error")
    
-    async def generate_writing_persona(self, user_id: int = 1) -> Dict[str, Any]:
+    async def generate_writing_persona(self, user_id: str) -> Dict[str, Any]:
        """Generate and save a writing persona from onboarding data."""
        try:
            from api.persona import generate_persona, PersonaGenerationRequest
@@ -41,7 +41,7 @@ class PersonaManagementService:
            logger.error(f"Error generating writing persona: {str(e)}")
            raise HTTPException(status_code=500, detail="Internal server error")
    
-    async def get_user_writing_personas(self, user_id: int = 1) -> Dict[str, Any]:
+    async def get_user_writing_personas(self, user_id: str) -> Dict[str, Any]:
        """Get all writing personas for the user."""
        try:
            from api.persona import get_user_personas
--- a/backend/api/onboarding_utils/step3_research_service.py
+++ b/backend/api/onboarding_utils/step3_research_service.py
@@ -62,7 +62,7 @@ class Step3ResearchService:
            logger.info(f"Starting research analysis for user {user_id}, URL: {user_url}")

            # Find the correct onboarding session for this user
-            with get_db_session() as db:
+            with get_db_session(user_id) as db:
                from models.onboarding import OnboardingSession
                session = db.query(OnboardingSession).filter(
                    OnboardingSession.user_id == user_id
@@ -108,17 +108,18 @@ class Step3ResearchService:
                industry_context
            )
            
-            # Store research data in database
-            await self._store_research_data(
-                session_id=actual_session_id,
-                user_url=user_url,
-                competitors=enhanced_competitors,
-                industry_context=industry_context,
-                analysis_metadata={
-                    **competitor_results,
-                    "social_media_data": social_media_results
-                }
-            )
+            # Store research data in database - DEPRECATED in favor of delayed persistence in StepManagementService
+            # await self._store_research_data(
+            #     session_id=actual_session_id,
+            #     user_id=user_id,
+            #     user_url=user_url,
+            #     competitors=enhanced_competitors,
+            #     industry_context=industry_context,
+            #     analysis_metadata={
+            #         **competitor_results,
+            #         "social_media_data": social_media_results
+            #     }
+            # )
            
            # Generate research summary
            research_summary = self._generate_research_summary(
@@ -393,145 +394,21 @@ class Step3ResearchService:
            "competitive_landscape": "moderate" if high_threat_count < len(competitors) * 0.5 else "high"
        }
    
-    async def _store_research_data(
-        self,
-        session_id: str,
-        user_url: str,
-        competitors: List[Dict[str, Any]],
-        industry_context: Optional[str],
-        analysis_metadata: Dict[str, Any]
-    ) -> bool:
-        """
-        Store research data in the database.
-        
-        Args:
-            session_id: Onboarding session ID
-            user_url: User's website URL
-            competitors: Competitor data
-            industry_context: Industry context
-            analysis_metadata: Analysis metadata
-            
-        Returns:
-            Boolean indicating success
-        """
-        try:
-            with get_db_session() as db:
-                # Get onboarding session
-                session = db.query(OnboardingSession).filter(
-                    OnboardingSession.id == int(session_id)
-                ).first()
-
-                if not session:
-                    logger.error(f"Onboarding session {session_id} not found")
-                    return False
-
-                # Store each competitor in CompetitorAnalysis table
-                from models.onboarding import CompetitorAnalysis
-
-                logger.warning(f"🔍 COMPETITOR SAVE: Starting to save {len(competitors)} competitors for session {session_id}")
-                logger.warning(f"  Session ID: {session.id}")
-                logger.warning(f"  Session user_id: {session.user_id}")
-                
-                saved_count = 0
-                failed_count = 0
-                
-                for idx, competitor in enumerate(competitors):
-                    try:
-                        logger.warning(f"🔍 COMPETITOR SAVE: Saving competitor {idx + 1}/{len(competitors)}")
-                        logger.warning(f"  Competitor URL: {competitor.get('url', 'N/A')}")
-                        logger.warning(f"  Competitor Domain: {competitor.get('domain', 'N/A')}")
-                        logger.warning(f"  Has title: {bool(competitor.get('title'))}")
-                        logger.warning(f"  Has summary: {bool(competitor.get('summary'))}")
-                        logger.warning(f"  Has competitive_insights: {bool(competitor.get('competitive_insights'))}")
-                        logger.warning(f"  Has content_insights: {bool(competitor.get('content_insights'))}")
-                        
-                        # Create competitor analysis record
-                        analysis_data = {
-                            "title": competitor.get("title", ""),
-                            "summary": competitor.get("summary", ""),
-                            "relevance_score": competitor.get("relevance_score", 0.5),
-                            "highlights": competitor.get("highlights", []),
-                            "favicon": competitor.get("favicon"),
-                            "image": competitor.get("image"),
-                            "published_date": competitor.get("published_date"),
-                            "author": competitor.get("author"),
-                            "competitive_analysis": competitor.get("competitive_insights", {}),
-                            "content_insights": competitor.get("content_insights", {}),
-                            "industry_context": industry_context,
-                            "analysis_metadata": analysis_metadata,
-                            "completed_at": datetime.utcnow().isoformat()
-                        }
-                        
-                        logger.warning(f"  analysis_data keys: {list(analysis_data.keys())}")
-                        logger.warning(f"  competitive_analysis type: {type(analysis_data.get('competitive_analysis'))}")
-                        logger.warning(f"  content_insights type: {type(analysis_data.get('content_insights'))}")
-                        
-                        competitor_record = CompetitorAnalysis(
-                            session_id=session.id,
-                            competitor_url=competitor.get("url", ""),
-                            competitor_domain=competitor.get("domain", ""),
-                            analysis_data=analysis_data,
-                            status="completed"
-                        )
-
-                        db.add(competitor_record)
-                        saved_count += 1
-                        logger.warning(f"  ✅ Added competitor record {idx + 1} to session")
-                        
-                    except Exception as e:
-                        failed_count += 1
-                        logger.error(f"  ❌ Failed to save competitor {idx + 1}: {str(e)}")
-                        logger.error(f"  Traceback: {traceback.format_exc()}")
-
-                # Store summary in session for quick access (backward compatibility)
-                research_summary = {
-                    "user_url": user_url,
-                    "total_competitors": len(competitors),
-                    "industry_context": industry_context,
-                    "completed_at": datetime.utcnow().isoformat(),
-                    "analysis_metadata": analysis_metadata
-                }
-
-                # Store summary in session (this requires step_data field to exist)
-                # For now, we'll skip this since the model doesn't have step_data
-                # TODO: Add step_data JSON column to OnboardingSession model if needed
-
-                try:
-                    db.commit()
-                    logger.warning(f"🔍 COMPETITOR SAVE: ✅ Committed {saved_count} competitors to database")
-                    logger.warning(f"  Failed: {failed_count}")
-                    
-                    # Verify the save by querying back
-                    from models.onboarding import CompetitorAnalysis
-                    verify_count = db.query(CompetitorAnalysis).filter(
-                        CompetitorAnalysis.session_id == session.id
-                    ).count()
-                    logger.warning(f"🔍 COMPETITOR SAVE: Verification - {verify_count} competitors found in DB for session {session.id}")
-                    
-                    logger.info(f"Stored {len(competitors)} competitors in CompetitorAnalysis table for session {session_id}")
-                    return True
-                except Exception as e:
-                    db.rollback()
-                    logger.error(f"❌ COMPETITOR SAVE: Failed to commit competitors: {str(e)}")
-                    logger.error(f"  Traceback: {traceback.format_exc()}")
-                    return False
-
-        except Exception as e:
-            logger.error(f"Error storing research data: {str(e)}", exc_info=True)
-            return False
+    # _store_research_data removed as it is now handled by StepManagementService via delayed persistence
    
-    async def get_research_data(self, session_id: str) -> Dict[str, Any]:       
+    async def get_research_data(self, session_id: str, user_id: str) -> Dict[str, Any]:       
        """
        Retrieve research data for a session.

        Args:
            session_id: Onboarding session ID
+            user_id: Clerk user ID for database access

        Returns:
            Dictionary containing research data
        """
        try:
-            with get_db_session() as db:
+            with get_db_session(user_id) as db:
                session = db.query(OnboardingSession).filter(
                    OnboardingSession.id == session_id
                ).first()
@@ -571,7 +448,7 @@ class Step3ResearchService:
                                    "image": analysis_data.get("image"),
                                    "published_date": analysis_data.get("published_date"),
                                    "author": analysis_data.get("author"),
-                                    "competitive_insights": analysis_data.get("competitive_analysis", {}),
+                                    "competitive_analysis": analysis_data.get("competitive_analysis", {}),
                                    "content_insights": analysis_data.get("content_insights", {})
                                }
                                competitors.append(competitor_info)
@@ -588,8 +465,12 @@ class Step3ResearchService:
                                    }
                                    mapped_competitors.append(mapped_comp)
                                
+                                # Regenerate research summary from the mapped competitors
+                                research_summary = self._generate_research_summary(mapped_competitors, None)
+                                
                                research_data = {
                                    "competitors": mapped_competitors,
+                                    "research_summary": research_summary,
                                    "completed_at": competitor_records[0].created_at.isoformat() if competitor_records[0].created_at else None
                                }
                    except Exception as e:
--- a/backend/api/onboarding_utils/step3_routes.py
+++ b/backend/api/onboarding_utils/step3_routes.py
@@ -9,7 +9,7 @@ Version: 1.0
 Last Updated: January 2025
 """

-from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends
+from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends, Body
 from pydantic import BaseModel, HttpUrl, Field
 from typing import Dict, List, Optional, Any
 from datetime import datetime
@@ -19,6 +19,15 @@ from loguru import logger
 from middleware.auth_middleware import get_current_user
 from .step3_research_service import Step3ResearchService
 from services.seo_tools.sitemap_service import SitemapService
+from services.database import get_session_for_user
+from api.content_planning.services.content_strategy.onboarding import OnboardingDataIntegrationService
+from models.website_analysis_monitoring_models import (
+    DeepCompetitorAnalysisTask,
+    DeepCompetitorAnalysisExecutionLog,
+    DeepWebsiteCrawlTask,
+    DeepWebsiteCrawlExecutionLog
+)
+from services.research.deep_crawl_service import DeepCrawlService

 router = APIRouter(prefix="/api/onboarding/step3", tags=["Onboarding Step 3 - Research"])

@@ -59,6 +68,104 @@ class ResearchDataResponse(BaseModel):
    research_data: Optional[Dict[str, Any]] = None
    error: Optional[str] = None

+
+@router.get("/scheduled-tasks-status")
+async def scheduled_tasks_status(current_user: dict = Depends(get_current_user)) -> Dict[str, Any]:
+    user_id = str(current_user.get("id"))
+    db = get_session_for_user(user_id)
+    if not db:
+        raise HTTPException(status_code=500, detail="Database connection failed")
+
+    try:
+        integration_service = OnboardingDataIntegrationService()
+        integrated = integration_service.get_integrated_data_sync(user_id, db)
+        
+        # Check for competitors in competitor_analysis (Step 3 persistence) first
+        competitors = integrated.get("competitor_analysis") if isinstance(integrated, dict) else []
+        
+        # If not found, fall back to research_preferences
+        if not competitors:
+            research_prefs = integrated.get("research_preferences", {}) if isinstance(integrated, dict) else {}
+            competitors = research_prefs.get("competitors") if isinstance(research_prefs, dict) else None
+
+        has_competitors = isinstance(competitors, list) and len(competitors) > 0
+
+        website_analysis = integrated.get("website_analysis") if isinstance(integrated, dict) else {}
+        seo_audit = website_analysis.get("seo_audit") if isinstance(website_analysis, dict) else {}
+        sitemap_benchmark_report = seo_audit.get("competitive_sitemap_benchmarking") if isinstance(seo_audit, dict) else None
+        
+        # Check if it's a real report or just status tracking
+        # A full report has a 'benchmark' or 'competitors' key
+        is_full_report = False
+        if isinstance(sitemap_benchmark_report, dict):
+            if "benchmark" in sitemap_benchmark_report or "competitors" in sitemap_benchmark_report:
+                is_full_report = True
+                
+        sitemap_benchmark_available = is_full_report
+        sitemap_benchmark_last_run = sitemap_benchmark_report.get("timestamp") if isinstance(sitemap_benchmark_report, dict) else None
+        sitemap_benchmark_status = sitemap_benchmark_report.get("status") if isinstance(sitemap_benchmark_report, dict) else None
+        sitemap_benchmark_error = sitemap_benchmark_report.get("error") if isinstance(sitemap_benchmark_report, dict) else None
+
+        # Check for stale processing status (older than 10 minutes, i.e. 600 seconds)
+        if sitemap_benchmark_status == "processing" and isinstance(sitemap_benchmark_report, dict):
+            started_at_str = sitemap_benchmark_report.get("started_at")
+            if started_at_str:
+                try:
+                    started_at = datetime.fromisoformat(started_at_str)
+                    if (datetime.utcnow() - started_at).total_seconds() > 600:
+                        sitemap_benchmark_status = "failed"
+                        sitemap_benchmark_error = "Task timed out (stale). Please retry."
+                except Exception:
+                    pass
+
+        # Extract error count from the report if available
+        sitemap_error_count = 0
+        if isinstance(sitemap_benchmark_report, dict):
+            competitors_data = sitemap_benchmark_report.get("competitors", {})
+            if isinstance(competitors_data, dict):
+                errors = competitors_data.get("errors", {})
+                if isinstance(errors, dict):
+                    sitemap_error_count = len(errors)
+
+        task = db.query(DeepCompetitorAnalysisTask).filter(
+            DeepCompetitorAnalysisTask.user_id == user_id
+        ).order_by(DeepCompetitorAnalysisTask.updated_at.desc()).first()
+
+        latest_log = None
+        if task:
+            latest_log = db.query(DeepCompetitorAnalysisExecutionLog).filter(
+                DeepCompetitorAnalysisExecutionLog.task_id == task.id
+            ).order_by(DeepCompetitorAnalysisExecutionLog.execution_date.desc()).first()
+
+        return {
+            "deep_competitor_analysis": {
+                "bulb": "green" if has_competitors else "red",
+                "eligible": has_competitors,
+                "reason": None if has_competitors else "No competitors found in Step 3 'Discovered Competitors'.",
+                "task": {
+                    "exists": bool(task),
+                    "status": task.status if task else None,
+                    "next_execution": task.next_execution.isoformat() if task and task.next_execution else None,
+                    "last_run": latest_log.execution_date.isoformat() if latest_log and latest_log.execution_date else None,
+                    "last_status": latest_log.status if latest_log else None
+                }
+            },
+            "competitive_sitemap_benchmarking": {
+                "bulb": "green" if has_competitors else "red",
+                "eligible": has_competitors,
+                "reason": None if has_competitors else "No competitors found in Step 3 'Discovered Competitors'.",
+                "report": {
+                    "available": sitemap_benchmark_available,
+                    "last_run": sitemap_benchmark_last_run,
+                    "error_count": sitemap_error_count,
+                    "status": sitemap_benchmark_status,
+                    "error": sitemap_benchmark_error
+                }
+            }
+        }
+    finally:
+        db.close()
+
 class ResearchHealthResponse(BaseModel):
    """Response model for research service health check."""
    success: bool
@@ -87,10 +194,57 @@ class SitemapAnalysisResponse(BaseModel):
    discovery_method: Optional[str] = None
    error: Optional[str] = None

+class SocialMediaDiscoveryRequest(BaseModel):
+    """Request model for social media discovery."""
+    user_url: str = Field(..., description="User's website URL")
+
+class SocialMediaDiscoveryResponse(BaseModel):
+    """Response model for social media discovery."""
+    success: bool
+    message: str
+    social_media_accounts: Optional[Dict[str, str]] = None
+    error: Optional[str] = None
+
 # Initialize services
 step3_research_service = Step3ResearchService()
 sitemap_service = SitemapService()

+@router.post("/discover-social-media", response_model=SocialMediaDiscoveryResponse)
+async def discover_social_media(
+    request: SocialMediaDiscoveryRequest,
+    current_user: dict = Depends(get_current_user)
+) -> SocialMediaDiscoveryResponse:
+    """
+    Discover social media accounts for a given website.
+    """
+    try:
+        logger.info(f"Starting social media discovery for user: {current_user.get('id', 'unknown')}")  # 'id' is the key the other routes here read
+        logger.info(f"Social media discovery request: {request.user_url}")
+        
+        # Use ExaService directly via Step3ResearchService instance
+        result = await step3_research_service.exa_service.discover_social_media_accounts(request.user_url)
+        
+        if result["success"]:
+            return SocialMediaDiscoveryResponse(
+                success=True,
+                message="Social media accounts discovered successfully",
+                social_media_accounts=result.get("social_media_accounts", {})
+            )
+        else:
+            return SocialMediaDiscoveryResponse(
+                success=False,
+                message="Social media discovery failed",
+                error=result.get("error", "Unknown error")
+            )
+            
+    except Exception as e:
+        logger.error(f"Error in social media discovery: {str(e)}")
+        return SocialMediaDiscoveryResponse(
+            success=False,
+            message="An unexpected error occurred",
+            error=str(e)
+        )
+
+
@router.post("/discover-competitors", response_model=CompetitorDiscoveryResponse)
 async def discover_competitors(
    request: CompetitorDiscoveryRequest,
@@ -168,7 +322,10 @@ async def discover_competitors(
        )

@router.post("/research-data", response_model=ResearchDataResponse)
-async def get_research_data(request: ResearchDataRequest) -> ResearchDataResponse:
+async def get_research_data(
+    request: ResearchDataRequest,
+    current_user: dict = Depends(get_current_user)
+) -> ResearchDataResponse:
    """
    Retrieve research data for a specific onboarding session.
    
@@ -176,7 +333,10 @@ async def get_research_data(request: ResearchDataRequest) -> ResearchDataRespons
    and research summary for the given session.
    """
    try:
-        logger.info(f"Retrieving research data for session {request.session_id}")
+        # Get Clerk user ID for user isolation
+        clerk_user_id = str(current_user.get('id'))
+        
+        logger.info(f"Retrieving research data for session {request.session_id} (user: {clerk_user_id})")
        
        # Validate session ID
        if not request.session_id or len(request.session_id) < 10:
@@ -186,7 +346,7 @@ async def get_research_data(request: ResearchDataRequest) -> ResearchDataRespons
            )
        
        # Retrieve research data
-        result = await step3_research_service.get_research_data(request.session_id)
+        result = await step3_research_service.get_research_data(request.session_id, clerk_user_id)
        
        if result["success"]:
            logger.info(f"Successfully retrieved research data for session {request.session_id}")
@@ -220,6 +380,32 @@ async def get_research_data(request: ResearchDataRequest) -> ResearchDataRespons
            error=str(e)
        )

+@router.get("/sitemap-benchmark-report")
+async def get_sitemap_benchmark_report(current_user: dict = Depends(get_current_user)) -> Dict[str, Any]:
+    """
+    Retrieve the full sitemap benchmark report for the current user.
+    """
+    user_id = str(current_user.get("id"))
+    db = get_session_for_user(user_id)
+    if not db:
+        raise HTTPException(status_code=500, detail="Database connection failed")
+
+    try:
+        integration_service = OnboardingDataIntegrationService()
+        integrated = integration_service.get_integrated_data_sync(user_id, db)
+        
+        website_analysis = integrated.get("website_analysis") if isinstance(integrated, dict) else {}
+        seo_audit = website_analysis.get("seo_audit") if isinstance(website_analysis, dict) else {}
+        sitemap_benchmark_report = seo_audit.get("competitive_sitemap_benchmarking") if isinstance(seo_audit, dict) else None
+        
+        if not sitemap_benchmark_report:
+            raise HTTPException(status_code=404, detail="No sitemap benchmark report found")
+            
+        return sitemap_benchmark_report
+        
+    finally:
+        db.close()
+
@router.get("/health", response_model=ResearchHealthResponse)
 async def health_check() -> ResearchHealthResponse:
    """
@@ -260,14 +446,17 @@ async def health_check() -> ResearchHealthResponse:
        )

@router.post("/validate-session")
-async def validate_session(session_id: str) -> Dict[str, Any]:
+async def validate_session(
+    session_id: str = Body(..., embed=True),
+    current_user: Dict[str, Any] = Depends(get_current_user)
+) -> Dict[str, Any]:
    """
    Validate that a session exists and is ready for Step 3.
    
    This endpoint checks if the session exists and has completed previous steps.
    """
    try:
-        logger.info(f"Validating session {session_id} for Step 3")
+        logger.info(f"Validating session {session_id} for Step 3, user: {current_user.get('id')}")
        
        # Basic validation
        if not session_id or len(session_id) < 10:
@@ -290,12 +479,141 @@ async def validate_session(session_id: str) -> Dict[str, Any]:
        raise
    except Exception as e:
        logger.error(f"Error validating session: {str(e)}")
-        
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# Deep Website Crawl Endpoints
+
+class DeepCrawlRequest(BaseModel):
+    user_url: str
+    schedule: bool = False
+
+@router.post("/deep-crawl/start")
+async def start_deep_crawl(
+    request: DeepCrawlRequest,
+    background_tasks: BackgroundTasks,
+    current_user: dict = Depends(get_current_user)
+):
+    """
+    Start a deep website crawl task.
+    If schedule is True, it sets up the recurring task.
+    If schedule is False, it runs immediately (fire and forget/poll).
+    """
+    user_id = str(current_user.get("id"))
+    db = get_session_for_user(user_id)
+    if not db:
+        raise HTTPException(status_code=500, detail="Database connection failed")
+
+    try:
+        # Check/Create Task
+        task = db.query(DeepWebsiteCrawlTask).filter(
+            DeepWebsiteCrawlTask.user_id == user_id,
+            DeepWebsiteCrawlTask.website_url == request.user_url
+        ).first()
+
+        if not task:
+            task = DeepWebsiteCrawlTask(
+                user_id=user_id,
+                website_url=request.user_url,
+                status="active" if request.schedule else "running",
+                next_execution=datetime.utcnow() if request.schedule else None
+            )
+            db.add(task)
+            db.commit()
+            db.refresh(task)
+        else:
+            task.website_url = request.user_url # Update URL if changed?
+            if request.schedule:
+                task.status = "active"
+                # If scheduling, don't run immediately unless requested?
+                # User said "fire ... OR let it be scheduled".
+                # If this endpoint is called, we assume intent to start OR schedule.
+                # If schedule=True, we might just set it active.
+                # If schedule=False, we run it now.
+                # But typically user might want "Run now AND schedule".
+                # Let's assume this endpoint is "Start Now". Scheduling is separate?
+                # "option to fire and check ... or let it be scheduled"
+                # If "fire", run now.
+                pass
+            else:
+                task.status = "running"
+            db.commit()
+
+        if not request.schedule:
+            # Run immediately in background
+            service = DeepCrawlService()
+            background_tasks.add_task(
+                service.execute_deep_crawl,
+                user_id=user_id,
+                website_url=request.user_url,
+                task_id=task.id
+            )
+            message = "Deep crawl started immediately."
+        else:
+            # Scheduled
+            task.status = "active"
+            task.next_execution = datetime.utcnow() # Scheduler will pick it up
+            db.commit()
+            message = "Deep crawl scheduled."
+
        return {
-            "success": False,
-            "message": "Session validation failed",
-            "error": str(e)
+            "success": True,
+            "message": message,
+            "task_id": task.id,
+            "status": task.status
        }
+    except Exception as e:
+        logger.error(f"Error starting deep crawl: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        db.close()
+
+
+@router.get("/deep-crawl/status")
+async def get_deep_crawl_status(
+    current_user: dict = Depends(get_current_user)
+):
+    """
+    Get status of the deep website crawl task.
+    """
+    user_id = str(current_user.get("id"))
+    db = get_session_for_user(user_id)
+    if not db:
+        raise HTTPException(status_code=500, detail="Database connection failed")
+
+    try:
+        task = db.query(DeepWebsiteCrawlTask).filter(
+            DeepWebsiteCrawlTask.user_id == user_id
+        ).order_by(DeepWebsiteCrawlTask.id.desc()).first()
+
+        if not task:
+            return {
+                "exists": False,
+                "status": None
+            }
+
+        latest_log = db.query(DeepWebsiteCrawlExecutionLog).filter(
+            DeepWebsiteCrawlExecutionLog.task_id == task.id
+        ).order_by(DeepWebsiteCrawlExecutionLog.execution_date.desc()).first()
+
+        return {
+            "exists": True,
+            "task_id": task.id,
+            "status": task.status,
+            "last_executed": task.last_executed,
+            "next_execution": task.next_execution,
+            "latest_log": {
+                "status": latest_log.status if latest_log else None,
+                "execution_date": latest_log.execution_date if latest_log else None,
+                "result_summary": latest_log.result_data if latest_log else None,
+                "error": latest_log.error_message if latest_log else None
+            }
+        }
+    except Exception as e:
+        logger.error(f"Error getting deep crawl status: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        db.close()

@router.get("/cost-estimate")
 async def get_cost_estimate(
@@ -421,7 +739,8 @@ async def analyze_sitemap_for_onboarding(
            competitors=request.competitors,
            industry_context=request.industry_context,
            analyze_content_trends=request.analyze_content_trends,
-            analyze_publishing_patterns=request.analyze_publishing_patterns
+            analyze_publishing_patterns=request.analyze_publishing_patterns,
+            user_id=str(current_user.get('id'))
        )
        
        # Check if analysis was successful
--- a/backend/api/onboarding_utils/step4_asset_routes.py
+++ b/backend/api/onboarding_utils/step4_asset_routes.py
@@ -0,0 +1,196 @@
+"""
+Step 4 Brand Asset Routes
+Handles brand avatar generation, enhancement, and variation.
+"""
+
+from typing import Dict, Any, Optional
+from fastapi import APIRouter, HTTPException, Depends, File, UploadFile, Form
+from fastapi.responses import FileResponse
+from sqlalchemy.orm import Session
+from pydantic import BaseModel
+from loguru import logger
+from .step4_persona_routes import _extract_user_id
+import base64
+import os
+from pathlib import Path
+from utils.file_storage import save_file_safely, generate_unique_filename
+from services.database import get_db, WORKSPACE_DIR
+from utils.asset_tracker import save_asset_to_library
+
+from services.llm_providers.main_image_generation import (
+    generate_image_with_provider,
+    enhance_image_prompt,
+    generate_image_variation
+)
+
+router = APIRouter()
+
+# --- Models ---
+class AvatarPromptRequest(BaseModel):
+    user_id: Optional[str] = None
+    prompt: str
+    aspect_ratio: str = "1:1"
+    style_preset: Optional[str] = None
+    negative_prompt: Optional[str] = None
+    num_inference_steps: int = 30
+    guidance_scale: float = 7.5
+
+class AvatarEnhanceRequest(BaseModel):
+    user_id: Optional[str] = None
+    prompt: str
+
+class VoiceCloneRequest(BaseModel):
+    user_id: Optional[str] = None
+    voice_name: str
+    description: Optional[str] = None
+    engine: str = "qwen3" # qwen3 or minimax
+
+# --- Routes ---
+
+@router.post("/generate-avatar")
+async def generate_avatar(
+    request: AvatarPromptRequest,
+    db: Session = Depends(get_db)
+):
+    """Generate a brand avatar using available image providers."""
+    try:
+        user_id = _extract_user_id(request.user_id)
+        
+        logger.info(f"Generating avatar for user {user_id} with prompt: {request.prompt}")
+        
+        # 1. Generate Image
+        result = await generate_image_with_provider(
+            prompt=request.prompt,
+            aspect_ratio=request.aspect_ratio,
+            negative_prompt=request.negative_prompt,
+            num_inference_steps=request.num_inference_steps,
+            guidance_scale=request.guidance_scale,
+            style_preset=request.style_preset,
+            user_id=user_id
+        )
+        
+        if not result.get("success"):
+            raise HTTPException(status_code=500, detail=result.get("error", "Generation failed"))
+            
+        # 2. Save to local storage and Asset Library
+        # The result typically contains image_base64 or image_url
+        # For simplicity, we assume image_base64 is returned or we download the URL
+        
+        image_data = result.get("image_base64")
+        if not image_data and result.get("image_url"):
+            # TODO: Download image from URL if needed, or just store URL
+            pass
+            
+        if image_data:
+            # Decode if needed (usually it's already base64 string)
+            # Save file
+            filename = generate_unique_filename("avatar", "png")
+            file_path = save_file_safely(
+                base64.b64decode(image_data) if isinstance(image_data, str) else image_data,
+                user_id,
+                "avatars",
+                filename
+            )
+            
+            # Save to Asset Library
+            asset_id = save_asset_to_library(
+                db=db,
+                user_id=user_id,
+                file_path=file_path,
+                asset_type="image",
+                category="brand_avatar",
+                meta_data={
+                    "prompt": request.prompt,
+                    "provider": result.get("provider", "unknown"),
+                    "style": request.style_preset
+                }
+            )
+            
+            # Construct public URL (this depends on your static file serving setup)
+            # Assuming /api/assets/{user_id}/avatars/{filename}
+            image_url = f"/api/assets/{user_id}/avatars/{filename}"
+            
+            return {
+                "success": True,
+                "image_url": image_url,
+                "image_base64": image_data, # Optional: return base64 for immediate display
+                "asset_id": asset_id
+            }
+        return {"success": False, "error": "No image data returned"}
+    except HTTPException:
+        raise  # keep the provider's error detail instead of re-wrapping it below
+    except Exception as e:
+        logger.error(f"Avatar generation failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/enhance-prompt")
+async def enhance_prompt_route(
+    request: AvatarEnhanceRequest
+):
+    """Enhance a simple prompt into a detailed midjourney-style prompt."""
+    try:
+        user_id = _extract_user_id(request.user_id)
+        logger.info(f"Enhancing prompt for user {user_id}: {request.prompt}")
+        
+        enhanced_prompt = await enhance_image_prompt(request.prompt)
+        
+        return {
+            "success": True,
+            "original_prompt": request.prompt,
+            "optimized_prompt": enhanced_prompt
+        }
+    except Exception as e:
+        logger.error(f"Prompt enhancement failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/create-voice-clone")
+async def create_voice_clone(
+    voice_name: str = Form(...),
+    description: str = Form(None),
+    engine: str = Form("qwen3"),
+    file: UploadFile = File(...),
+    user_id: Optional[str] = Form(None),
+    db: Session = Depends(get_db)
+):
+    """Create a voice clone from an audio file."""
+    try:
+        user_id = _extract_user_id(user_id)
+        logger.info(f"Creating voice clone '{voice_name}' for user {user_id}")
+        
+        # 1. Save uploaded audio file
+        file_content = await file.read()
+        filename = generate_unique_filename("voice_sample", Path(file.filename).suffix.lstrip("."))
+        file_path = save_file_safely(file_content, user_id, "voice_samples", filename)
+        
+        # 2. Call Voice Cloning API (Placeholder for actual implementation)
+        # TODO: Integrate with Minimax or CosyVoice API
+        # For now, we simulate success
+        
+        # 3. Save to Asset Library
+        asset_id = save_asset_to_library(
+            db=db,
+            user_id=user_id,
+            file_path=file_path,
+            asset_type="audio",
+            category="voice_clone",
+            meta_data={
+                "voice_name": voice_name,
+                "engine": engine,
+                "description": description,
+                "original_filename": file.filename
+            }
+        )
+        
+        return {
+            "success": True,
+            "custom_voice_id": f"vc_{asset_id}", # Mock ID
+            "preview_audio_url": f"/api/assets/{user_id}/voice_samples/{filename}",
+            "asset_id": asset_id,
+            "message": "Voice clone created successfully (simulated)"
+        }
+        
+    except Exception as e:
+        logger.error(f"Voice cloning failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
--- a/backend/api/onboarding_utils/step4_persona_routes.py
+++ b/backend/api/onboarding_utils/step4_persona_routes.py
@@ -202,11 +202,24 @@ async def get_latest_persona(current_user: Dict[str, Any] = Depends(get_current_
            raise HTTPException(status_code=404, detail="Cached persona expired")

        return {"success": True, "persona": cached}
-    except HTTPException:
-        raise
+    except HTTPException as he:
+        # Return 200 even for HTTP exceptions (like 404) to prevent frontend connection errors
+        # if the endpoint is called during an auto-initialization phase.
+        logger.warning(f"Persona retrieval notice (returning success=False): {he.detail}")
+        return {
+            "success": False, 
+            "persona": None, 
+            "message": he.detail,
+            "status_code": he.status_code
+        }
    except Exception as e:
-        logger.error(f"Error getting latest persona: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
+        logger.error(f"Error getting latest persona: {e}", exc_info=True)
+        return {
+            "success": False, 
+            "persona": None, 
+            "message": f"Internal error retrieving persona: {str(e)}",
+            "status_code": 500
+        }

@router.post("/step4/persona-save", response_model=Dict[str, Any])
 async def save_persona_update(
@@ -228,8 +241,12 @@ async def save_persona_update(
        logger.info(f"Saved latest persona to cache for user {user_id}")
        return {"success": True}
    except Exception as e:
-        logger.error(f"Error saving latest persona: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
+        logger.error(f"Error saving latest persona: {e}", exc_info=True)
+        return {
+            "success": False, 
+            "message": f"Failed to save persona: {str(e)}",
+            "status_code": 500
+        }

@router.get("/step4/persona-task/{task_id}", response_model=PersonaTaskStatus)
 async def get_persona_task_status(task_id: str):
--- a/backend/api/onboarding_utils/step_management_service.py
+++ b/backend/api/onboarding_utils/step_management_service.py
@@ -4,24 +4,315 @@ Handles onboarding step operations and progress tracking.
 """

 from typing import Dict, Any, List, Optional
+from datetime import datetime, timedelta
 from fastapi import HTTPException
 from loguru import logger
+from sqlalchemy.orm import Session
+from sqlalchemy.exc import SQLAlchemyError

-from services.onboarding.progress_service import get_onboarding_progress_service
-from services.onboarding.database_service import OnboardingDatabaseService
+from api.content_planning.services.content_strategy.onboarding import OnboardingDataIntegrationService
 from services.database import get_db
+from models.onboarding import OnboardingSession, APIKey, WebsiteAnalysis, ResearchPreferences, PersonaData, CompetitorAnalysis

 class StepManagementService:
    """Service for handling onboarding step management."""
    
    def __init__(self):
-        pass
+        self.integration_service = OnboardingDataIntegrationService()
+
+    def _get_or_create_session(self, user_id: str, db: Session) -> OnboardingSession:
+        """Return the user's onboarding session, creating it on first access.
+
+        A missing row is inserted at step 1 with 0% progress, committed and
+        refreshed before it is handed back.
+        """
+        lookup = db.query(OnboardingSession).filter(OnboardingSession.user_id == user_id)
+        found = lookup.first()
+        if found:
+            return found
+
+        created = OnboardingSession(
+            user_id=user_id, current_step=1, progress=0.0,
+            started_at=datetime.utcnow(), updated_at=datetime.utcnow(),
+        )
+        db.add(created)
+        db.commit()
+        db.refresh(created)
+        return created
+
+    def _save_api_key(self, user_id: str, provider: str, api_key: str, db: Session) -> bool:
+        """Insert or update the stored API key for one provider.
+
+        Keys are matched on (onboarding session, provider). Returns True once
+        the change is committed.
+
+        Raises:
+            Exception: Any failure is logged, rolled back and re-raised.
+        """
+        try:
+            owner = self._get_or_create_session(user_id, db)
+            record = (
+                db.query(APIKey)
+                .filter(APIKey.session_id == owner.id, APIKey.provider == provider)
+                .first()
+            )
+            if not record:
+                db.add(APIKey(session_id=owner.id, provider=provider, key=api_key))
+            else:
+                record.key = api_key
+                record.updated_at = datetime.utcnow()
+            db.commit()
+            return True
+        except Exception as e:
+            logger.error(f"Error saving API key for user {user_id}: {e}")
+            db.rollback()
+            raise e
+
+    def _save_website_analysis(self, user_id: str, analysis_data: Dict[str, Any], db: Session) -> bool:
+        """Insert or update the website analysis row for a user's onboarding session.
+
+        Analysis fields are read from ``analysis_data['analysis']`` when that is a
+        non-empty dict, otherwise from the top level of ``analysis_data``.
+        ``meta_info`` and ``sitemap_analysis`` are folded into ``crawl_result``;
+        only non-None values whose key is a WebsiteAnalysis column are written.
+
+        Returns:
+            True once the change is committed.
+
+        Raises:
+            Exception: Any failure is logged, rolled back and re-raised.
+        """
+        try:
+            session = self._get_or_create_session(user_id, db)
+            # Normalize payload: prefer the nested 'analysis' dict when present.
+            incoming = analysis_data or {}
+            analysis = incoming.get('analysis')
+            nested = analysis if isinstance(analysis, dict) else None
+            source = nested or incoming
+
+            # Work on a copy of crawl_result so the caller's payload is not mutated
+            # when the extra fields are attached below.
+            crawl_result = incoming.get('crawl_result') or {}
+            if isinstance(crawl_result, dict):
+                crawl_result = dict(crawl_result)
+            else:
+                crawl_result = {"raw": crawl_result}
+            # meta_info and sitemap_analysis have no dedicated column, so they are
+            # stored inside crawl_result.
+            for extra_key in ('meta_info', 'sitemap_analysis'):
+                extra_value = source.get(extra_key)
+                if extra_value:
+                    crawl_result[extra_key] = extra_value
+            # social_media_presence may arrive nested or at the top level.
+            social_media_presence = (
+                (nested or {}).get('social_media_presence')
+                or incoming.get('social_media_presence')
+            )
+            normalized = {
+                'website_url': incoming.get('website') or incoming.get('website_url') or '',
+                'writing_style': source.get('writing_style'),
+                'content_characteristics': source.get('content_characteristics'),
+                'target_audience': source.get('target_audience'),
+                'content_type': source.get('content_type'),
+                'recommended_settings': source.get('recommended_settings'),
+                'brand_analysis': source.get('brand_analysis'),
+                'content_strategy_insights': source.get('content_strategy_insights'),
+                'social_media_presence': social_media_presence,
+                'crawl_result': crawl_result,
+                'seo_audit': source.get('seo_audit'),
+                'style_patterns': source.get('style_patterns'),
+                'style_guidelines': source.get('guidelines'),
+            }
+
+            # Keep only real, writable columns so the constructor cannot raise TypeError.
+            reserved = {'id', 'session_id', 'created_at', 'updated_at'}
+            valid_columns = {c.name for c in WebsiteAnalysis.__table__.columns} - reserved
+            filtered_data = {k: v for k, v in normalized.items() if k in valid_columns and v is not None}
+
+            existing_analysis = db.query(WebsiteAnalysis).filter(
+                WebsiteAnalysis.session_id == session.id
+            ).first()
+            if existing_analysis:
+                for key, value in filtered_data.items():
+                    setattr(existing_analysis, key, value)
+                existing_analysis.updated_at = datetime.utcnow()
+            else:
+                db.add(WebsiteAnalysis(session_id=session.id, **filtered_data))
+            db.commit()
+            return True
+        except Exception as e:
+            logger.error(f"Error saving website analysis for user {user_id}: {e}")
+            db.rollback()
+            raise e
+
+    def _save_research_preferences(self, user_id: str, research_data: Dict[str, Any], db: Session) -> bool:
+        """Insert or update the research preferences row for a user's onboarding session.
+
+        Defaults are supplied for research_depth, content_types, auto_research and
+        factual_content when the payload omits them. The caller's dict is not
+        modified, and only keys that are ResearchPreferences columns are written.
+
+        Returns:
+            True once the change is committed.
+
+        Raises:
+            Exception: Any failure is logged, rolled back and re-raised.
+        """
+        try:
+            session = self._get_or_create_session(user_id, db)
+            # Required fields the Step 3 frontend may not send; layering the payload
+            # over them on a fresh dict avoids mutating the caller's data.
+            defaults = {
+                'research_depth': 'Comprehensive',
+                'content_types': ["Blog Posts", "Social Media", "Newsletters"],
+                'auto_research': True,
+                'factual_content': True,
+            }
+            merged = {**defaults, **(research_data or {})}
+            # Restrict both the insert and the update path to real, writable columns
+            # so payload extras cannot become stray attributes or constructor errors.
+            reserved = {'id', 'session_id', 'created_at', 'updated_at'}
+            valid_columns = {c.name for c in ResearchPreferences.__table__.columns} - reserved
+            filtered_data = {k: v for k, v in merged.items() if k in valid_columns}
+            existing_prefs = db.query(ResearchPreferences).filter(
+                ResearchPreferences.session_id == session.id
+            ).first()
+            if existing_prefs:
+                # Fix for SQLite DateTime issue: ensure created_at is a datetime object.
+                created_at = getattr(existing_prefs, 'created_at', None)
+                if isinstance(created_at, str):
+                    try:
+                        existing_prefs.created_at = datetime.fromisoformat(created_at)
+                    except (ValueError, TypeError):
+                        pass  # unparseable text is left as stored
+                # None values never overwrite what is already saved.
+                for key, value in filtered_data.items():
+                    if value is not None:
+                        setattr(existing_prefs, key, value)
+                existing_prefs.updated_at = datetime.utcnow()
+            else:
+                db.add(ResearchPreferences(session_id=session.id, **filtered_data))
+            db.commit()
+            return True
+        except Exception as e:
+            logger.error(f"Error saving research preferences for user {user_id}: {e}")
+            db.rollback()
+            raise e
+
+    def _save_competitor_analysis(self, user_id: str, competitors: List[Dict[str, Any]], industry_context: Optional[str], db: Session) -> bool:
+        """Insert or update one CompetitorAnalysis row per competitor for the user's session.
+
+        Empty or non-dict entries are skipped. A failure on one competitor is logged
+        and counted without aborting the batch; all changes are committed together.
+        Returns True after the commit; any other failure is logged, rolled back and
+        re-raised.
+        """
+        try:
+            session = self._get_or_create_session(user_id, db)
+            logger.info(f"🔍 COMPETITOR SAVE: Starting to save {len(competitors)} competitors for session {session.id}")
+            saved_count = 0
+            failed_count = 0
+
+            for idx, competitor in enumerate(competitors):
+                try:
+                    if not competitor or not isinstance(competitor, dict):
+                        logger.warning(f"  ⚠️ Skipping invalid competitor entry at index {idx}: {competitor}")
+                        continue
+
+                    # Clean the URL: trim whitespace and stray backticks.
+                    raw_url = competitor.get("url", "")
+                    competitor_url = raw_url.strip().strip('`').strip() if raw_url else ""
+                    analysis_data = {
+                        "title": competitor.get("title", ""),
+                        "summary": competitor.get("summary", ""),
+                        "relevance_score": competitor.get("relevance_score", 0.5),
+                        "highlights": competitor.get("highlights", []),
+                        "subpages": competitor.get("subpages", []),
+                        "favicon": competitor.get("favicon"),
+                        "image": competitor.get("image"),
+                        "published_date": competitor.get("published_date"),
+                        "author": competitor.get("author"),
+                        "competitive_analysis": competitor.get("competitive_analysis") or competitor.get("competitive_insights", {}),
+                        "content_insights": competitor.get("content_insights", {}),
+                        "industry_context": industry_context,
+                        "completed_at": datetime.utcnow().isoformat()
+                    }
+
+                    # Match on the cleaned URL, i.e. the value stored on insert, so a
+                    # re-save updates the existing row instead of adding a duplicate.
+                    existing_competitor = db.query(CompetitorAnalysis).filter(
+                        CompetitorAnalysis.session_id == session.id,
+                        CompetitorAnalysis.competitor_url == competitor_url
+                    ).first()
+
+                    has_details = bool(analysis_data.get("summary") or analysis_data.get("highlights"))
+                    detail_msg = "with rich details" if has_details else "basic info only"
+                    if existing_competitor:
+                        existing_competitor.analysis_data = analysis_data
+                        existing_competitor.updated_at = datetime.utcnow()
+                        logger.info(f"  Updated existing competitor {idx + 1} ({detail_msg})")
+                    else:
+                        db.add(CompetitorAnalysis(
+                            session_id=session.id,
+                            competitor_url=competitor_url,
+                            competitor_domain=competitor.get("domain", ""),
+                            analysis_data=analysis_data,
+                            status="completed"
+                        ))
+                        logger.info(f"  Added new competitor {idx + 1} ({detail_msg})")
+                    saved_count += 1
+                except Exception as e:
+                    # Best effort: one bad entry must not abort the whole batch.
+                    failed_count += 1
+                    logger.error(f"  ❌ Failed to save competitor {idx + 1}: {str(e)}")
+
+            db.commit()
+            logger.info(f"✅ Saved {saved_count} competitors ({failed_count} failed)")
+            return True
+        except Exception as e:
+            logger.error(f"Error saving competitor analysis for user {user_id}: {e}")
+            db.rollback()
+            raise e
+
+
+    def _save_persona_data(self, user_id: str, persona_data: Dict[str, Any], db: Session) -> bool:
+        """Insert or update the persona row for a user's onboarding session.
+
+        Maps the camelCase payload keys (corePersona, platformPersonas,
+        qualityMetrics, selectedPlatforms) onto the PersonaData columns and
+        returns True after the commit. Failures are logged, rolled back and
+        re-raised.
+        """
+        try:
+            owner = self._get_or_create_session(user_id, db)
+            record = db.query(PersonaData).filter(PersonaData.session_id == owner.id).first()
+            # Payload values keyed by target column; selected_platforms falls
+            # back to an empty list when the key is absent.
+            column_values = {
+                'core_persona': persona_data.get('corePersona'),
+                'platform_personas': persona_data.get('platformPersonas'),
+                'quality_metrics': persona_data.get('qualityMetrics'),
+                'selected_platforms': persona_data.get('selectedPlatforms', []),
+            }
+            if not record:
+                db.add(PersonaData(session_id=owner.id, **column_values))
+            else:
+                for column, value in column_values.items():
+                    setattr(record, column, value)
+                record.updated_at = datetime.utcnow()
+            db.commit()
+            return True
+        except Exception as e:
+            logger.error(f"Error saving persona data for user {user_id}: {e}")
+            db.rollback()
+            raise e
    
    async def get_onboarding_status(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
        """Get the current onboarding status (per user)."""
        try:
+            from services.onboarding.progress_service import OnboardingProgressService
            user_id = str(current_user.get('id'))
-            status = get_onboarding_progress_service().get_onboarding_status(user_id)
+            status = OnboardingProgressService().get_onboarding_status(user_id)
            return {
                "is_completed": status["is_completed"],
                "current_step": status["current_step"],
@@ -38,8 +329,9 @@ class StepManagementService:
    async def get_onboarding_progress_full(self, current_user: Dict[str, Any]) -> Dict[str, Any]:
        """Get the full onboarding progress data."""
        try:
+            from services.onboarding.progress_service import OnboardingProgressService
            user_id = str(current_user.get('id'))
-            progress_service = get_onboarding_progress_service()
+            progress_service = OnboardingProgressService()
            status = progress_service.get_onboarding_status(user_id)
            data = progress_service.get_completion_data(user_id)

@@ -125,11 +417,13 @@ class StepManagementService:
        """Get data for a specific step."""
        try:
            user_id = str(current_user.get('id'))
-            db = next(get_db())
-            db_service = OnboardingDatabaseService()
+            db = next(get_db(current_user))
+            
+            # Use SSOT for reading step data
+            integrated_data = self.integration_service.get_integrated_data_sync(user_id, db)

            if step_number == 2:
-                website = db_service.get_website_analysis(user_id, db) or {}
+                website = integrated_data.get('website_analysis', {})
                return {
                    "step_number": 2,
                    "title": "Website",
@@ -140,18 +434,27 @@ class StepManagementService:
                    "validation_errors": []
                }
            if step_number == 3:
-                research = db_service.get_research_preferences(user_id, db) or {}
+                research = integrated_data.get('research_preferences', {})
+                competitors = integrated_data.get('competitor_analysis', [])
+                website = integrated_data.get('website_analysis', {})
+                social_media = website.get('social_media_presence') or website.get('social_media_accounts', {})
+                
+                # Merge competitors into the data
+                step_data = research.copy() if research else {}
+                step_data['competitors'] = competitors
+                step_data['social_media_accounts'] = social_media
+                
                return {
                    "step_number": 3,
                    "title": "Research",
                    "description": "Discover competitors",
-                    "status": 'completed' if (research.get('research_depth') or research.get('content_types')) else 'pending',
+                    "status": 'completed' if (research.get('research_depth') or research.get('content_types') or competitors) else 'pending',
                    "completed_at": None,
-                    "data": research,
+                    "data": step_data,
                    "validation_errors": []
                }
            if step_number == 4:
-                persona = db_service.get_persona_data(user_id, db) or {}
+                persona = integrated_data.get('persona_data', {})
                return {
                    "step_number": 4,
                    "title": "Personalization",
@@ -162,7 +465,8 @@ class StepManagementService:
                    "validation_errors": []
                }

-            status = get_onboarding_progress_service().get_onboarding_status(user_id)
+            from services.onboarding.progress_service import OnboardingProgressService
+            status = OnboardingProgressService().get_onboarding_status(user_id)
            mapping = {
                1: ('API Keys', 'Connect your AI services', status['current_step'] >= 1),
                5: ('Integrations', 'Connect additional services', status['current_step'] >= 5),
@@ -201,8 +505,7 @@ class StepManagementService:
            except ImportError:
                pass

-            db = next(get_db())
-            db_service = OnboardingDatabaseService()
+            db = next(get_db(current_user))
            
            save_errors = []  # Track save failures

@@ -218,12 +521,9 @@ class StepManagementService:
                    for provider, key in api_keys.items():
                        if key:
                            try:
-                                saved = db_service.save_api_key(user_id, provider, key, db)
+                                saved = self._save_api_key(user_id, provider, key, db)
                                if saved:
                                    logger.info(f"✅ Saved API key for provider {provider}")
-                                else:
-                                    # This should not happen anymore since save_api_key now raises exceptions
-                                    raise Exception(f"API key save returned False for provider {provider}")
                            except Exception as e:
                                logger.error(f"❌ BLOCKING ERROR: Failed to save API key for provider {provider}: {str(e)}")
                                raise HTTPException(
@@ -236,18 +536,36 @@ class StepManagementService:
                website_data = request_data.get('data') or request_data
                logger.info(f"🔍 Step 2: Raw request_data keys: {list(request_data.keys()) if request_data else 'None'}")
                logger.info(f"🔍 Step 2: Extracted website_data keys: {list(website_data.keys()) if website_data else 'None'}")
-                logger.info(f"🔍 Step 2: website_data.website: {website_data.get('website') if website_data else 'None'}")
-                logger.info(f"🔍 Step 2: website_data.analysis: {bool(website_data.get('analysis')) if website_data else 'None'}")
-                if website_data.get('analysis'):
-                    logger.info(f"🔍 Step 2: analysis keys: {list(website_data['analysis'].keys()) if isinstance(website_data.get('analysis'), dict) else 'Not dict'}")
                if website_data:
                    try:
-                        saved = db_service.save_website_analysis(user_id, website_data, db)
+                        saved = self._save_website_analysis(user_id, website_data, db)
                        if saved:
                            logger.info(f"✅ Saved website analysis for user {user_id}")
-                        else:
-                            # This should not happen anymore since save_website_analysis now raises exceptions
-                            raise Exception("Website analysis save returned False")
+                            
+                            # Trigger Advertools persona augmentation (Phase 1)
+                            try:
+                                from services.scheduler import get_scheduler
+                                
+                                website_url = website_data.get('website') or website_data.get('website_url')
+                                if website_url:
+                                    scheduler = get_scheduler()
+                                    # Schedule content audit for persona augmentation
+                                    scheduler.schedule_one_time_task(
+                                        func=scheduler.execute_task_by_type,
+                                        run_date=datetime.utcnow() + timedelta(seconds=10), # Start in 10s
+                                        job_id=f"advertools_persona_augmentation_{user_id}",
+                                        kwargs={
+                                            "task_type": "advertools_intelligence",
+                                            "user_id": user_id,
+                                            "payload": {
+                                                "type": "content_audit",
+                                                "website_url": website_url
+                                            }
+                                        }
+                                    )
+                                    logger.info(f"🚀 Triggered Advertools persona augmentation for {website_url}")
+                            except Exception as sched_err:
+                                logger.error(f"Failed to trigger Advertools augmentation: {sched_err}")
                    except Exception as e:
                        logger.error(f"❌ BLOCKING ERROR: Failed to save website analysis: {str(e)}")
                        raise HTTPException(
@@ -261,15 +579,38 @@ class StepManagementService:
                logger.info(f"🔍 Step 3: Raw request_data keys: {list(request_data.keys()) if request_data else 'None'}")
                logger.info(f"🔍 Step 3: Extracted research_data keys: {list(research_data.keys()) if research_data else 'None'}")
                if research_data:
-                    # Note: Competitor data is saved separately via discover-competitors endpoint
-                    # This saves research preferences (content_types, target_audience, etc.)
                    try:
-                        saved = db_service.save_research_preferences(user_id, research_data, db)
+                        saved = self._save_research_preferences(user_id, research_data, db)
                        if saved:
                            logger.info(f"✅ Saved research preferences for user {user_id}")
-                        else:
-                            # This should not happen anymore since save_research_preferences now raises exceptions
-                            raise Exception("Research preferences save returned False")
+                            
+                        # Also save competitors if present
+                        competitors = research_data.get('competitors')
+                        if competitors:
+                            industry_context = research_data.get('industryContext') or research_data.get('industry_context')
+                            logger.info(f"🔍 Step 3: Found {len(competitors)} competitors to save")
+                            self._save_competitor_analysis(user_id, competitors, industry_context, db)
+                            
+                        # Save social media presence if available (Update WebsiteAnalysis)
+                        social_media = research_data.get('social_media_accounts')
+                        if social_media:
+                            logger.info(f"🔍 Step 3: Found social media accounts to save")
+                            try:
+                                session = self._get_or_create_session(user_id, db)
+                                existing_analysis = db.query(WebsiteAnalysis).filter(
+                                    WebsiteAnalysis.session_id == session.id
+                                ).first()
+                                if existing_analysis:
+                                    existing_analysis.social_media_presence = social_media
+                                    existing_analysis.updated_at = datetime.utcnow()
+                                    db.commit()
+                                    logger.info(f"✅ Updated social media presence for user {user_id}")
+                                else:
+                                    logger.warning(f"⚠️ Could not save social media: WebsiteAnalysis not found for user {user_id}")
+                            except Exception as e:
+                                logger.error(f"❌ Failed to save social media presence: {str(e)}")
+                                # Don't block completion for this, as it's secondary data
+                    
                    except Exception as e:
                        logger.error(f"❌ BLOCKING ERROR: Failed to save research preferences: {str(e)}")
                        raise HTTPException(
@@ -284,12 +625,9 @@ class StepManagementService:
                logger.info(f"🔍 Step 4: Extracted persona_data keys: {list(persona_data.keys()) if persona_data else 'None'}")
                if persona_data:
                    try:
-                        saved = db_service.save_persona_data(user_id, persona_data, db)
+                        saved = self._save_persona_data(user_id, persona_data, db)
                        if saved:
                            logger.info(f"✅ Saved persona data for user {user_id}")
-                        else:
-                            # This should not happen anymore since save_persona_data now raises exceptions
-                            raise Exception("Persona data save returned False")
                    except Exception as e:
                        logger.error(f"❌ BLOCKING ERROR: Failed to save persona data: {str(e)}")
                        raise HTTPException(
@@ -298,10 +636,12 @@ class StepManagementService:
                        ) from e

            # Persist current step and progress in DB
-            db_service.update_step(user_id, step_number, db)
+            from services.onboarding.progress_service import OnboardingProgressService
+            progress_service = OnboardingProgressService()
+            progress_service.update_step(user_id, step_number)
            try:
                progress_pct = min(100.0, round((step_number / 6) * 100))
-                db_service.update_progress(user_id, float(progress_pct), db)
+                progress_service.update_progress(user_id, float(progress_pct))
            except Exception as e:
                logger.warning(f"Failed to update progress: {e}")

@@ -309,6 +649,10 @@ class StepManagementService:
            if save_errors:
                logger.warning(f"⚠️ Step {step_number} completed but some data save operations failed: {save_errors}")
            
+            # Refresh SSOT (Canonical Profile) - non-blocking try/except inside method
+            if not save_errors:
+                await self.integration_service.refresh_integrated_data(user_id, db)
+            
            logger.info(f"[complete_step] Step {step_number} persisted to DB for user {user_id}")
            return {
                "message": "Step completed successfully",
@@ -327,6 +671,7 @@ class StepManagementService:
    async def skip_step(self, step_number: int, current_user: Dict[str, Any]) -> Dict[str, Any]:
        """Skip a step (for optional steps)."""
        try:
+            from services.onboarding.api_key_manager import get_onboarding_progress_for_user
            user_id = str(current_user.get('id'))
            progress = get_onboarding_progress_for_user(user_id)
            step = progress.get_step_data(step_number)