Fix: Step 6 Data Retrieval Issue

2025-10-11 22:01:20 +05:30
parent bf65065265
commit 2a3ad8addc
4 changed files with 174 additions and 71 deletions
--- a/backend/api/onboarding_utils/endpoints_core.py
+++ b/backend/api/onboarding_utils/endpoints_core.py
@@ -45,6 +45,34 @@ async def initialize_onboarding(current_user: Dict[str, Any] = Depends(get_curre
        next_step = progress.get_next_incomplete_step()
        # Derive a resilient current_step from DB if progress looks unset (production refresh)
        derived_current_step = progress.current_step
        try:
            # Only derive if we're at the initial state
            if not progress.is_completed and (progress.current_step in (1, 0)):
                from services.onboarding_database_service import OnboardingDatabaseService
                from services.database import SessionLocal
                db = SessionLocal()
                try:
                    db_service = OnboardingDatabaseService()
                    # If website analysis exists -> at least step 2 completed
                    website = db_service.get_website_analysis(user_id, db)
                    if website and (website.get('website_url') or website.get('writing_style') or website.get('status') == 'completed'):
                        derived_current_step = max(derived_current_step, 2)
                    # If competitor research data exists, bump to step 3 (best-effort via preferences)
                    prefs = db_service.get_research_preferences(user_id, db)
                    if prefs and (prefs.get('research_depth') or prefs.get('content_types')):
                        derived_current_step = max(derived_current_step, 3)
                    # If persona data exists, bump to step 4
                    persona = db_service.get_persona_data(user_id, db)
                    if persona and (persona.get('corePersona') or persona.get('platformPersonas')):
                        derived_current_step = max(derived_current_step, 4)
                finally:
                    db.close()
        except Exception:
            # Non-fatal; keep original progress.current_step
            pass
        response_data = {
            "user": {
                "id": user_id,
@@ -55,7 +83,7 @@ async def initialize_onboarding(current_user: Dict[str, Any] = Depends(get_curre
            },
            "onboarding": {
                "is_completed": progress.is_completed,
-                "current_step": progress.current_step,
+                "current_step": derived_current_step,
                "completion_percentage": progress.get_completion_percentage(),
                "next_step": next_step,
                "started_at": progress.started_at,
--- a/backend/api/onboarding_utils/step4_persona_routes.py
+++ b/backend/api/onboarding_utils/step4_persona_routes.py
@@ -8,6 +8,7 @@ from typing import Dict, Any, List, Optional, Union
 from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
 from pydantic import BaseModel
 from loguru import logger
 import os
 # Rate limiting configuration
 RATE_LIMIT_DELAY_SECONDS = 2.0  # Delay between API calls to prevent quota exhaustion
@@ -20,6 +21,7 @@ from services.persona.core_persona.core_persona_service import CorePersonaServic
 from services.persona.enhanced_linguistic_analyzer import EnhancedLinguisticAnalyzer
 from services.persona.persona_quality_improver import PersonaQualityImprover
 from middleware.auth_middleware import get_current_user
 from services.user_api_key_context import user_api_keys
 # In-memory task storage (in production, use Redis or database)
 persona_tasks: Dict[str, Dict[str, Any]] = {}
@@ -495,76 +497,107 @@ async def execute_persona_generation_task(task_id: str, persona_request: Persona
        update_task_status(task_id, "running", 10, "Starting persona generation...")
        logger.info(f"Task {task_id}: Status updated to running")
-        # Step 1: Generate core persona (1 API call)
+        # Inject user-specific API keys into environment for the duration of this background task
-        update_task_status(task_id, "running", 20, "Generating core persona...")
+        user_id = _extract_user_id(current_user)
-        logger.info(f"Task {task_id}: Step 1 - Generating core persona...")
+        env_mapping = {
-        
+            'gemini': 'GEMINI_API_KEY',
-        core_persona = await asyncio.get_event_loop().run_in_executor(
+            'exa': 'EXA_API_KEY',
-            None, 
+            'openai': 'OPENAI_API_KEY',
-            core_persona_service.generate_core_persona, 
+            'anthropic': 'ANTHROPIC_API_KEY',
-            persona_request.onboarding_data
+            'mistral': 'MISTRAL_API_KEY',
-        )
+            'copilotkit': 'COPILOTKIT_API_KEY',
-        
+            'tavily': 'TAVILY_API_KEY',
-        if "error" in core_persona:
+            'serper': 'SERPER_API_KEY',
-            update_task_status(task_id, "failed", 0, f"Core persona generation failed: {core_persona['error']}")
+            'firecrawl': 'FIRECRAWL_API_KEY',
-            return
+        }
-        
+        original_env: Dict[str, Optional[str]] = {}
-        update_task_status(task_id, "running", 40, "Core persona generated successfully")
+        with user_api_keys(user_id) as keys:
        # Add small delay after core persona generation
        await asyncio.sleep(1.0)
        # Step 2: Generate platform adaptations with rate limiting (N API calls with delays)
        update_task_status(task_id, "running", 50, f"Generating platform adaptations for: {persona_request.selected_platforms}")
        platform_personas = {}
        total_platforms = len(persona_request.selected_platforms)
        # Process platforms sequentially with small delays to avoid rate limits
        for i, platform in enumerate(persona_request.selected_platforms):
            try:
-                progress = 50 + (i * 40 // total_platforms)
+                for provider, env_var in env_mapping.items():
-                update_task_status(task_id, "running", progress, f"Generating {platform} persona ({i+1}/{total_platforms})")
+                    value = keys.get(provider)
                    if value:
                        original_env[env_var] = os.environ.get(env_var)
                        os.environ[env_var] = value
                        logger.debug(f"[BG TASK] Injected {env_var} for user {user_id}")
                # Step 1: Generate core persona (1 API call)
                update_task_status(task_id, "running", 20, "Generating core persona...")
                logger.info(f"Task {task_id}: Step 1 - Generating core persona...")
-                # Add delay between API calls to prevent rate limiting
+                core_persona = await asyncio.get_event_loop().run_in_executor(
-                if i > 0:  # Skip delay for first platform
+                    None, 
-                    update_task_status(task_id, "running", progress, f"Rate limiting: Waiting {RATE_LIMIT_DELAY_SECONDS}s before next API call...")
+                    core_persona_service.generate_core_persona, 
                    await asyncio.sleep(RATE_LIMIT_DELAY_SECONDS)
                # Generate platform persona
                result = await generate_single_platform_persona_async(
                    core_persona, 
                    platform, 
                    persona_request.onboarding_data
                )
-                if isinstance(result, Exception):
+                if "error" in core_persona:
-                    error_msg = str(result)
+                    update_task_status(task_id, "failed", 0, f"Core persona generation failed: {core_persona['error']}")
-                    logger.error(f"Platform {platform} generation failed: {error_msg}")
+                    return
-                    platform_personas[platform] = {"error": error_msg}
+                
-                elif "error" in result:
+                update_task_status(task_id, "running", 40, "Core persona generated successfully")
-                    error_msg = result['error']
+                
-                    logger.error(f"Platform {platform} generation failed: {error_msg}")
+                # Add small delay after core persona generation
-                    platform_personas[platform] = result
+                await asyncio.sleep(1.0)
-                    
+                
-                    # Check for rate limit errors and suggest retry
+                # Step 2: Generate platform adaptations with rate limiting (N API calls with delays)
-                    if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower():
+                update_task_status(task_id, "running", 50, f"Generating platform adaptations for: {persona_request.selected_platforms}")
-                        logger.warning(f"⚠️ Rate limit detected for {platform}. Consider increasing RATE_LIMIT_DELAY_SECONDS")
+                platform_personas = {}
-                else:
+                
-                    platform_personas[platform] = result
+                total_platforms = len(persona_request.selected_platforms)
-                    logger.info(f"✅ {platform} persona generated successfully")
+                
-                    
+                # Process platforms sequentially with small delays to avoid rate limits
-            except Exception as e:
+                for i, platform in enumerate(persona_request.selected_platforms):
-                logger.error(f"Platform {platform} generation error: {str(e)}")
+                    try:
-                platform_personas[platform] = {"error": str(e)}
+                        progress = 50 + (i * 40 // total_platforms)
-        
+                        update_task_status(task_id, "running", progress, f"Generating {platform} persona ({i+1}/{total_platforms})")
-        # Step 3: Assess quality (no additional API calls - uses existing data)
+                        
-        update_task_status(task_id, "running", 90, "Assessing persona quality...")
+                        # Add delay between API calls to prevent rate limiting
-        quality_metrics = await assess_persona_quality_internal(
+                        if i > 0:  # Skip delay for first platform
-            core_persona, 
+                            update_task_status(task_id, "running", progress, f"Rate limiting: Waiting {RATE_LIMIT_DELAY_SECONDS}s before next API call...")
-            platform_personas,
+                            await asyncio.sleep(RATE_LIMIT_DELAY_SECONDS)
-            persona_request.user_preferences
+                        
-        )
+                        # Generate platform persona
                        result = await generate_single_platform_persona_async(
                            core_persona, 
                            platform, 
                            persona_request.onboarding_data
                        )
                        if isinstance(result, Exception):
                            error_msg = str(result)
                            logger.error(f"Platform {platform} generation failed: {error_msg}")
                            platform_personas[platform] = {"error": error_msg}
                        elif "error" in result:
                            error_msg = result['error']
                            logger.error(f"Platform {platform} generation failed: {error_msg}")
                            platform_personas[platform] = result
                            # Check for rate limit errors and suggest retry
                            if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower():
                                logger.warning(f"⚠️ Rate limit detected for {platform}. Consider increasing RATE_LIMIT_DELAY_SECONDS")
                        else:
                            platform_personas[platform] = result
                            logger.info(f"✅ {platform} persona generated successfully")
                    except Exception as e:
                        logger.error(f"Platform {platform} generation error: {str(e)}")
                        platform_personas[platform] = {"error": str(e)}
                # Step 3: Assess quality (no additional API calls - uses existing data)
                update_task_status(task_id, "running", 90, "Assessing persona quality...")
                quality_metrics = await assess_persona_quality_internal(
                    core_persona, 
                    platform_personas,
                    persona_request.user_preferences
                )
            finally:
                # Restore environment
                for env_var, original_value in original_env.items():
                    if original_value is None:
                        os.environ.pop(env_var, None)
                    else:
                        os.environ[env_var] = original_value
                logger.debug(f"[BG TASK] Restored environment for user {user_id}")
        # Log performance metrics
        successful_platforms = len([p for p in platform_personas.values() if "error" not in p])
--- a/backend/services/llm_providers/gemini_provider.py
+++ b/backend/services/llm_providers/gemini_provider.py
@@ -471,12 +471,25 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
        return {"error": "No valid structured response content found"}
    except ValueError as e:
-        # API key related errors
+        # API key related errors should not be retried
        logger.error(f"API key error in Gemini Pro structured JSON generation: {e}")
        return {"error": str(e)}
    except Exception as e:
-        logger.error(f"Error in Gemini Pro structured JSON generation: {e}")
+        # Let tenacity handle retries, especially for 429 RESOURCE_EXHAUSTED
-        return {"error": str(e)}
+        msg = str(e)
        if "RESOURCE_EXHAUSTED" in msg or "429" in msg or "rate limit" in msg.lower():
            # If RetryInfo is present with a retryDelay, honor it before re-raising
            try:
                import re, time
                m = re.search(r"retryDelay':\s*'?(\d+)s" , msg)
                if m:
                    delay_s = int(m.group(1))
                    logger.warning(f"Rate limit hit, sleeping {delay_s}s before retry...")
                    time.sleep(delay_s)
            except Exception:
                pass
        # Re-raise to trigger tenacity's backoff/retry
        raise
 # Removed JSON repair functions to avoid false positives
--- a/frontend/src/components/OnboardingWizard/Wizard.tsx
+++ b/frontend/src/components/OnboardingWizard/Wizard.tsx
@@ -203,6 +203,15 @@ const Wizard: React.FC<WizardProps> = ({ onComplete }) => {
      try {
        setLoading(true);
        console.log('Wizard: Starting initialization...');
        // Fast local restore: try localStorage active step first (non-authoritative)
        const cachedActiveStep = localStorage.getItem('onboarding_active_step');
        if (cachedActiveStep !== null) {
          const stepIdx = Math.max(0, Math.min(steps.length - 1, parseInt(cachedActiveStep, 10)));
          if (!Number.isNaN(stepIdx)) {
            console.log('Wizard: Provisional activeStep from localStorage:', stepIdx);
            setActiveStep(stepIdx);
          }
        }
        // Check if we already have init data from App (cached in sessionStorage)
        const cachedInit = sessionStorage.getItem('onboarding_init');
@@ -232,7 +241,16 @@ const Wizard: React.FC<WizardProps> = ({ onComplete }) => {
          }
          // Set state from cached data - NO API CALLS NEEDED!
-          setActiveStep(onboarding.current_step - 1);
+          let computedStep = Math.max(1, Math.min(steps.length, onboarding.current_step));
          // If localStorage has a higher step index, prefer it for UX continuity
          const lsStep = localStorage.getItem('onboarding_active_step');
          if (lsStep !== null) {
            const lsIdx = Math.max(0, Math.min(steps.length - 1, parseInt(lsStep, 10)));
            if (!Number.isNaN(lsIdx)) {
              computedStep = Math.max(computedStep, lsIdx + 1);
            }
          }
          setActiveStep(computedStep - 1);
          setProgressState(onboarding.completion_percentage);
          // Note: Session managed by Clerk auth, no need to track separately
@@ -273,7 +291,15 @@ const Wizard: React.FC<WizardProps> = ({ onComplete }) => {
        sessionStorage.setItem('onboarding_init', JSON.stringify(response.data));
        // Set state from API response
-        setActiveStep(onboarding.current_step - 1);
+        let computedStep = Math.max(1, Math.min(steps.length, onboarding.current_step));
        const lsStep = localStorage.getItem('onboarding_active_step');
        if (lsStep !== null) {
          const lsIdx = Math.max(0, Math.min(steps.length - 1, parseInt(lsStep, 10)));
          if (!Number.isNaN(lsIdx)) {
            computedStep = Math.max(computedStep, lsIdx + 1);
          }
        }
        setActiveStep(computedStep - 1);
        setProgressState(onboarding.completion_percentage);
        // Note: Session managed by Clerk auth, no need to track separately
@@ -487,6 +513,9 @@ const Wizard: React.FC<WizardProps> = ({ onComplete }) => {
    }
    setActiveStep(nextStep);
    try {
      localStorage.setItem('onboarding_active_step', String(nextStep));
    } catch (_e) {}
    console.log('Wizard: Setting activeStep to:', nextStep);
    // Update progress