From 2a3ad8addc525d78aab14c6fd37411fc2f20f201 Mon Sep 17 00:00:00 2001 From: ajaysi Date: Sat, 11 Oct 2025 22:01:20 +0530 Subject: [PATCH] Fix: Step 6 Data Retrieval Issue --- .../api/onboarding_utils/endpoints_core.py | 30 +++- .../onboarding_utils/step4_persona_routes.py | 163 +++++++++++------- .../services/llm_providers/gemini_provider.py | 19 +- .../components/OnboardingWizard/Wizard.tsx | 33 +++- 4 files changed, 174 insertions(+), 71 deletions(-) diff --git a/backend/api/onboarding_utils/endpoints_core.py b/backend/api/onboarding_utils/endpoints_core.py index 5ce0aa1c..1cd1dafd 100644 --- a/backend/api/onboarding_utils/endpoints_core.py +++ b/backend/api/onboarding_utils/endpoints_core.py @@ -45,6 +45,34 @@ async def initialize_onboarding(current_user: Dict[str, Any] = Depends(get_curre next_step = progress.get_next_incomplete_step() + # Derive a resilient current_step from DB if progress looks unset (production refresh) + derived_current_step = progress.current_step + try: + # Only derive if we're at the initial state + if not progress.is_completed and (progress.current_step in (1, 0)): + from services.onboarding_database_service import OnboardingDatabaseService + from services.database import SessionLocal + db = SessionLocal() + try: + db_service = OnboardingDatabaseService() + # If website analysis exists -> at least step 2 completed + website = db_service.get_website_analysis(user_id, db) + if website and (website.get('website_url') or website.get('writing_style') or website.get('status') == 'completed'): + derived_current_step = max(derived_current_step, 2) + # If competitor research data exists, bump to step 3 (best-effort via preferences) + prefs = db_service.get_research_preferences(user_id, db) + if prefs and (prefs.get('research_depth') or prefs.get('content_types')): + derived_current_step = max(derived_current_step, 3) + # If persona data exists, bump to step 4 + persona = db_service.get_persona_data(user_id, db) + if persona and (persona.get('corePersona') or persona.get('platformPersonas')): + derived_current_step = max(derived_current_step, 4) + finally: + db.close() + except Exception: + # Non-fatal; keep original progress.current_step + pass + response_data = { "user": { "id": user_id, @@ -55,7 +83,7 @@ async def initialize_onboarding(current_user: Dict[str, Any] = Depends(get_curre }, "onboarding": { "is_completed": progress.is_completed, - "current_step": progress.current_step, + "current_step": derived_current_step, "completion_percentage": progress.get_completion_percentage(), "next_step": next_step, "started_at": progress.started_at, diff --git a/backend/api/onboarding_utils/step4_persona_routes.py b/backend/api/onboarding_utils/step4_persona_routes.py index fa03bc55..fa35a26a 100644 --- a/backend/api/onboarding_utils/step4_persona_routes.py +++ b/backend/api/onboarding_utils/step4_persona_routes.py @@ -8,6 +8,7 @@ from typing import Dict, Any, List, Optional, Union from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks from pydantic import BaseModel from loguru import logger +import os # Rate limiting configuration RATE_LIMIT_DELAY_SECONDS = 2.0 # Delay between API calls to prevent quota exhaustion @@ -20,6 +21,7 @@ from services.persona.core_persona.core_persona_service import CorePersonaServic from services.persona.enhanced_linguistic_analyzer import EnhancedLinguisticAnalyzer from services.persona.persona_quality_improver import PersonaQualityImprover from middleware.auth_middleware import get_current_user +from services.user_api_key_context import user_api_keys # In-memory task storage (in production, use Redis or database) persona_tasks: Dict[str, Dict[str, Any]] = {} @@ -495,76 +497,107 @@ async def execute_persona_generation_task(task_id: str, persona_request: Persona update_task_status(task_id, "running", 10, "Starting persona generation...") logger.info(f"Task {task_id}: Status updated to running") - # Step 1: Generate core persona (1 API call) - update_task_status(task_id, "running", 20, "Generating core persona...") - logger.info(f"Task {task_id}: Step 1 - Generating core persona...") - - core_persona = await asyncio.get_event_loop().run_in_executor( - None, - core_persona_service.generate_core_persona, - persona_request.onboarding_data - ) - - if "error" in core_persona: - update_task_status(task_id, "failed", 0, f"Core persona generation failed: {core_persona['error']}") - return - - update_task_status(task_id, "running", 40, "Core persona generated successfully") - - # Add small delay after core persona generation - await asyncio.sleep(1.0) - - # Step 2: Generate platform adaptations with rate limiting (N API calls with delays) - update_task_status(task_id, "running", 50, f"Generating platform adaptations for: {persona_request.selected_platforms}") - platform_personas = {} - - total_platforms = len(persona_request.selected_platforms) - - # Process platforms sequentially with small delays to avoid rate limits - for i, platform in enumerate(persona_request.selected_platforms): + # Inject user-specific API keys into environment for the duration of this background task + user_id = _extract_user_id(current_user) + env_mapping = { + 'gemini': 'GEMINI_API_KEY', + 'exa': 'EXA_API_KEY', + 'openai': 'OPENAI_API_KEY', + 'anthropic': 'ANTHROPIC_API_KEY', + 'mistral': 'MISTRAL_API_KEY', + 'copilotkit': 'COPILOTKIT_API_KEY', + 'tavily': 'TAVILY_API_KEY', + 'serper': 'SERPER_API_KEY', + 'firecrawl': 'FIRECRAWL_API_KEY', + } + original_env: Dict[str, Optional[str]] = {} + with user_api_keys(user_id) as keys: try: - progress = 50 + (i * 40 // total_platforms) - update_task_status(task_id, "running", progress, f"Generating {platform} persona ({i+1}/{total_platforms})") + for provider, env_var in env_mapping.items(): + value = keys.get(provider) + if value: + original_env[env_var] = os.environ.get(env_var) + os.environ[env_var] = value + logger.debug(f"[BG TASK] Injected {env_var} for user {user_id}") + + # Step 1: Generate core persona (1 API call) + update_task_status(task_id, "running", 20, "Generating core persona...") + logger.info(f"Task {task_id}: Step 1 - Generating core persona...") - # Add delay between API calls to prevent rate limiting - if i > 0: # Skip delay for first platform - update_task_status(task_id, "running", progress, f"Rate limiting: Waiting {RATE_LIMIT_DELAY_SECONDS}s before next API call...") - await asyncio.sleep(RATE_LIMIT_DELAY_SECONDS) - - # Generate platform persona - result = await generate_single_platform_persona_async( - core_persona, - platform, + core_persona = await asyncio.get_event_loop().run_in_executor( + None, + core_persona_service.generate_core_persona, persona_request.onboarding_data ) - if isinstance(result, Exception): - error_msg = str(result) - logger.error(f"Platform {platform} generation failed: {error_msg}") - platform_personas[platform] = {"error": error_msg} - elif "error" in result: - error_msg = result['error'] - logger.error(f"Platform {platform} generation failed: {error_msg}") - platform_personas[platform] = result - - # Check for rate limit errors and suggest retry - if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower(): - logger.warning(f"⚠️ Rate limit detected for {platform}. Consider increasing RATE_LIMIT_DELAY_SECONDS") - else: - platform_personas[platform] = result - logger.info(f"✅ {platform} persona generated successfully") - - except Exception as e: - logger.error(f"Platform {platform} generation error: {str(e)}") - platform_personas[platform] = {"error": str(e)} - - # Step 3: Assess quality (no additional API calls - uses existing data) - update_task_status(task_id, "running", 90, "Assessing persona quality...") - quality_metrics = await assess_persona_quality_internal( - core_persona, - platform_personas, - persona_request.user_preferences - ) + if "error" in core_persona: + update_task_status(task_id, "failed", 0, f"Core persona generation failed: {core_persona['error']}") + return + + update_task_status(task_id, "running", 40, "Core persona generated successfully") + + # Add small delay after core persona generation + await asyncio.sleep(1.0) + + # Step 2: Generate platform adaptations with rate limiting (N API calls with delays) + update_task_status(task_id, "running", 50, f"Generating platform adaptations for: {persona_request.selected_platforms}") + platform_personas = {} + + total_platforms = len(persona_request.selected_platforms) + + # Process platforms sequentially with small delays to avoid rate limits + for i, platform in enumerate(persona_request.selected_platforms): + try: + progress = 50 + (i * 40 // total_platforms) + update_task_status(task_id, "running", progress, f"Generating {platform} persona ({i+1}/{total_platforms})") + + # Add delay between API calls to prevent rate limiting + if i > 0: # Skip delay for first platform + update_task_status(task_id, "running", progress, f"Rate limiting: Waiting {RATE_LIMIT_DELAY_SECONDS}s before next API call...") + await asyncio.sleep(RATE_LIMIT_DELAY_SECONDS) + + # Generate platform persona + result = await generate_single_platform_persona_async( + core_persona, + platform, + persona_request.onboarding_data + ) + + if isinstance(result, Exception): + error_msg = str(result) + logger.error(f"Platform {platform} generation failed: {error_msg}") + platform_personas[platform] = {"error": error_msg} + elif "error" in result: + error_msg = result['error'] + logger.error(f"Platform {platform} generation failed: {error_msg}") + platform_personas[platform] = result + + # Check for rate limit errors and suggest retry + if "429" in error_msg or "quota" in error_msg.lower() or "rate limit" in error_msg.lower(): + logger.warning(f"⚠️ Rate limit detected for {platform}. Consider increasing RATE_LIMIT_DELAY_SECONDS") + else: + platform_personas[platform] = result + logger.info(f"✅ {platform} persona generated successfully") + + except Exception as e: + logger.error(f"Platform {platform} generation error: {str(e)}") + platform_personas[platform] = {"error": str(e)} + + # Step 3: Assess quality (no additional API calls - uses existing data) + update_task_status(task_id, "running", 90, "Assessing persona quality...") + quality_metrics = await assess_persona_quality_internal( + core_persona, + platform_personas, + persona_request.user_preferences + ) + finally: + # Restore environment + for env_var, original_value in original_env.items(): + if original_value is None: + os.environ.pop(env_var, None) + else: + os.environ[env_var] = original_value + logger.debug(f"[BG TASK] Restored environment for user {user_id}") # Log performance metrics successful_platforms = len([p for p in platform_personas.values() if "error" not in p]) diff --git a/backend/services/llm_providers/gemini_provider.py b/backend/services/llm_providers/gemini_provider.py index 01617fbd..777a5057 100644 --- a/backend/services/llm_providers/gemini_provider.py +++ b/backend/services/llm_providers/gemini_provider.py @@ -471,12 +471,25 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9, return {"error": "No valid structured response content found"} except ValueError as e: - # API key related errors + # API key related errors should not be retried logger.error(f"API key error in Gemini Pro structured JSON generation: {e}") return {"error": str(e)} except Exception as e: - logger.error(f"Error in Gemini Pro structured JSON generation: {e}") - return {"error": str(e)} + # Let tenacity handle retries, especially for 429 RESOURCE_EXHAUSTED + msg = str(e) + if "RESOURCE_EXHAUSTED" in msg or "429" in msg or "rate limit" in msg.lower(): + # If RetryInfo is present with a retryDelay, honor it before re-raising + try: + import re, time + m = re.search(r"retryDelay':\s*'?(\d+)s" , msg) + if m: + delay_s = int(m.group(1)) + logger.warning(f"Rate limit hit, sleeping {delay_s}s before retry...") + time.sleep(delay_s) + except Exception: + pass + # Re-raise to trigger tenacity's backoff/retry + raise # Removed JSON repair functions to avoid false positives diff --git a/frontend/src/components/OnboardingWizard/Wizard.tsx b/frontend/src/components/OnboardingWizard/Wizard.tsx index 9c2eb7e5..061d25f0 100644 --- a/frontend/src/components/OnboardingWizard/Wizard.tsx +++ b/frontend/src/components/OnboardingWizard/Wizard.tsx @@ -203,6 +203,15 @@ const Wizard: React.FC = ({ onComplete }) => { try { setLoading(true); console.log('Wizard: Starting initialization...'); + // Fast local restore: try localStorage active step first (non-authoritative) + const cachedActiveStep = localStorage.getItem('onboarding_active_step'); + if (cachedActiveStep !== null) { + const stepIdx = Math.max(0, Math.min(steps.length - 1, parseInt(cachedActiveStep, 10))); + if (!Number.isNaN(stepIdx)) { + console.log('Wizard: Provisional activeStep from localStorage:', stepIdx); + setActiveStep(stepIdx); + } + } // Check if we already have init data from App (cached in sessionStorage) const cachedInit = sessionStorage.getItem('onboarding_init'); @@ -232,7 +241,16 @@ const Wizard: React.FC = ({ onComplete }) => { } // Set state from cached data - NO API CALLS NEEDED! - setActiveStep(onboarding.current_step - 1); + let computedStep = Math.max(1, Math.min(steps.length, onboarding.current_step)); + // If localStorage has a higher step index, prefer it for UX continuity + const lsStep = localStorage.getItem('onboarding_active_step'); + if (lsStep !== null) { + const lsIdx = Math.max(0, Math.min(steps.length - 1, parseInt(lsStep, 10))); + if (!Number.isNaN(lsIdx)) { + computedStep = Math.max(computedStep, lsIdx + 1); + } + } + setActiveStep(computedStep - 1); setProgressState(onboarding.completion_percentage); // Note: Session managed by Clerk auth, no need to track separately @@ -273,7 +291,15 @@ const Wizard: React.FC = ({ onComplete }) => { sessionStorage.setItem('onboarding_init', JSON.stringify(response.data)); // Set state from API response - setActiveStep(onboarding.current_step - 1); + let computedStep = Math.max(1, Math.min(steps.length, onboarding.current_step)); + const lsStep = localStorage.getItem('onboarding_active_step'); + if (lsStep !== null) { + const lsIdx = Math.max(0, Math.min(steps.length - 1, parseInt(lsStep, 10))); + if (!Number.isNaN(lsIdx)) { + computedStep = Math.max(computedStep, lsIdx + 1); + } + } + setActiveStep(computedStep - 1); setProgressState(onboarding.completion_percentage); // Note: Session managed by Clerk auth, no need to track separately @@ -487,6 +513,9 @@ const Wizard: React.FC = ({ onComplete }) => { } setActiveStep(nextStep); + try { + localStorage.setItem('onboarding_active_step', String(nextStep)); + } catch (_e) {} console.log('Wizard: Setting activeStep to:', nextStep); // Update progress