""" Research Persona Service Handles generation, caching, and retrieval of AI-powered research personas. """ from typing import Dict, Any, Optional from datetime import datetime, timedelta from loguru import logger from fastapi import HTTPException from services.database import get_db_session from models.onboarding import PersonaData, OnboardingSession from models.research_persona_models import ResearchPersona from .research_persona_prompt_builder import ResearchPersonaPromptBuilder from services.llm_providers.main_text_generation import llm_text_gen from services.onboarding.database_service import OnboardingDatabaseService from services.persona_data_service import PersonaDataService class ResearchPersonaService: """Service for generating and managing research personas.""" CACHE_TTL_DAYS = 7 # 7-day cache TTL def __init__(self, db_session=None): self.db = db_session or get_db_session() self.prompt_builder = ResearchPersonaPromptBuilder() self.onboarding_service = OnboardingDatabaseService(db=self.db) self.persona_data_service = PersonaDataService(db_session=self.db) def get_cached_only( self, user_id: str ) -> Optional[ResearchPersona]: """ Get research persona for user ONLY if it exists in cache. This method NEVER generates - it only returns cached personas. Use this for config endpoints to avoid triggering rate limit checks. Args: user_id: User ID (Clerk string) Returns: ResearchPersona if cached and valid, None otherwise """ try: # Get persona data record persona_data = self._get_persona_data_record(user_id) if not persona_data: logger.debug(f"No persona data found for user {user_id}") return None # Only return if cache is valid and persona exists if self.is_cache_valid(persona_data) and persona_data.research_persona: try: logger.debug(f"Returning cached research persona for user {user_id}") return ResearchPersona(**persona_data.research_persona) except Exception as e: logger.warning(f"Failed to parse cached research persona: {e}") return None # Cache invalid or persona missing - return None (don't generate) logger.debug(f"No valid cached research persona for user {user_id}") return None except Exception as e: logger.error(f"Error getting cached research persona for user {user_id}: {e}") return None def get_or_generate( self, user_id: str, force_refresh: bool = False ) -> Optional[ResearchPersona]: """ Get research persona for user, generating if missing or expired. Args: user_id: User ID (Clerk string) force_refresh: If True, regenerate even if cache is valid Returns: ResearchPersona if successful, None otherwise """ try: # Get persona data record persona_data = self._get_persona_data_record(user_id) if not persona_data: logger.warning(f"No persona data found for user {user_id}, cannot generate research persona") return None # Check cache if not forcing refresh if not force_refresh and self.is_cache_valid(persona_data): if persona_data.research_persona: logger.info(f"Using cached research persona for user {user_id}") try: return ResearchPersona(**persona_data.research_persona) except Exception as e: logger.warning(f"Failed to parse cached research persona: {e}, regenerating...") # Fall through to regeneration else: logger.info(f"Research persona missing for user {user_id}, generating...") else: if force_refresh: logger.info(f"Forcing refresh of research persona for user {user_id}") else: logger.info(f"Cache expired for user {user_id}, regenerating...") # Generate new research persona try: research_persona = self.generate_research_persona(user_id) except HTTPException: # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API raise if research_persona: # Save to database if self.save_research_persona(user_id, research_persona): logger.info(f"✅ Research persona generated and saved for user {user_id}") else: logger.warning(f"Failed to save research persona for user {user_id}") return research_persona else: # Log detailed error for debugging expensive failures logger.error( f"❌ Failed to generate research persona for user {user_id} - " f"This is an expensive failure (API call consumed). Check logs above for details." ) # Don't return None silently - let the caller know this failed return None except HTTPException: # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API raise except Exception as e: logger.error(f"Error getting/generating research persona for user {user_id}: {e}") return None def generate_research_persona(self, user_id: str) -> Optional[ResearchPersona]: """ Generate a new research persona for the user. Args: user_id: User ID (Clerk string) Returns: ResearchPersona if successful, None otherwise """ try: logger.info(f"Generating research persona for user {user_id}") # Collect onboarding data onboarding_data = self._collect_onboarding_data(user_id) if not onboarding_data: logger.warning(f"Insufficient onboarding data for user {user_id}") return None # Build prompt prompt = self.prompt_builder.build_research_persona_prompt(onboarding_data) # Get JSON schema for structured response json_schema = self.prompt_builder.get_json_schema() # Call LLM with structured JSON response logger.info(f"Calling LLM for research persona generation (user: {user_id})") try: response_text = llm_text_gen( prompt=prompt, json_struct=json_schema, user_id=user_id ) except HTTPException: # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API logger.warning(f"HTTPException during LLM call for user {user_id} - re-raising") raise except RuntimeError as e: # Re-raise RuntimeError (subscription limits) as HTTPException logger.warning(f"RuntimeError during LLM call for user {user_id}: {e}") raise HTTPException(status_code=429, detail=str(e)) if not response_text: logger.error("Empty response from LLM") return None # Parse JSON response import json try: # When json_struct is provided, llm_text_gen may return a dict directly if isinstance(response_text, dict): # Already parsed, use directly persona_dict = response_text elif isinstance(response_text, str): # Handle case where LLM returns markdown-wrapped JSON or plain JSON string response_text = response_text.strip() if response_text.startswith("```json"): response_text = response_text[7:] if response_text.startswith("```"): response_text = response_text[3:] if response_text.endswith("```"): response_text = response_text[:-3] response_text = response_text.strip() persona_dict = json.loads(response_text) else: logger.error(f"Unexpected response type from LLM: {type(response_text)}") return None # Add generated_at timestamp persona_dict["generated_at"] = datetime.utcnow().isoformat() # Validate and create ResearchPersona # Log the dict structure for debugging if validation fails try: research_persona = ResearchPersona(**persona_dict) logger.info(f"✅ Research persona generated successfully for user {user_id}") return research_persona except Exception as validation_error: logger.error(f"Failed to validate ResearchPersona from dict: {validation_error}") logger.debug(f"Persona dict keys: {list(persona_dict.keys()) if isinstance(persona_dict, dict) else 'Not a dict'}") logger.debug(f"Persona dict sample: {str(persona_dict)[:500]}") # Re-raise to be caught by outer exception handler raise except json.JSONDecodeError as e: logger.error(f"Failed to parse LLM response as JSON: {e}") logger.debug(f"Response text: {response_text[:500] if isinstance(response_text, str) else str(response_text)[:500]}") return None except Exception as e: logger.error(f"Failed to create ResearchPersona from response: {e}") return None except HTTPException: # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API raise except Exception as e: logger.error(f"Error generating research persona for user {user_id}: {e}") return None def is_cache_valid(self, persona_data: PersonaData) -> bool: """ Check if cached research persona is still valid (within TTL). Args: persona_data: PersonaData database record Returns: True if cache is valid, False otherwise """ if not persona_data.research_persona_generated_at: return False # Check if within TTL cache_age = datetime.utcnow() - persona_data.research_persona_generated_at is_valid = cache_age < timedelta(days=self.CACHE_TTL_DAYS) if not is_valid: logger.debug(f"Cache expired (age: {cache_age.days} days, TTL: {self.CACHE_TTL_DAYS} days)") return is_valid def save_research_persona( self, user_id: str, research_persona: ResearchPersona ) -> bool: """ Save research persona to database. Args: user_id: User ID (Clerk string) research_persona: ResearchPersona to save Returns: True if successful, False otherwise """ try: persona_data = self._get_persona_data_record(user_id) if not persona_data: logger.error(f"No persona data record found for user {user_id}") return False # Convert ResearchPersona to dict for JSON storage persona_dict = research_persona.dict() # Update database record persona_data.research_persona = persona_dict persona_data.research_persona_generated_at = datetime.utcnow() self.db.commit() logger.info(f"✅ Research persona saved for user {user_id}") return True except Exception as e: logger.error(f"Error saving research persona for user {user_id}: {e}") self.db.rollback() return False def _get_persona_data_record(self, user_id: str) -> Optional[PersonaData]: """Get PersonaData database record for user.""" try: # Ensure research_persona columns exist before querying self.onboarding_service._ensure_research_persona_columns(self.db) # Get onboarding session session = self.db.query(OnboardingSession).filter( OnboardingSession.user_id == user_id ).first() if not session: return None # Get persona data persona_data = self.db.query(PersonaData).filter( PersonaData.session_id == session.id ).first() return persona_data except Exception as e: logger.error(f"Error getting persona data record for user {user_id}: {e}") return None def _collect_onboarding_data(self, user_id: str) -> Optional[Dict[str, Any]]: """ Collect all onboarding data needed for research persona generation. Returns: Dictionary with website_analysis, persona_data, research_preferences, business_info """ try: # Get website analysis website_analysis = self.onboarding_service.get_website_analysis(user_id, self.db) or {} # Get persona data persona_data_dict = self.onboarding_service.get_persona_data(user_id, self.db) or {} # Get research preferences research_prefs = self.onboarding_service.get_research_preferences(user_id, self.db) or {} # Get business info - construct from persona data and website analysis business_info = {} # Try to extract from persona data if persona_data_dict: core_persona = persona_data_dict.get('corePersona') or persona_data_dict.get('core_persona') if core_persona: if core_persona.get('industry'): business_info['industry'] = core_persona['industry'] if core_persona.get('target_audience'): business_info['target_audience'] = core_persona['target_audience'] # Fallback to website analysis if not in persona if not business_info.get('industry') and website_analysis: target_audience_data = website_analysis.get('target_audience', {}) if isinstance(target_audience_data, dict): industry_focus = target_audience_data.get('industry_focus') if industry_focus: business_info['industry'] = industry_focus demographics = target_audience_data.get('demographics') if demographics: business_info['target_audience'] = demographics if isinstance(demographics, str) else str(demographics) # Check if we have enough data if not website_analysis and not persona_data_dict: logger.warning(f"Insufficient onboarding data for user {user_id}") return None return { "website_analysis": website_analysis, "persona_data": persona_data_dict, "research_preferences": research_prefs, "business_info": business_info } except Exception as e: logger.error(f"Error collecting onboarding data for user {user_id}: {e}") return None