Scheduled research persona generation
This commit is contained in:
171
backend/services/research/research_persona_prompt_builder.py
Normal file
171
backend/services/research/research_persona_prompt_builder.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""
|
||||
Research Persona Prompt Builder
|
||||
|
||||
Handles building comprehensive prompts for research persona generation.
|
||||
Generates personalized research defaults, suggestions, and configurations.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any
|
||||
import json
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class ResearchPersonaPromptBuilder:
    """Builds comprehensive prompts for research persona generation."""

    @staticmethod
    def _to_json(section: Any) -> str:
        """Pretty-print one onboarding section for embedding in the prompt.

        ``default=str`` keeps values that ``json`` cannot encode natively
        (for example ``datetime`` objects) from aborting prompt construction
        with a ``TypeError``; such values are rendered through ``str()``.
        """
        return json.dumps(section, indent=2, default=str)

    def build_research_persona_prompt(self, onboarding_data: Dict[str, Any]) -> str:
        """Build the research persona generation prompt with comprehensive data.

        Args:
            onboarding_data: Mapping that may contain ``website_analysis``,
                ``persona_data``, ``research_preferences`` and
                ``business_info``. Missing or ``None`` sections are treated
                as empty dicts.

        Returns:
            The full prompt text, with each section embedded as indented JSON.
        """
        # Missing keys and explicit None values both collapse to {}.
        website_analysis = onboarding_data.get("website_analysis", {}) or {}
        persona_data = onboarding_data.get("persona_data", {}) or {}
        research_prefs = onboarding_data.get("research_preferences", {}) or {}
        business_info = onboarding_data.get("business_info", {}) or {}

        # The core persona may arrive under a snake_case or a camelCase key;
        # accept both so the CORE PERSONA section is not silently left empty.
        core_persona = (
            persona_data.get("core_persona")
            or persona_data.get("corePersona")
            or {}
        )

        # Literal braces in the template are doubled because this is an f-string.
        prompt = f"""
COMPREHENSIVE RESEARCH PERSONA GENERATION TASK: Create a highly detailed, personalized research persona based on the user's business, writing style, and content strategy. This persona will provide intelligent defaults and suggestions for research inputs.

=== USER CONTEXT ===

BUSINESS INFORMATION:
{self._to_json(business_info)}

WEBSITE ANALYSIS:
{self._to_json(website_analysis)}

CORE PERSONA:
{self._to_json(core_persona)}

RESEARCH PREFERENCES:
{self._to_json(research_prefs)}

=== RESEARCH PERSONA GENERATION REQUIREMENTS ===

Generate a comprehensive research persona in JSON format with the following structure:

1. DEFAULT VALUES:
- "default_industry": Extract from core_persona.industry, business_info.industry, or website_analysis target_audience. Use "General" only if none available.
- "default_target_audience": Extract from core_persona.target_audience, website_analysis.target_audience, or business_info.target_audience. Be specific and descriptive.
- "default_research_mode": Suggest "basic", "comprehensive", or "targeted" based on research_preferences.research_depth and content_type preferences.
- "default_provider": Suggest "google" for news/trends, "exa" for academic/technical deep-dives, or "google" as default.

2. KEYWORD INTELLIGENCE:
- "suggested_keywords": Generate 8-12 keywords relevant to the user's industry, interests (from core_persona), and content goals.
- "keyword_expansion_patterns": Create a dictionary mapping common keywords to expanded, industry-specific terms. Include 10-15 patterns like:
{{"AI": ["healthcare AI", "medical AI", "clinical AI", "diagnostic AI"], "tools": ["medical devices", "clinical tools"], ...}}
Focus on industry-specific terminology from the user's domain.

3. DOMAIN EXPERTISE:
- "suggested_exa_domains": List 4-6 authoritative domains for the user's industry (e.g., Healthcare: ["pubmed.gov", "nejm.org", "thelancet.com"]).
- "suggested_exa_category": Suggest appropriate Exa category based on industry:
- Healthcare/Science: "research paper"
- Finance: "financial report"
- Technology/Business: "company" or "news"
- Default: null (empty string for all categories)

4. RESEARCH ANGLES:
- "research_angles": Generate 5-8 alternative research angles/focuses based on:
- User's pain points and challenges (from core_persona)
- Industry trends and opportunities
- Content goals (from research_preferences)
- Audience interests (from core_persona.interests)
Examples: "Compare {{topic}} tools", "{{topic}} ROI analysis", "Latest {{topic}} trends", etc.

5. QUERY ENHANCEMENT:
- "query_enhancement_rules": Create templates for improving vague user queries:
{{"vague_ai": "Research: AI applications in {{industry}} for {{audience}}", "vague_tools": "Compare top {{industry}} tools", ...}}
Include 5-8 enhancement patterns.

6. RECOMMENDED PRESETS:
- "recommended_presets": Generate 3-5 personalized research preset templates. Each preset should include:
- name: Descriptive name (e.g., "{{Industry}} Trends", "{{Audience}} Insights")
- keywords: Research query string
- industry: User's industry
- target_audience: User's target audience
- research_mode: "basic", "comprehensive", or "targeted"
- config: Complete ResearchConfig object with appropriate settings
- description: Brief explanation of what this preset researches
Make presets relevant to the user's specific industry, audience, and content goals.

7. RESEARCH PREFERENCES:
- "research_preferences": Extract and structure research preferences from onboarding:
- research_depth: From research_preferences.research_depth
- content_types: From research_preferences.content_types
- auto_research: From research_preferences.auto_research
- factual_content: From research_preferences.factual_content

=== OUTPUT REQUIREMENTS ===

Return a valid JSON object matching this exact structure:
{{
"default_industry": "string",
"default_target_audience": "string",
"default_research_mode": "basic" | "comprehensive" | "targeted",
"default_provider": "google" | "exa",
"suggested_keywords": ["keyword1", "keyword2", ...],
"keyword_expansion_patterns": {{
"keyword": ["expansion1", "expansion2", ...]
}},
"suggested_exa_domains": ["domain1.com", "domain2.com", ...],
"suggested_exa_category": "string or null",
"research_angles": ["angle1", "angle2", ...],
"query_enhancement_rules": {{
"pattern": "template"
}},
"recommended_presets": [
{{
"name": "string",
"keywords": "string",
"industry": "string",
"target_audience": "string",
"research_mode": "basic" | "comprehensive" | "targeted",
"config": {{
"mode": "basic" | "comprehensive" | "targeted",
"provider": "google" | "exa",
"max_sources": 10 | 15 | 12,
"include_statistics": true | false,
"include_expert_quotes": true | false,
"include_competitors": true | false,
"include_trends": true | false,
"exa_category": "string or null",
"exa_include_domains": ["domain1.com", ...],
"exa_search_type": "auto" | "keyword" | "neural"
}},
"description": "string"
}}
],
"research_preferences": {{
"research_depth": "string",
"content_types": ["type1", "type2", ...],
"auto_research": true | false,
"factual_content": true | false
}},
"version": "1.0",
"confidence_score": 85.0
}}

=== IMPORTANT INSTRUCTIONS ===

1. Be highly specific and personalized - use actual data from the user's business, persona, and preferences.
2. Avoid generic suggestions - every field should reflect the user's unique context.
3. For industries not clearly identified, infer from website_analysis.content_characteristics or writing_style.
4. Ensure all suggested keywords, domains, and angles are relevant to the user's industry and audience.
5. Generate realistic, actionable presets that the user would actually want to use.
6. Confidence score should reflect data richness (0-100): higher if rich onboarding data, lower if minimal data.
7. Return ONLY valid JSON - no markdown formatting, no explanatory text.

Generate the research persona now:
"""

        return prompt

    def get_json_schema(self) -> Dict[str, Any]:
        """Return the JSON schema of ``ResearchPersona`` for structured LLM responses.

        The result is intended for ``llm_text_gen(json_struct=...)``.
        """
        # Imported inside the method, as in the original, so the models
        # package is only needed when a schema is actually requested.
        from models.research_persona_models import ResearchPersona

        # Pydantic v2 exposes model_json_schema(); v1 only provides schema().
        # Prefer the v2 name when present to avoid the deprecation warning.
        build_schema = getattr(ResearchPersona, "model_json_schema", None)
        if build_schema is None:
            build_schema = ResearchPersona.schema
        return build_schema()
|
||||
194
backend/services/research/research_persona_scheduler.py
Normal file
194
backend/services/research/research_persona_scheduler.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
Research Persona Scheduler
|
||||
Handles scheduled generation of research personas after onboarding.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Dict, Any
|
||||
from loguru import logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.research.research_persona_service import ResearchPersonaService
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
|
||||
|
||||
def _record_persona_job_event(db, outcome, **event_fields):
    """Write one SchedulerEventLog row on a best-effort basis.

    Args:
        db: Open database session used for add/commit (and rollback on error).
        outcome: Word placed in the warning text if the write fails
            ("success", "failure" or "exception").
        **event_fields: Keyword arguments passed unchanged to
            ``SchedulerEventLog``.
    """
    try:
        db.add(SchedulerEventLog(**event_fields))
        db.commit()
    except Exception as log_error:
        # A failed dashboard log entry must not mask the job outcome itself.
        logger.warning(f"Failed to log persona generation {outcome} to scheduler event log: {log_error}")
        if db:
            db.rollback()


async def generate_research_persona_task(user_id: str):
    """
    Async task function that generates the research persona for one user.

    Intended to be run by the scheduler after onboarding (see
    schedule_research_persona_generation, default delay 20 minutes).
    The outcome is written to the scheduler event log; the task never
    reschedules itself, whatever the result.

    Args:
        user_id: User ID (Clerk string)
    """
    # Same ID format that the scheduling function uses for the job.
    job_id = f"research_persona_{user_id}"
    job_function = 'generate_research_persona_task'

    db = None
    try:
        logger.info(f"Scheduled research persona generation started for user {user_id}")

        db = get_db_session()
        if not db:
            logger.error(f"Failed to get database session for research persona generation (user: {user_id})")
            return

        service = ResearchPersonaService(db_session=db)

        # Skip early when a persona is already stored, avoiding unnecessary API calls.
        existing_record = service._get_persona_data_record(user_id)
        if existing_record and existing_record.research_persona:
            logger.info(f"Research persona already exists for user {user_id}, skipping generation")
            return

        started_at = datetime.utcnow()
        try:
            persona = service.get_or_generate(user_id, force_refresh=False)
            elapsed_seconds = (datetime.utcnow() - started_at).total_seconds()

            if not persona:
                failure_msg = (
                    f"Scheduled research persona generation FAILED for user {user_id}. "
                    f"Expensive API call was made but generation failed. "
                    f"Will NOT automatically retry to prevent wasteful API calls."
                )
                logger.error(f"❌ {failure_msg}")

                # Record the failure so it is visible on the dashboard.
                _record_persona_job_event(
                    db,
                    "failure",
                    event_type='job_failed',
                    event_date=started_at,
                    job_id=job_id,
                    job_type='one_time',
                    user_id=user_id,
                    error_message=failure_msg,
                    event_data={
                        'job_function': job_function,
                        'execution_time_seconds': elapsed_seconds,
                        'status': 'failed',
                        'failure_reason': 'generation_returned_none',
                        'expensive_api_call': True
                    },
                )
                # DO NOT reschedule - this prevents infinite retry loops.
                # User can manually trigger generation from frontend if needed.
            else:
                logger.info(f"✅ Scheduled research persona generation completed for user {user_id}")

                # Record the success for the dashboard.
                _record_persona_job_event(
                    db,
                    "success",
                    event_type='job_completed',
                    event_date=started_at,
                    job_id=job_id,
                    job_type='one_time',
                    user_id=user_id,
                    event_data={
                        'job_function': job_function,
                        'execution_time_seconds': elapsed_seconds,
                        'status': 'success'
                    },
                )
        except Exception as gen_error:
            elapsed_seconds = (datetime.utcnow() - started_at).total_seconds()
            failure_msg = (
                f"Exception during scheduled research persona generation for user {user_id}: {str(gen_error)}. "
                f"Expensive API call may have been made. Will NOT automatically retry."
            )
            logger.error(f"❌ {failure_msg}")

            # Record the exception so it is visible on the dashboard.
            _record_persona_job_event(
                db,
                "exception",
                event_type='job_failed',
                event_date=started_at,
                job_id=job_id,
                job_type='one_time',
                user_id=user_id,
                error_message=failure_msg,
                event_data={
                    'job_function': job_function,
                    'execution_time_seconds': elapsed_seconds,
                    'status': 'failed',
                    'failure_reason': 'exception',
                    'exception_type': type(gen_error).__name__,
                    'exception_message': str(gen_error),
                    'expensive_api_call': True
                },
            )
            # DO NOT reschedule - prevent infinite retry loops.

    except Exception as e:
        logger.error(f"Error in scheduled research persona generation for user {user_id}: {e}")
    finally:
        # Always release the session, including on the early-return paths.
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session: {e}")
|
||||
|
||||
|
||||
def schedule_research_persona_generation(user_id: str, delay_minutes: int = 20) -> str:
    """
    Register a one-time scheduler job that generates the user's research persona.

    Args:
        user_id: User ID (Clerk string)
        delay_minutes: Minutes to wait before the job runs (default: 20)

    Returns:
        The job ID returned by the scheduler

    Raises:
        Exception: Any error raised while scheduling is logged and re-raised.
    """
    try:
        from services.scheduler import get_scheduler

        task_scheduler = get_scheduler()

        # Timezone-aware UTC "now" plus the requested delay.
        fire_at = datetime.now(timezone.utc) + timedelta(minutes=delay_minutes)

        # Stable, timestamp-free ID so a restored job can be matched to the
        # original one. The Clerk user_id already carries its "user_" prefix,
        # so no extra prefix is added here.
        stable_job_id = f"research_persona_{user_id}"

        scheduled_job_id = task_scheduler.schedule_one_time_task(
            func=generate_research_persona_task,
            run_date=fire_at,
            job_id=stable_job_id,
            kwargs={"user_id": user_id},
            replace_existing=True
        )

        logger.info(
            f"Scheduled research persona generation for user {user_id} "
            f"at {fire_at} (job_id: {scheduled_job_id})"
        )
        return scheduled_job_id

    except Exception as e:
        logger.error(f"Failed to schedule research persona generation for user {user_id}: {e}")
        raise
|
||||
|
||||
384
backend/services/research/research_persona_service.py
Normal file
384
backend/services/research/research_persona_service.py
Normal file
@@ -0,0 +1,384 @@
|
||||
"""
|
||||
Research Persona Service
|
||||
|
||||
Handles generation, caching, and retrieval of AI-powered research personas.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
from loguru import logger
|
||||
from fastapi import HTTPException
|
||||
|
||||
from services.database import get_db_session
|
||||
from models.onboarding import PersonaData, OnboardingSession
|
||||
from models.research_persona_models import ResearchPersona
|
||||
from .research_persona_prompt_builder import ResearchPersonaPromptBuilder
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from services.onboarding.database_service import OnboardingDatabaseService
|
||||
from services.persona_data_service import PersonaDataService
|
||||
|
||||
|
||||
class ResearchPersonaService:
    """Service for generating and managing research personas."""

    CACHE_TTL_DAYS = 7  # 7-day cache TTL

    def __init__(self, db_session=None):
        """Wire up the database session and the helper services.

        Args:
            db_session: Session to use; when omitted (or falsy) a new one is
                obtained from ``get_db_session()``.
        """
        self.db = db_session or get_db_session()
        self.prompt_builder = ResearchPersonaPromptBuilder()
        self.onboarding_service = OnboardingDatabaseService(db=self.db)
        self.persona_data_service = PersonaDataService(db_session=self.db)

    def get_cached_only(
        self,
        user_id: str
    ) -> Optional["ResearchPersona"]:
        """
        Get research persona for user ONLY if it exists in cache.
        This method NEVER generates - it only returns cached personas.
        Use this for config endpoints to avoid triggering rate limit checks.

        Args:
            user_id: User ID (Clerk string)

        Returns:
            ResearchPersona if cached and valid, None otherwise
        """
        try:
            persona_data = self._get_persona_data_record(user_id)

            if not persona_data:
                logger.debug(f"No persona data found for user {user_id}")
                return None

            # Only return if cache is valid and persona exists
            if self.is_cache_valid(persona_data) and persona_data.research_persona:
                try:
                    logger.debug(f"Returning cached research persona for user {user_id}")
                    return ResearchPersona(**persona_data.research_persona)
                except Exception as e:
                    logger.warning(f"Failed to parse cached research persona: {e}")
                    return None

            # Cache invalid or persona missing - return None (don't generate)
            logger.debug(f"No valid cached research persona for user {user_id}")
            return None

        except Exception as e:
            logger.error(f"Error getting cached research persona for user {user_id}: {e}")
            return None

    def get_or_generate(
        self,
        user_id: str,
        force_refresh: bool = False
    ) -> Optional["ResearchPersona"]:
        """
        Get research persona for user, generating if missing or expired.

        Args:
            user_id: User ID (Clerk string)
            force_refresh: If True, regenerate even if cache is valid

        Returns:
            ResearchPersona if successful, None otherwise

        Raises:
            HTTPException: Propagated unchanged from generation (e.g. 429
                subscription limit) so it can reach the API layer.
        """
        try:
            persona_data = self._get_persona_data_record(user_id)

            if not persona_data:
                logger.warning(f"No persona data found for user {user_id}, cannot generate research persona")
                return None

            # Decide whether the stored persona can be served as-is.
            if force_refresh:
                logger.info(f"Forcing refresh of research persona for user {user_id}")
            elif not self.is_cache_valid(persona_data):
                logger.info(f"Cache expired for user {user_id}, regenerating...")
            elif persona_data.research_persona:
                logger.info(f"Using cached research persona for user {user_id}")
                try:
                    return ResearchPersona(**persona_data.research_persona)
                except Exception as e:
                    # Unparseable cache entry: fall through to regeneration.
                    logger.warning(f"Failed to parse cached research persona: {e}, regenerating...")
            else:
                logger.info(f"Research persona missing for user {user_id}, generating...")

            # Generate new research persona (HTTPException is re-raised below).
            research_persona = self.generate_research_persona(user_id)

            if not research_persona:
                # Log detailed error for debugging expensive failures
                logger.error(
                    f"❌ Failed to generate research persona for user {user_id} - "
                    f"This is an expensive failure (API call consumed). Check logs above for details."
                )
                return None

            # A failed save is logged but the freshly generated persona is
            # still returned to the caller.
            if self.save_research_persona(user_id, research_persona):
                logger.info(f"✅ Research persona generated and saved for user {user_id}")
            else:
                logger.warning(f"Failed to save research persona for user {user_id}")

            return research_persona

        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
            raise
        except Exception as e:
            logger.error(f"Error getting/generating research persona for user {user_id}: {e}")
            return None

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove a surrounding Markdown code fence (``` or ```json) from *text*."""
        text = text.strip()
        if text.startswith("```json"):
            text = text[7:]
        if text.startswith("```"):
            text = text[3:]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def generate_research_persona(self, user_id: str) -> Optional["ResearchPersona"]:
        """
        Generate a new research persona for the user.

        Args:
            user_id: User ID (Clerk string)

        Returns:
            ResearchPersona if successful, None otherwise

        Raises:
            HTTPException: Re-raised from the LLM call; a RuntimeError from
                the LLM call is converted to a 429 HTTPException.
        """
        try:
            logger.info(f"Generating research persona for user {user_id}")

            onboarding_data = self._collect_onboarding_data(user_id)
            if not onboarding_data:
                logger.warning(f"Insufficient onboarding data for user {user_id}")
                return None

            prompt = self.prompt_builder.build_research_persona_prompt(onboarding_data)

            # JSON schema for the structured response
            json_schema = self.prompt_builder.get_json_schema()

            logger.info(f"Calling LLM for research persona generation (user: {user_id})")
            try:
                response_text = llm_text_gen(
                    prompt=prompt,
                    json_struct=json_schema,
                    user_id=user_id
                )
            except HTTPException:
                # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
                logger.warning(f"HTTPException during LLM call for user {user_id} - re-raising")
                raise
            except RuntimeError as e:
                # Re-raise RuntimeError (subscription limits) as HTTPException
                logger.warning(f"RuntimeError during LLM call for user {user_id}: {e}")
                raise HTTPException(status_code=429, detail=str(e))

            if not response_text:
                logger.error("Empty response from LLM")
                return None

            # Imported locally: this module has no top-level json import.
            import json
            try:
                # When json_struct is provided, llm_text_gen may return a dict directly
                if isinstance(response_text, dict):
                    persona_dict = response_text
                elif isinstance(response_text, str):
                    # Markdown-wrapped JSON or a plain JSON string
                    response_text = self._strip_code_fence(response_text)
                    persona_dict = json.loads(response_text)
                else:
                    logger.error(f"Unexpected response type from LLM: {type(response_text)}")
                    return None

                # Add generated_at timestamp
                persona_dict["generated_at"] = datetime.utcnow().isoformat()

                # Validate and create ResearchPersona; on failure log the
                # dict structure, then let the handler below report it.
                try:
                    research_persona = ResearchPersona(**persona_dict)
                    logger.info(f"✅ Research persona generated successfully for user {user_id}")
                    return research_persona
                except Exception as validation_error:
                    logger.error(f"Failed to validate ResearchPersona from dict: {validation_error}")
                    logger.debug(f"Persona dict keys: {list(persona_dict.keys()) if isinstance(persona_dict, dict) else 'Not a dict'}")
                    logger.debug(f"Persona dict sample: {str(persona_dict)[:500]}")
                    raise

            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse LLM response as JSON: {e}")
                logger.debug(f"Response text: {response_text[:500] if isinstance(response_text, str) else str(response_text)[:500]}")
                return None
            except Exception as e:
                logger.error(f"Failed to create ResearchPersona from response: {e}")
                return None

        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
            raise
        except Exception as e:
            logger.error(f"Error generating research persona for user {user_id}: {e}")
            return None

    def is_cache_valid(self, persona_data: "PersonaData") -> bool:
        """
        Check if cached research persona is still valid (within TTL).

        Args:
            persona_data: PersonaData database record

        Returns:
            True if cache is valid, False otherwise
        """
        generated_at = persona_data.research_persona_generated_at
        if not generated_at:
            return False

        # The comparison below uses a naive UTC "now". If the stored value is
        # timezone-aware, convert it to naive UTC first; subtracting an aware
        # datetime from a naive one raises TypeError.
        utc_offset = generated_at.utcoffset()
        if utc_offset is not None:
            generated_at = (generated_at - utc_offset).replace(tzinfo=None)

        cache_age = datetime.utcnow() - generated_at
        is_valid = cache_age < timedelta(days=self.CACHE_TTL_DAYS)

        if not is_valid:
            logger.debug(f"Cache expired (age: {cache_age.days} days, TTL: {self.CACHE_TTL_DAYS} days)")

        return is_valid

    def save_research_persona(
        self,
        user_id: str,
        research_persona: "ResearchPersona"
    ) -> bool:
        """
        Save research persona to database.

        Args:
            user_id: User ID (Clerk string)
            research_persona: ResearchPersona to save

        Returns:
            True if successful, False otherwise
        """
        try:
            persona_data = self._get_persona_data_record(user_id)

            if not persona_data:
                logger.error(f"No persona data record found for user {user_id}")
                return False

            # Store the persona as a plain dict and stamp the time that
            # is_cache_valid later compares against.
            persona_data.research_persona = research_persona.dict()
            persona_data.research_persona_generated_at = datetime.utcnow()

            self.db.commit()

            logger.info(f"✅ Research persona saved for user {user_id}")
            return True

        except Exception as e:
            logger.error(f"Error saving research persona for user {user_id}: {e}")
            self.db.rollback()
            return False

    def _get_persona_data_record(self, user_id: str) -> Optional["PersonaData"]:
        """Get PersonaData database record for user (None if absent or on error)."""
        try:
            # Ensure research_persona columns exist before querying
            self.onboarding_service._ensure_research_persona_columns(self.db)

            # PersonaData is linked to the user through the onboarding session.
            session = self.db.query(OnboardingSession).filter(
                OnboardingSession.user_id == user_id
            ).first()

            if not session:
                return None

            return self.db.query(PersonaData).filter(
                PersonaData.session_id == session.id
            ).first()

        except Exception as e:
            logger.error(f"Error getting persona data record for user {user_id}: {e}")
            return None

    def _collect_onboarding_data(self, user_id: str) -> Optional[Dict[str, Any]]:
        """
        Collect all onboarding data needed for research persona generation.

        ``business_info`` is derived here: ``industry`` and ``target_audience``
        come from the core persona when present, and each field that is still
        missing falls back to the website analysis ``target_audience`` section.

        Returns:
            Dictionary with website_analysis, persona_data, research_preferences,
            business_info; None when both website analysis and persona data are
            empty, or on error.
        """
        try:
            website_analysis = self.onboarding_service.get_website_analysis(user_id, self.db) or {}
            persona_data_dict = self.onboarding_service.get_persona_data(user_id, self.db) or {}
            research_prefs = self.onboarding_service.get_research_preferences(user_id, self.db) or {}

            # Get business info - construct from persona data and website analysis
            business_info = {}

            # Primary source: core persona (camelCase or snake_case key).
            core_persona = persona_data_dict.get('corePersona') or persona_data_dict.get('core_persona')
            if core_persona:
                for field in ('industry', 'target_audience'):
                    if core_persona.get(field):
                        business_info[field] = core_persona[field]

            # Fallback: fill only the fields the persona did not provide, so a
            # persona-supplied value is never overwritten and a missing
            # target_audience is filled even when industry is already known.
            incomplete = not business_info.get('industry') or not business_info.get('target_audience')
            if incomplete and website_analysis:
                target_audience_data = website_analysis.get('target_audience', {})
                if isinstance(target_audience_data, dict):
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus and not business_info.get('industry'):
                        business_info['industry'] = industry_focus
                    demographics = target_audience_data.get('demographics')
                    if demographics and not business_info.get('target_audience'):
                        business_info['target_audience'] = str(demographics)

            # Check if we have enough data
            if not website_analysis and not persona_data_dict:
                logger.warning(f"Insufficient onboarding data for user {user_id}")
                return None

            return {
                "website_analysis": website_analysis,
                "persona_data": persona_data_dict,
                "research_preferences": research_prefs,
                "business_info": business_info
            }

        except Exception as e:
            logger.error(f"Error collecting onboarding data for user {user_id}: {e}")
            return None
|
||||
Reference in New Issue
Block a user