399 lines
18 KiB
Python
399 lines
18 KiB
Python
"""
Research Configuration API

Provides provider availability and persona-aware defaults for research.
"""
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
from typing import Dict, Any, Optional
|
|
from loguru import logger
|
|
from pydantic import BaseModel
|
|
|
|
from middleware.auth_middleware import get_current_user
|
|
from services.user_api_key_context import get_exa_key, get_gemini_key
|
|
from services.onboarding.database_service import OnboardingDatabaseService
|
|
from services.onboarding.progress_service import get_onboarding_progress_service
|
|
from services.database import get_db
|
|
from sqlalchemy.orm import Session
|
|
from services.research.research_persona_service import ResearchPersonaService
|
|
from services.research.research_persona_scheduler import schedule_research_persona_generation
|
|
from models.research_persona_models import ResearchPersona
|
|
|
|
|
|
router = APIRouter()
|
|
|
|
|
|
class ProviderAvailability(BaseModel):
    """Per-user availability of the research providers.

    The endpoints in this module set each ``*_available`` flag to True when
    the corresponding API key for the user is a non-blank string, and set
    the matching ``*_key_status`` to ``'configured'`` or ``'missing'``
    accordingly.
    """

    # True when the user's Gemini key is present and non-blank.
    google_available: bool
    # True when the user's Exa key is present and non-blank.
    exa_available: bool
    gemini_key_status: str  # 'configured' | 'missing'
    exa_key_status: str  # 'configured' | 'missing'
|
|
|
|
|
|
class PersonaDefaults(BaseModel):
    """Research defaults derived from the user's persona / onboarding data.

    Every field is optional so that an "empty" instance can be returned
    when no persona information could be loaded.
    """

    # Industry label; the endpoints in this module fall back to 'General'.
    industry: Optional[str] = None
    # Audience description; the endpoints in this module fall back to 'General'.
    target_audience: Optional[str] = None
    # Source domains suggested for the industry (empty when none are mapped).
    suggested_domains: list[str] = []
    # Exa search category suggested for the industry, if one is mapped.
    suggested_exa_category: Optional[str] = None
|
|
|
|
|
|
class ResearchConfigResponse(BaseModel):
    """Aggregate payload returned by the ``/config`` endpoint.

    Bundles provider availability, persona-aware defaults, the research
    persona (when one is available) and onboarding / scheduling flags.
    """

    provider_availability: ProviderAvailability
    persona_defaults: PersonaDefaults
    # None when no research persona is available for the user.
    research_persona: Optional[ResearchPersona] = None
    # Whether the onboarding progress service reports onboarding as completed.
    onboarding_completed: bool = False
    # True when persona generation was scheduled while building this response.
    persona_scheduled: bool = False
|
|
|
|
|
|
@router.get("/provider-availability", response_model=ProviderAvailability)
async def get_provider_availability(
    current_user: Dict = Depends(get_current_user)
):
    """
    Check which research providers are available for the current user.

    A provider counts as available when the user's API key for it is a
    non-blank string.

    Returns:
    - google_available: True if Gemini key is configured
    - exa_available: True if Exa key is configured
    - Key status for each provider ('configured' or 'missing')

    Raises:
        HTTPException: 500 if looking up the keys fails unexpectedly.
    """
    # Bound before the try block so the error handler can always reference it
    # (replaces the fragile `'user_id' in locals()` check).
    user_id = None
    try:
        user_id = str(current_user.get('id'))

        # Check API key availability
        gemini_key = get_gemini_key(user_id)
        exa_key = get_exa_key(user_id)

        # Whitespace-only keys are treated as missing.
        google_available = bool(gemini_key and gemini_key.strip())
        exa_available = bool(exa_key and exa_key.strip())

        return ProviderAvailability(
            google_available=google_available,
            exa_available=exa_available,
            gemini_key_status='configured' if google_available else 'missing',
            exa_key_status='configured' if exa_available else 'missing',
        )
    except Exception as e:
        # loguru does not understand the stdlib `exc_info=True` keyword: it
        # attaches no traceback and instead str.format()s the message, which
        # can itself raise when the error text contains braces.
        # logger.exception() records the active traceback safely.
        logger.exception(
            f"[ResearchConfig] Error checking provider availability for user {user_id if user_id else 'unknown'}: {e}"
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to check provider availability: {str(e)}",
        ) from e
|
|
|
|
|
|
@router.get("/persona-defaults", response_model=PersonaDefaults)
async def get_persona_defaults(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get persona-aware research defaults for the current user.

    Industry and target audience are read from the stored core persona
    first; when the persona yields no industry, the website analysis'
    ``target_audience`` data is consulted instead. Domain and Exa category
    suggestions are then derived from the resolved industry.

    Returns:
        PersonaDefaults with industry, target audience and suggestions.
        This endpoint is best-effort: if the database session is missing or
        any unexpected error occurs, the problem is logged and an empty
        ``PersonaDefaults()`` is returned instead of an HTTP error.
    """
    # Bound before the try block so the error handler can always reference it.
    user_id = None
    try:
        user_id = str(current_user.get('id'))

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_persona_defaults")
            # Return defaults rather than error
            return PersonaDefaults()

        db_service = OnboardingDatabaseService(db=db)

        # Try to get persona data first (most reliable source for industry/target_audience)
        persona_data = db_service.get_persona_data(user_id, db)
        industry = 'General'
        target_audience = 'General'

        if persona_data:
            # Both camelCase and snake_case keys are accepted.
            core_persona = persona_data.get('corePersona') or persona_data.get('core_persona')
            if core_persona:
                if core_persona.get('industry'):
                    industry = core_persona['industry']
                if core_persona.get('target_audience'):
                    target_audience = core_persona['target_audience']

        # Fallback to website analysis if persona data doesn't have industry info
        if industry == 'General':
            website_analysis = db_service.get_website_analysis(user_id, db)
            if website_analysis:
                target_audience_data = website_analysis.get('target_audience', {})
                if isinstance(target_audience_data, dict):
                    # Extract from target_audience JSON field
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus:
                        industry = industry_focus
                    # NOTE(review): demographics replaces any persona-provided
                    # target_audience whenever this fallback runs - confirm
                    # that this precedence is intended.
                    demographics = target_audience_data.get('demographics')
                    if demographics:
                        target_audience = demographics if isinstance(demographics, str) else str(demographics)

        # Suggest domains based on industry
        suggested_domains = _get_domain_suggestions(industry)

        # Suggest Exa category based on industry
        suggested_exa_category = _get_exa_category_suggestion(industry)

        return PersonaDefaults(
            industry=industry,
            target_audience=target_audience,
            suggested_domains=suggested_domains,
            suggested_exa_category=suggested_exa_category,
        )
    except Exception as e:
        # loguru ignores the stdlib `exc_info=True` keyword (no traceback is
        # attached) and str.format()s the message when any kwarg is passed,
        # which can raise if the error text contains braces.
        # logger.exception() records the traceback safely.
        logger.exception(
            f"[ResearchConfig] Error getting persona defaults for user {user_id if user_id else 'unknown'}: {e}"
        )
        # Return defaults rather than error
        return PersonaDefaults()
|
|
|
|
|
|
@router.get("/research-persona")
async def get_research_persona(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db),
    force_refresh: bool = Query(False, description="Force regenerate persona even if cache is valid")
):
    """
    Get or generate research persona for the current user.

    Query params:
    - force_refresh: If true, regenerate persona even if cache is valid (default: false)

    Returns research persona with personalized defaults, suggestions, and configurations.

    Raises:
        HTTPException: 401 if the authenticated user has no id, 404 if no
            persona is available, 500 if the database session is missing or
            an unexpected error occurs. HTTPExceptions raised further down
            the call chain are propagated unchanged.
    """
    # Bound before the try block so the error handler can always reference it.
    user_id = None
    try:
        # Validate the raw id BEFORE converting it: str(None) is the truthy
        # string 'None', which previously made this 401 check unreachable.
        raw_user_id = current_user.get('id')
        if raw_user_id is None or raw_user_id == '':
            raise HTTPException(status_code=401, detail="User not authenticated")
        user_id = str(raw_user_id)

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_research_persona")
            raise HTTPException(status_code=500, detail="Database not available")

        persona_service = ResearchPersonaService(db_session=db)
        research_persona = persona_service.get_or_generate(user_id, force_refresh=force_refresh)

        if not research_persona:
            raise HTTPException(
                status_code=404,
                detail="Research persona not available. Complete onboarding to generate one.",
            )

        return research_persona.dict()

    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve status code and details
        raise
    except Exception as e:
        # loguru ignores the stdlib `exc_info=True` keyword (no traceback is
        # attached) and str.format()s the message when any kwarg is passed,
        # which can raise if the error text contains braces.
        # logger.exception() records the traceback safely.
        logger.exception(
            f"[ResearchConfig] Error getting research persona for user {user_id if user_id else 'unknown'}: {e}"
        )
        raise HTTPException(status_code=500, detail=f"Failed to get research persona: {str(e)}") from e
|
|
|
|
|
|
@router.get("/config", response_model=ResearchConfigResponse)
async def get_research_config(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get complete research configuration including provider availability and persona defaults.

    The response is assembled in four steps:
    1. provider availability from the user's Gemini / Exa keys,
    2. persona defaults from the core persona, falling back to website analysis,
    3. onboarding completion status (best-effort, defaults to False),
    4. the cached research persona (never generated here); when onboarding
       is complete, no persona is cached and persona data exists, generation
       is scheduled and ``persona_scheduled`` is set.

    Raises:
        HTTPException: 500 if the database session is missing or an
            unexpected error escapes the best-effort steps. Other
            HTTPExceptions are propagated unchanged.
    """
    user_id = None
    try:
        user_id = str(current_user.get('id'))
        logger.info(f"[ResearchConfig] Starting get_research_config for user {user_id}")

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_research_config")
            raise HTTPException(status_code=500, detail="Database session not available")

        # Get provider availability
        logger.debug(f"[ResearchConfig] Getting provider availability for user {user_id}")
        gemini_key = get_gemini_key(user_id)
        exa_key = get_exa_key(user_id)

        # Whitespace-only keys are treated as missing.
        google_available = bool(gemini_key and gemini_key.strip())
        exa_available = bool(exa_key and exa_key.strip())

        provider_availability = ProviderAvailability(
            google_available=google_available,
            exa_available=exa_available,
            gemini_key_status='configured' if google_available else 'missing',
            exa_key_status='configured' if exa_available else 'missing',
        )

        # Get persona defaults
        logger.debug(f"[ResearchConfig] Getting persona defaults for user {user_id}")
        db_service = OnboardingDatabaseService(db=db)

        # Try to get persona data first (most reliable source for industry/target_audience)
        try:
            persona_data = db_service.get_persona_data(user_id, db)
        except Exception as e:
            # NOTE: loguru has no `exc_info` keyword; logger.exception() is the
            # loguru way to log at ERROR level with the active traceback.
            logger.exception(f"[ResearchConfig] Error getting persona data for user {user_id}: {e}")
            persona_data = None

        industry = 'General'
        target_audience = 'General'

        if persona_data:
            # Both camelCase and snake_case keys are accepted.
            core_persona = persona_data.get('corePersona') or persona_data.get('core_persona')
            if core_persona:
                if core_persona.get('industry'):
                    industry = core_persona['industry']
                if core_persona.get('target_audience'):
                    target_audience = core_persona['target_audience']

        # Fallback to website analysis if persona data doesn't have industry info
        if industry == 'General':
            website_analysis = db_service.get_website_analysis(user_id, db)
            if website_analysis:
                target_audience_data = website_analysis.get('target_audience', {})
                if isinstance(target_audience_data, dict):
                    # Extract from target_audience JSON field
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus:
                        industry = industry_focus
                    demographics = target_audience_data.get('demographics')
                    if demographics:
                        target_audience = demographics if isinstance(demographics, str) else str(demographics)

        persona_defaults = PersonaDefaults(
            industry=industry,
            target_audience=target_audience,
            suggested_domains=_get_domain_suggestions(industry),
            suggested_exa_category=_get_exa_category_suggestion(industry),
        )

        # Check onboarding completion status
        onboarding_completed = False
        try:
            logger.debug(f"[ResearchConfig] Checking onboarding status for user {user_id}")
            progress_service = get_onboarding_progress_service()
            onboarding_status = progress_service.get_onboarding_status(user_id)
            onboarding_completed = onboarding_status.get('is_completed', False)
            logger.info(
                f"[ResearchConfig] Onboarding status check for user {user_id}: "
                f"is_completed={onboarding_completed}, "
                f"current_step={onboarding_status.get('current_step')}, "
                f"progress={onboarding_status.get('completion_percentage')}"
            )
        except Exception as e:
            logger.exception(f"[ResearchConfig] Could not check onboarding status for user {user_id}: {e}")
            # Continue with onboarding_completed=False

        # Get research persona (optional, may not exist for all users)
        # CRITICAL: Use get_cached_only() to avoid triggering rate limit checks
        # Only return persona if it's already cached - don't generate on config load
        research_persona = None
        persona_scheduled = False
        try:
            logger.debug(f"[ResearchConfig] Getting cached research persona for user {user_id}")
            persona_service = ResearchPersonaService(db_session=db)
            research_persona = persona_service.get_cached_only(user_id)

            logger.info(
                f"[ResearchConfig] Research persona check for user {user_id}: "
                f"persona_exists={research_persona is not None}, "
                f"onboarding_completed={onboarding_completed}"
            )

            # If onboarding is completed but persona doesn't exist, schedule generation
            if onboarding_completed and not research_persona:
                try:
                    # Check if persona data exists (to ensure we have data to generate from).
                    # The service created above is reused; the data is fetched
                    # again so an earlier failed lookup gets a second chance.
                    persona_data = db_service.get_persona_data(user_id, db)
                    if persona_data and (persona_data.get('corePersona') or persona_data.get('platformPersonas') or
                                         persona_data.get('core_persona') or persona_data.get('platform_personas')):
                        # Schedule persona generation (20 minutes from now)
                        schedule_research_persona_generation(user_id, delay_minutes=20)
                        logger.info(f"Scheduled research persona generation for user {user_id} (onboarding already completed)")
                        persona_scheduled = True
                    else:
                        logger.info(f"Onboarding completed but no persona data found for user {user_id} - cannot schedule persona generation")
                except Exception as e:
                    # opt(exception=True) attaches the traceback at WARNING level.
                    logger.opt(exception=True).warning(f"Failed to schedule research persona generation: {e}")
        except Exception as e:
            # get_cached_only() never raises HTTPException, but catch any unexpected errors
            logger.opt(exception=True).warning(
                f"[ResearchConfig] Could not load cached research persona for user {user_id}: {e}"
            )

        # FastAPI will automatically serialize the ResearchPersona Pydantic model
        # If there's a serialization issue, we catch it and log it
        try:
            response = ResearchConfigResponse(
                provider_availability=provider_availability,
                persona_defaults=persona_defaults,
                research_persona=research_persona,
                onboarding_completed=onboarding_completed,
                persona_scheduled=persona_scheduled,
            )
        except Exception as serialization_error:
            logger.exception(
                f"[ResearchConfig] Failed to create ResearchConfigResponse for user {user_id}: {serialization_error}"
            )
            # Try without research_persona as fallback
            response = ResearchConfigResponse(
                provider_availability=provider_availability,
                persona_defaults=persona_defaults,
                research_persona=None,
                onboarding_completed=onboarding_completed,
                persona_scheduled=persona_scheduled,
            )

        logger.info(
            f"[ResearchConfig] Response for user {user_id}: "
            f"onboarding_completed={onboarding_completed}, "
            f"persona_exists={research_persona is not None}, "
            f"persona_scheduled={persona_scheduled}"
        )

        return response
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429, 401, etc.) to preserve status codes
        raise
    except Exception as e:
        # logger.exception() already includes the full traceback, so the
        # separate traceback.format_exc() log line (a workaround for the
        # `exc_info=True` keyword that loguru ignores) is no longer needed.
        logger.exception(
            f"[ResearchConfig] CRITICAL ERROR getting research config for user {user_id if user_id else 'unknown'}: {e}"
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get research config: {str(e)}",
        ) from e
|
|
|
|
|
|
# Helper functions from RESEARCH_AI_HYPERPERSONALIZATION.md
|
|
|
|
def _get_domain_suggestions(industry: str) -> list[str]:
|
|
"""Get domain suggestions based on industry."""
|
|
domain_map = {
|
|
'Healthcare': ['pubmed.gov', 'nejm.org', 'thelancet.com', 'nih.gov'],
|
|
'Technology': ['techcrunch.com', 'wired.com', 'arstechnica.com', 'theverge.com'],
|
|
'Finance': ['wsj.com', 'bloomberg.com', 'ft.com', 'reuters.com'],
|
|
'Science': ['nature.com', 'sciencemag.org', 'cell.com', 'pnas.org'],
|
|
'Business': ['hbr.org', 'forbes.com', 'businessinsider.com', 'mckinsey.com'],
|
|
'Marketing': ['marketingland.com', 'adweek.com', 'hubspot.com', 'moz.com'],
|
|
'Education': ['edutopia.org', 'chronicle.com', 'insidehighered.com'],
|
|
'Real Estate': ['realtor.com', 'zillow.com', 'forbes.com'],
|
|
'Entertainment': ['variety.com', 'hollywoodreporter.com', 'deadline.com'],
|
|
'Travel': ['lonelyplanet.com', 'nationalgeographic.com', 'travelandleisure.com'],
|
|
'Fashion': ['vogue.com', 'elle.com', 'wwd.com'],
|
|
'Sports': ['espn.com', 'si.com', 'bleacherreport.com'],
|
|
'Law': ['law.com', 'abajournal.com', 'scotusblog.com'],
|
|
}
|
|
return domain_map.get(industry, [])
|
|
|
|
|
|
def _get_exa_category_suggestion(industry: str) -> Optional[str]:
|
|
"""Get Exa category suggestion based on industry."""
|
|
category_map = {
|
|
'Healthcare': 'research paper',
|
|
'Science': 'research paper',
|
|
'Finance': 'financial report',
|
|
'Technology': 'company',
|
|
'Business': 'company',
|
|
'Marketing': 'company',
|
|
'Education': 'research paper',
|
|
'Law': 'pdf',
|
|
}
|
|
return category_map.get(industry)
|
|
|