Files
ALwrity/backend/api/research_config.py
2025-11-05 08:51:00 +05:30

399 lines
18 KiB
Python

"""
Research Configuration API
Provides provider availability and persona-aware defaults for research.
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from typing import Dict, Any, Optional
from loguru import logger
from pydantic import BaseModel
from middleware.auth_middleware import get_current_user
from services.user_api_key_context import get_exa_key, get_gemini_key
from services.onboarding.database_service import OnboardingDatabaseService
from services.onboarding.progress_service import get_onboarding_progress_service
from services.database import get_db
from sqlalchemy.orm import Session
from services.research.research_persona_service import ResearchPersonaService
from services.research.research_persona_scheduler import schedule_research_persona_generation
from models.research_persona_models import ResearchPersona
router = APIRouter()
class ProviderAvailability(BaseModel):
"""Provider availability status."""
google_available: bool
exa_available: bool
gemini_key_status: str # 'configured' | 'missing'
exa_key_status: str # 'configured' | 'missing'
class PersonaDefaults(BaseModel):
"""Persona-aware research defaults."""
industry: Optional[str] = None
target_audience: Optional[str] = None
suggested_domains: list[str] = []
suggested_exa_category: Optional[str] = None
class ResearchConfigResponse(BaseModel):
"""Combined research configuration response."""
provider_availability: ProviderAvailability
persona_defaults: PersonaDefaults
research_persona: Optional[ResearchPersona] = None
onboarding_completed: bool = False
persona_scheduled: bool = False
@router.get("/provider-availability", response_model=ProviderAvailability)
async def get_provider_availability(
current_user: Dict = Depends(get_current_user)
):
"""
Check which research providers are available for the current user.
Returns:
- google_available: True if Gemini key is configured
- exa_available: True if Exa key is configured
- Key status for each provider
"""
try:
user_id = str(current_user.get('id'))
# Check API key availability
gemini_key = get_gemini_key(user_id)
exa_key = get_exa_key(user_id)
google_available = bool(gemini_key and gemini_key.strip())
exa_available = bool(exa_key and exa_key.strip())
return ProviderAvailability(
google_available=google_available,
exa_available=exa_available,
gemini_key_status='configured' if google_available else 'missing',
exa_key_status='configured' if exa_available else 'missing'
)
except Exception as e:
logger.error(f"[ResearchConfig] Error checking provider availability for user {user_id if 'user_id' in locals() else 'unknown'}: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Failed to check provider availability: {str(e)}")
@router.get("/persona-defaults", response_model=PersonaDefaults)
async def get_persona_defaults(
current_user: Dict = Depends(get_current_user),
db: Session = Depends(get_db)
):
"""
Get persona-aware research defaults for the current user.
Returns industry, target audience, and smart suggestions based on onboarding data.
"""
try:
user_id = str(current_user.get('id'))
# Add explicit null check for database session
if not db:
logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_persona_defaults")
# Return defaults rather than error
return PersonaDefaults()
db_service = OnboardingDatabaseService(db=db)
# Try to get persona data first (most reliable source for industry/target_audience)
persona_data = db_service.get_persona_data(user_id, db)
industry = 'General'
target_audience = 'General'
if persona_data:
core_persona = persona_data.get('corePersona') or persona_data.get('core_persona')
if core_persona:
if core_persona.get('industry'):
industry = core_persona['industry']
if core_persona.get('target_audience'):
target_audience = core_persona['target_audience']
# Fallback to website analysis if persona data doesn't have industry info
if industry == 'General':
website_analysis = db_service.get_website_analysis(user_id, db)
if website_analysis:
target_audience_data = website_analysis.get('target_audience', {})
if isinstance(target_audience_data, dict):
# Extract from target_audience JSON field
industry_focus = target_audience_data.get('industry_focus')
if industry_focus:
industry = industry_focus
demographics = target_audience_data.get('demographics')
if demographics:
target_audience = demographics if isinstance(demographics, str) else str(demographics)
# Suggest domains based on industry
suggested_domains = _get_domain_suggestions(industry)
# Suggest Exa category based on industry
suggested_exa_category = _get_exa_category_suggestion(industry)
return PersonaDefaults(
industry=industry,
target_audience=target_audience,
suggested_domains=suggested_domains,
suggested_exa_category=suggested_exa_category
)
except Exception as e:
logger.error(f"[ResearchConfig] Error getting persona defaults for user {user_id if 'user_id' in locals() else 'unknown'}: {e}", exc_info=True)
# Return defaults rather than error
return PersonaDefaults()
@router.get("/research-persona")
async def get_research_persona(
current_user: Dict = Depends(get_current_user),
db: Session = Depends(get_db),
force_refresh: bool = Query(False, description="Force regenerate persona even if cache is valid")
):
"""
Get or generate research persona for the current user.
Query params:
- force_refresh: If true, regenerate persona even if cache is valid (default: false)
Returns research persona with personalized defaults, suggestions, and configurations.
"""
try:
user_id = str(current_user.get('id'))
if not user_id:
raise HTTPException(status_code=401, detail="User not authenticated")
# Add explicit null check for database session
if not db:
logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_research_persona")
raise HTTPException(status_code=500, detail="Database not available")
persona_service = ResearchPersonaService(db_session=db)
research_persona = persona_service.get_or_generate(user_id, force_refresh=force_refresh)
if not research_persona:
raise HTTPException(
status_code=404,
detail="Research persona not available. Complete onboarding to generate one."
)
return research_persona.dict()
except HTTPException:
# Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve status code and details
raise
except Exception as e:
logger.error(f"[ResearchConfig] Error getting research persona for user {user_id if 'user_id' in locals() else 'unknown'}: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Failed to get research persona: {str(e)}")
@router.get("/config", response_model=ResearchConfigResponse)
async def get_research_config(
current_user: Dict = Depends(get_current_user),
db: Session = Depends(get_db)
):
"""
Get complete research configuration including provider availability and persona defaults.
"""
user_id = None
try:
user_id = str(current_user.get('id'))
logger.info(f"[ResearchConfig] Starting get_research_config for user {user_id}")
# Add explicit null check for database session
if not db:
logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_research_config")
raise HTTPException(status_code=500, detail="Database session not available")
# Get provider availability
logger.debug(f"[ResearchConfig] Getting provider availability for user {user_id}")
gemini_key = get_gemini_key(user_id)
exa_key = get_exa_key(user_id)
google_available = bool(gemini_key and gemini_key.strip())
exa_available = bool(exa_key and exa_key.strip())
provider_availability = ProviderAvailability(
google_available=google_available,
exa_available=exa_available,
gemini_key_status='configured' if google_available else 'missing',
exa_key_status='configured' if exa_available else 'missing'
)
# Get persona defaults
logger.debug(f"[ResearchConfig] Getting persona defaults for user {user_id}")
db_service = OnboardingDatabaseService(db=db)
# Try to get persona data first (most reliable source for industry/target_audience)
try:
persona_data = db_service.get_persona_data(user_id, db)
except Exception as e:
logger.error(f"[ResearchConfig] Error getting persona data for user {user_id}: {e}", exc_info=True)
persona_data = None
industry = 'General'
target_audience = 'General'
if persona_data:
core_persona = persona_data.get('corePersona') or persona_data.get('core_persona')
if core_persona:
if core_persona.get('industry'):
industry = core_persona['industry']
if core_persona.get('target_audience'):
target_audience = core_persona['target_audience']
# Fallback to website analysis if persona data doesn't have industry info
if industry == 'General':
website_analysis = db_service.get_website_analysis(user_id, db)
if website_analysis:
target_audience_data = website_analysis.get('target_audience', {})
if isinstance(target_audience_data, dict):
# Extract from target_audience JSON field
industry_focus = target_audience_data.get('industry_focus')
if industry_focus:
industry = industry_focus
demographics = target_audience_data.get('demographics')
if demographics:
target_audience = demographics if isinstance(demographics, str) else str(demographics)
persona_defaults = PersonaDefaults(
industry=industry,
target_audience=target_audience,
suggested_domains=_get_domain_suggestions(industry),
suggested_exa_category=_get_exa_category_suggestion(industry)
)
# Check onboarding completion status
onboarding_completed = False
try:
logger.debug(f"[ResearchConfig] Checking onboarding status for user {user_id}")
progress_service = get_onboarding_progress_service()
onboarding_status = progress_service.get_onboarding_status(user_id)
onboarding_completed = onboarding_status.get('is_completed', False)
logger.info(
f"[ResearchConfig] Onboarding status check for user {user_id}: "
f"is_completed={onboarding_completed}, "
f"current_step={onboarding_status.get('current_step')}, "
f"progress={onboarding_status.get('completion_percentage')}"
)
except Exception as e:
logger.error(f"[ResearchConfig] Could not check onboarding status for user {user_id}: {e}", exc_info=True)
# Continue with onboarding_completed=False
# Get research persona (optional, may not exist for all users)
# CRITICAL: Use get_cached_only() to avoid triggering rate limit checks
# Only return persona if it's already cached - don't generate on config load
research_persona = None
persona_scheduled = False
try:
logger.debug(f"[ResearchConfig] Getting cached research persona for user {user_id}")
persona_service = ResearchPersonaService(db_session=db)
research_persona = persona_service.get_cached_only(user_id)
logger.info(
f"[ResearchConfig] Research persona check for user {user_id}: "
f"persona_exists={research_persona is not None}, "
f"onboarding_completed={onboarding_completed}"
)
# If onboarding is completed but persona doesn't exist, schedule generation
if onboarding_completed and not research_persona:
try:
# Check if persona data exists (to ensure we have data to generate from)
db_service = OnboardingDatabaseService(db=db)
persona_data = db_service.get_persona_data(user_id, db)
if persona_data and (persona_data.get('corePersona') or persona_data.get('platformPersonas') or
persona_data.get('core_persona') or persona_data.get('platform_personas')):
# Schedule persona generation (20 minutes from now)
schedule_research_persona_generation(user_id, delay_minutes=20)
logger.info(f"Scheduled research persona generation for user {user_id} (onboarding already completed)")
persona_scheduled = True
else:
logger.info(f"Onboarding completed but no persona data found for user {user_id} - cannot schedule persona generation")
except Exception as e:
logger.warning(f"Failed to schedule research persona generation: {e}", exc_info=True)
except Exception as e:
# get_cached_only() never raises HTTPException, but catch any unexpected errors
logger.warning(f"[ResearchConfig] Could not load cached research persona for user {user_id}: {e}", exc_info=True)
# FastAPI will automatically serialize the ResearchPersona Pydantic model
# If there's a serialization issue, we catch it and log it
try:
response = ResearchConfigResponse(
provider_availability=provider_availability,
persona_defaults=persona_defaults,
research_persona=research_persona,
onboarding_completed=onboarding_completed,
persona_scheduled=persona_scheduled
)
except Exception as serialization_error:
logger.error(f"[ResearchConfig] Failed to create ResearchConfigResponse for user {user_id}: {serialization_error}", exc_info=True)
# Try without research_persona as fallback
response = ResearchConfigResponse(
provider_availability=provider_availability,
persona_defaults=persona_defaults,
research_persona=None,
onboarding_completed=onboarding_completed,
persona_scheduled=persona_scheduled
)
logger.info(
f"[ResearchConfig] Response for user {user_id}: "
f"onboarding_completed={onboarding_completed}, "
f"persona_exists={research_persona is not None}, "
f"persona_scheduled={persona_scheduled}"
)
return response
except HTTPException:
# Re-raise HTTPExceptions (e.g., 429, 401, etc.) to preserve status codes
raise
except Exception as e:
logger.error(f"[ResearchConfig] CRITICAL ERROR getting research config for user {user_id if user_id else 'unknown'}: {e}", exc_info=True)
import traceback
logger.error(f"[ResearchConfig] Full traceback:\n{traceback.format_exc()}")
raise HTTPException(
status_code=500,
detail=f"Failed to get research config: {str(e)}"
)
# Helper functions from RESEARCH_AI_HYPERPERSONALIZATION.md
def _get_domain_suggestions(industry: str) -> list[str]:
"""Get domain suggestions based on industry."""
domain_map = {
'Healthcare': ['pubmed.gov', 'nejm.org', 'thelancet.com', 'nih.gov'],
'Technology': ['techcrunch.com', 'wired.com', 'arstechnica.com', 'theverge.com'],
'Finance': ['wsj.com', 'bloomberg.com', 'ft.com', 'reuters.com'],
'Science': ['nature.com', 'sciencemag.org', 'cell.com', 'pnas.org'],
'Business': ['hbr.org', 'forbes.com', 'businessinsider.com', 'mckinsey.com'],
'Marketing': ['marketingland.com', 'adweek.com', 'hubspot.com', 'moz.com'],
'Education': ['edutopia.org', 'chronicle.com', 'insidehighered.com'],
'Real Estate': ['realtor.com', 'zillow.com', 'forbes.com'],
'Entertainment': ['variety.com', 'hollywoodreporter.com', 'deadline.com'],
'Travel': ['lonelyplanet.com', 'nationalgeographic.com', 'travelandleisure.com'],
'Fashion': ['vogue.com', 'elle.com', 'wwd.com'],
'Sports': ['espn.com', 'si.com', 'bleacherreport.com'],
'Law': ['law.com', 'abajournal.com', 'scotusblog.com'],
}
return domain_map.get(industry, [])
def _get_exa_category_suggestion(industry: str) -> Optional[str]:
"""Get Exa category suggestion based on industry."""
category_map = {
'Healthcare': 'research paper',
'Science': 'research paper',
'Finance': 'financial report',
'Technology': 'company',
'Business': 'company',
'Marketing': 'company',
'Education': 'research paper',
'Law': 'pdf',
}
return category_map.get(industry)