# ALwrity/backend/api/research_config.py

"""
Research Configuration API
Provides provider availability and persona-aware defaults for research.
"""

from fastapi import APIRouter, Depends, HTTPException, Query
from typing import Dict, Any, Optional
from loguru import logger
from pydantic import BaseModel

from middleware.auth_middleware import get_current_user
from services.user_api_key_context import get_exa_key, get_gemini_key
from services.onboarding.database_service import OnboardingDatabaseService
from services.onboarding.progress_service import get_onboarding_progress_service
from services.database import get_db
from sqlalchemy.orm import Session
from services.research.research_persona_service import ResearchPersonaService
from services.research.research_persona_scheduler import schedule_research_persona_generation
from models.research_persona_models import ResearchPersona


# Router on which the research-configuration endpoints in this module are
# registered through the @router.get decorators.
router = APIRouter()


class ProviderAvailability(BaseModel):
    """Provider availability status.

    Response model reporting, for each research provider, whether an API key
    is configured for the current user. The endpoints in this module treat a
    key as configured when it is present and not blank after stripping
    whitespace.
    """
    # True when a non-blank Gemini key was found for the user.
    google_available: bool
    # True when a non-blank Exa key was found for the user.
    exa_available: bool
    # Mirrors google_available as a string for display purposes.
    gemini_key_status: str  # 'configured' | 'missing'
    # Mirrors exa_available as a string for display purposes.
    exa_key_status: str  # 'configured' | 'missing'


class PersonaDefaults(BaseModel):
    """Persona-aware research defaults.

    Every field is optional, so an instance built with no arguments is a valid
    "nothing known about this user" response; the endpoints in this module use
    that as their fallback.
    """
    # Industry taken from the core persona or, failing that, the website
    # analysis; the endpoints fall back to 'General' when neither provides one.
    industry: Optional[str] = None
    # Target audience from the same sources, with the same 'General' fallback.
    target_audience: Optional[str] = None
    # Source domains suggested for the industry; empty when none are mapped.
    suggested_domains: list[str] = []
    # Exa search category suggested for the industry; None when none is mapped.
    suggested_exa_category: Optional[str] = None


class ResearchConfigResponse(BaseModel):
    """Combined research configuration response.

    Bundles provider availability, persona-derived defaults, the cached
    research persona (if any) and onboarding state into one payload, as
    returned by the ``/config`` endpoint.
    """
    # Which research providers have a configured API key for the user.
    provider_availability: ProviderAvailability
    # Industry / audience defaults and the suggestions derived from them.
    persona_defaults: PersonaDefaults
    # Cached research persona; None when no cached persona is available.
    research_persona: Optional[ResearchPersona] = None
    # True when the onboarding status reports 'is_completed'.
    onboarding_completed: bool = False
    # True when this request scheduled background persona generation.
    persona_scheduled: bool = False


@router.get("/provider-availability", response_model=ProviderAvailability)
async def get_provider_availability(
    current_user: Dict = Depends(get_current_user)
):
    """
    Check which research providers are available for the current user.

    A provider counts as available when its stored API key is present and not
    blank after stripping whitespace.

    Args:
        current_user: Authenticated user mapping injected by
            ``get_current_user``; only its ``id`` entry is read.

    Returns:
        ProviderAvailability with:
        - google_available: True if Gemini key is configured
        - exa_available: True if Exa key is configured
        - Key status ('configured' | 'missing') for each provider

    Raises:
        HTTPException: 500 if the lookup fails for any reason.
    """
    # Bound before the try block so the error handler can always reference it.
    user_id = None
    try:
        user_id = str(current_user.get('id'))

        # Check API key availability
        gemini_key = get_gemini_key(user_id)
        exa_key = get_exa_key(user_id)

        google_available = bool(gemini_key and gemini_key.strip())
        exa_available = bool(exa_key and exa_key.strip())

        return ProviderAvailability(
            google_available=google_available,
            exa_available=exa_available,
            gemini_key_status='configured' if google_available else 'missing',
            exa_key_status='configured' if exa_available else 'missing'
        )
    except Exception as e:
        # loguru does not understand the stdlib ``exc_info`` keyword: it would
        # be used as a str.format() argument for the message (which can raise
        # if the message contains braces) and no traceback would be recorded.
        # opt(exception=True) attaches the active exception's traceback.
        logger.opt(exception=True).error(
            f"[ResearchConfig] Error checking provider availability for user {user_id or 'unknown'}: {e}"
        )
        raise HTTPException(status_code=500, detail=f"Failed to check provider availability: {str(e)}") from e


@router.get("/persona-defaults", response_model=PersonaDefaults)
async def get_persona_defaults(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get persona-aware research defaults for the current user.

    Industry and target audience are read from the stored core persona first;
    if no industry is found there, the website analysis' ``target_audience``
    data is used as a fallback. Domain and Exa category suggestions are then
    derived from the resolved industry.

    Args:
        current_user: Authenticated user mapping injected by
            ``get_current_user``; only its ``id`` entry is read.
        db: Database session injected by ``get_db``.

    Returns:
        PersonaDefaults with industry, target audience and suggestions. This
        endpoint is best-effort: when the session is missing or any error
        occurs, an empty ``PersonaDefaults()`` is returned instead of an error.
    """
    # Bound before the try block so the error handler can always reference it.
    user_id = None
    try:
        user_id = str(current_user.get('id'))

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_persona_defaults")
            # Return defaults rather than error
            return PersonaDefaults()

        db_service = OnboardingDatabaseService(db=db)

        # Try to get persona data first (most reliable source for industry/target_audience)
        persona_data = db_service.get_persona_data(user_id, db)
        industry = 'General'
        target_audience = 'General'

        if persona_data:
            # Both camelCase and snake_case keys are accepted for the core persona.
            core_persona = persona_data.get('corePersona') or persona_data.get('core_persona')
            if core_persona:
                if core_persona.get('industry'):
                    industry = core_persona['industry']
                if core_persona.get('target_audience'):
                    target_audience = core_persona['target_audience']

        # Fallback to website analysis if persona data doesn't have industry info
        if industry == 'General':
            website_analysis = db_service.get_website_analysis(user_id, db)
            if website_analysis:
                target_audience_data = website_analysis.get('target_audience', {})
                if isinstance(target_audience_data, dict):
                    # Extract from target_audience JSON field
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus:
                        industry = industry_focus
                    demographics = target_audience_data.get('demographics')
                    if demographics:
                        # The response field is a string, so non-string
                        # demographics are stringified.
                        target_audience = demographics if isinstance(demographics, str) else str(demographics)

        # Suggest domains based on industry
        suggested_domains = _get_domain_suggestions(industry)

        # Suggest Exa category based on industry
        suggested_exa_category = _get_exa_category_suggestion(industry)

        return PersonaDefaults(
            industry=industry,
            target_audience=target_audience,
            suggested_domains=suggested_domains,
            suggested_exa_category=suggested_exa_category
        )
    except Exception as e:
        # loguru does not understand the stdlib ``exc_info`` keyword: it would
        # be used as a str.format() argument for the message (which can raise
        # if the message contains braces) and no traceback would be recorded.
        # opt(exception=True) attaches the active exception's traceback.
        logger.opt(exception=True).error(
            f"[ResearchConfig] Error getting persona defaults for user {user_id or 'unknown'}: {e}"
        )
        # Return defaults rather than error
        return PersonaDefaults()


@router.get("/research-persona")
async def get_research_persona(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db),
    force_refresh: bool = Query(False, description="Force regenerate persona even if cache is valid")
):
    """
    Get or generate research persona for the current user.

    Query params:
    - force_refresh: If true, regenerate persona even if cache is valid (default: false)

    Args:
        current_user: Authenticated user mapping injected by
            ``get_current_user``; only its ``id`` entry is read.
        db: Database session injected by ``get_db``.
        force_refresh: Passed through to ``ResearchPersonaService.get_or_generate``.

    Returns:
        The research persona as a dict (personalized defaults, suggestions,
        and configurations).

    Raises:
        HTTPException: 401 if the user mapping carries no id, 404 if no
            persona is available, 500 if the database session is missing or
            an unexpected error occurs. HTTPExceptions raised by the persona
            service are propagated unchanged.
    """
    # Bound before the try block so the error handler can always reference it.
    user_id = None
    try:
        # Check the raw id before converting: str(None) is the truthy string
        # 'None', which would make the authentication guard unreachable.
        raw_user_id = current_user.get('id')
        user_id = str(raw_user_id) if raw_user_id is not None else ''
        if not user_id:
            raise HTTPException(status_code=401, detail="User not authenticated")

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_research_persona")
            raise HTTPException(status_code=500, detail="Database not available")

        persona_service = ResearchPersonaService(db_session=db)
        research_persona = persona_service.get_or_generate(user_id, force_refresh=force_refresh)

        if not research_persona:
            raise HTTPException(
                status_code=404,
                detail="Research persona not available. Complete onboarding to generate one."
            )

        return research_persona.dict()

    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve status code and details
        raise
    except Exception as e:
        # loguru does not understand the stdlib ``exc_info`` keyword: it would
        # be used as a str.format() argument for the message (which can raise
        # if the message contains braces) and no traceback would be recorded.
        # opt(exception=True) attaches the active exception's traceback.
        logger.opt(exception=True).error(
            f"[ResearchConfig] Error getting research persona for user {user_id or 'unknown'}: {e}"
        )
        raise HTTPException(status_code=500, detail=f"Failed to get research persona: {str(e)}") from e


@router.get("/config", response_model=ResearchConfigResponse)
async def get_research_config(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get complete research configuration including provider availability and persona defaults.

    The response combines:
    - provider availability derived from the user's Gemini and Exa keys,
    - persona defaults (core persona first, website analysis as fallback),
    - onboarding completion status,
    - the cached research persona, if one exists.

    When onboarding is completed, no cached persona exists and persona data is
    available, background persona generation is scheduled and
    ``persona_scheduled`` is set in the response. Persona generation is never
    triggered synchronously from this endpoint.

    Args:
        current_user: Authenticated user mapping injected by
            ``get_current_user``; only its ``id`` entry is read.
        db: Database session injected by ``get_db``.

    Returns:
        ResearchConfigResponse for the current user.

    Raises:
        HTTPException: 500 if the database session is missing or an
            unexpected error occurs outside the best-effort sections.
    """
    # NOTE: loguru does not understand the stdlib ``exc_info`` keyword; it
    # would be used as a str.format() argument for the message (which can
    # raise if the message contains braces) and no traceback would be
    # recorded. All handlers below use opt(exception=True) instead, which
    # attaches the active exception's traceback.
    user_id = None
    try:
        user_id = str(current_user.get('id'))
        logger.info(f"[ResearchConfig] Starting get_research_config for user {user_id}")

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_research_config")
            raise HTTPException(status_code=500, detail="Database session not available")

        # Get provider availability
        logger.debug(f"[ResearchConfig] Getting provider availability for user {user_id}")
        gemini_key = get_gemini_key(user_id)
        exa_key = get_exa_key(user_id)

        google_available = bool(gemini_key and gemini_key.strip())
        exa_available = bool(exa_key and exa_key.strip())

        provider_availability = ProviderAvailability(
            google_available=google_available,
            exa_available=exa_available,
            gemini_key_status='configured' if google_available else 'missing',
            exa_key_status='configured' if exa_available else 'missing'
        )

        # Get persona defaults
        logger.debug(f"[ResearchConfig] Getting persona defaults for user {user_id}")
        db_service = OnboardingDatabaseService(db=db)

        # Try to get persona data first (most reliable source for industry/target_audience)
        try:
            persona_data = db_service.get_persona_data(user_id, db)
        except Exception as e:
            logger.opt(exception=True).error(
                f"[ResearchConfig] Error getting persona data for user {user_id}: {e}"
            )
            persona_data = None

        industry = 'General'
        target_audience = 'General'

        if persona_data:
            # Both camelCase and snake_case keys are accepted for the core persona.
            core_persona = persona_data.get('corePersona') or persona_data.get('core_persona')
            if core_persona:
                if core_persona.get('industry'):
                    industry = core_persona['industry']
                if core_persona.get('target_audience'):
                    target_audience = core_persona['target_audience']

        # Fallback to website analysis if persona data doesn't have industry info
        if industry == 'General':
            website_analysis = db_service.get_website_analysis(user_id, db)
            if website_analysis:
                target_audience_data = website_analysis.get('target_audience', {})
                if isinstance(target_audience_data, dict):
                    # Extract from target_audience JSON field
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus:
                        industry = industry_focus
                    demographics = target_audience_data.get('demographics')
                    if demographics:
                        # The response field is a string, so non-string
                        # demographics are stringified.
                        target_audience = demographics if isinstance(demographics, str) else str(demographics)

        persona_defaults = PersonaDefaults(
            industry=industry,
            target_audience=target_audience,
            suggested_domains=_get_domain_suggestions(industry),
            suggested_exa_category=_get_exa_category_suggestion(industry)
        )

        # Check onboarding completion status
        onboarding_completed = False
        try:
            logger.debug(f"[ResearchConfig] Checking onboarding status for user {user_id}")
            progress_service = get_onboarding_progress_service()
            onboarding_status = progress_service.get_onboarding_status(user_id)
            onboarding_completed = onboarding_status.get('is_completed', False)
            logger.info(
                f"[ResearchConfig] Onboarding status check for user {user_id}: "
                f"is_completed={onboarding_completed}, "
                f"current_step={onboarding_status.get('current_step')}, "
                f"progress={onboarding_status.get('completion_percentage')}"
            )
        except Exception as e:
            logger.opt(exception=True).error(
                f"[ResearchConfig] Could not check onboarding status for user {user_id}: {e}"
            )
            # Continue with onboarding_completed=False

        # Get research persona (optional, may not exist for all users)
        # CRITICAL: Use get_cached_only() to avoid triggering rate limit checks
        # Only return persona if it's already cached - don't generate on config load
        research_persona = None
        persona_scheduled = False
        try:
            logger.debug(f"[ResearchConfig] Getting cached research persona for user {user_id}")
            persona_service = ResearchPersonaService(db_session=db)
            research_persona = persona_service.get_cached_only(user_id)

            logger.info(
                f"[ResearchConfig] Research persona check for user {user_id}: "
                f"persona_exists={research_persona is not None}, "
                f"onboarding_completed={onboarding_completed}"
            )

            # If onboarding is completed but persona doesn't exist, schedule generation
            if onboarding_completed and not research_persona:
                try:
                    # Check if persona data exists (to ensure we have data to generate from).
                    # Fetched again rather than reusing the earlier value, because the
                    # earlier fetch is replaced with None when it raises.
                    db_service = OnboardingDatabaseService(db=db)
                    persona_data = db_service.get_persona_data(user_id, db)
                    if persona_data and (persona_data.get('corePersona') or persona_data.get('platformPersonas') or
                                        persona_data.get('core_persona') or persona_data.get('platform_personas')):
                        # Schedule persona generation (20 minutes from now)
                        schedule_research_persona_generation(user_id, delay_minutes=20)
                        logger.info(f"Scheduled research persona generation for user {user_id} (onboarding already completed)")
                        persona_scheduled = True
                    else:
                        logger.info(f"Onboarding completed but no persona data found for user {user_id} - cannot schedule persona generation")
                except Exception as e:
                    logger.opt(exception=True).warning(
                        f"Failed to schedule research persona generation: {e}"
                    )
        except Exception as e:
            # get_cached_only() never raises HTTPException, but catch any unexpected errors
            logger.opt(exception=True).warning(
                f"[ResearchConfig] Could not load cached research persona for user {user_id}: {e}"
            )

        # FastAPI will automatically serialize the ResearchPersona Pydantic model
        # If there's a serialization issue, we catch it and log it
        try:
            response = ResearchConfigResponse(
                provider_availability=provider_availability,
                persona_defaults=persona_defaults,
                research_persona=research_persona,
                onboarding_completed=onboarding_completed,
                persona_scheduled=persona_scheduled
            )
        except Exception as serialization_error:
            logger.opt(exception=True).error(
                f"[ResearchConfig] Failed to create ResearchConfigResponse for user {user_id}: {serialization_error}"
            )
            # Try without research_persona as fallback
            response = ResearchConfigResponse(
                provider_availability=provider_availability,
                persona_defaults=persona_defaults,
                research_persona=None,
                onboarding_completed=onboarding_completed,
                persona_scheduled=persona_scheduled
            )

        logger.info(
            f"[ResearchConfig] Response for user {user_id}: "
            f"onboarding_completed={onboarding_completed}, "
            f"persona_exists={research_persona is not None}, "
            f"persona_scheduled={persona_scheduled}"
        )

        return response
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429, 401, etc.) to preserve status codes
        raise
    except Exception as e:
        # The traceback is attached by opt(exception=True), so a separate
        # traceback.format_exc() log entry is not needed.
        logger.opt(exception=True).error(
            f"[ResearchConfig] CRITICAL ERROR getting research config for user {user_id if user_id else 'unknown'}: {e}"
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get research config: {str(e)}"
        ) from e


# Helper functions from RESEARCH_AI_HYPERPERSONALIZATION.md

def _get_domain_suggestions(industry: str) -> list[str]:
    """Get domain suggestions based on industry.

    The lookup ignores letter case and surrounding whitespace, because the
    industry value comes from free-form persona / website-analysis data
    (e.g. 'healthcare' or ' Technology ' match the same entries as the
    canonical spelling).

    Args:
        industry: Industry name to look up.

    Returns:
        A new list of suggested source domains, or an empty list when the
        industry is unknown, blank, or not a string.
    """
    domain_map = {
        'Healthcare': ['pubmed.gov', 'nejm.org', 'thelancet.com', 'nih.gov'],
        'Technology': ['techcrunch.com', 'wired.com', 'arstechnica.com', 'theverge.com'],
        'Finance': ['wsj.com', 'bloomberg.com', 'ft.com', 'reuters.com'],
        'Science': ['nature.com', 'sciencemag.org', 'cell.com', 'pnas.org'],
        'Business': ['hbr.org', 'forbes.com', 'businessinsider.com', 'mckinsey.com'],
        'Marketing': ['marketingland.com', 'adweek.com', 'hubspot.com', 'moz.com'],
        'Education': ['edutopia.org', 'chronicle.com', 'insidehighered.com'],
        'Real Estate': ['realtor.com', 'zillow.com', 'forbes.com'],
        'Entertainment': ['variety.com', 'hollywoodreporter.com', 'deadline.com'],
        'Travel': ['lonelyplanet.com', 'nationalgeographic.com', 'travelandleisure.com'],
        'Fashion': ['vogue.com', 'elle.com', 'wwd.com'],
        'Sports': ['espn.com', 'si.com', 'bleacherreport.com'],
        'Law': ['law.com', 'abajournal.com', 'scotusblog.com'],
    }
    # Callers may pass through whatever was stored upstream; anything that is
    # not a string cannot name an industry.
    if not isinstance(industry, str):
        return []

    # Compare on a normalized form so casing/padding differences still match.
    normalized_map = {name.casefold(): domains for name, domains in domain_map.items()}
    return normalized_map.get(industry.strip().casefold(), [])


def _get_exa_category_suggestion(industry: str) -> Optional[str]:
    """Get Exa category suggestion based on industry.

    The lookup ignores letter case and surrounding whitespace, because the
    industry value comes from free-form persona / website-analysis data.

    Args:
        industry: Industry name to look up.

    Returns:
        The suggested Exa category, or None when the industry is unknown,
        blank, or not a string.
    """
    category_map = {
        'Healthcare': 'research paper',
        'Science': 'research paper',
        'Finance': 'financial report',
        'Technology': 'company',
        'Business': 'company',
        'Marketing': 'company',
        'Education': 'research paper',
        'Law': 'pdf',
    }
    # Anything that is not a string cannot name an industry.
    if not isinstance(industry, str):
        return None

    # Compare on a normalized form so casing/padding differences still match.
    normalized_map = {name.casefold(): category for name, category in category_map.items()}
    return normalized_map.get(industry.strip().casefold())