# moreminimore-marketing/backend/models/research_intent_models.py

"""
Research Intent Models

Pydantic models for understanding user research intent.
These models capture what the user actually wants to accomplish from their research,
enabling targeted query generation and intent-aware result analysis.

Author: ALwrity Team
Version: 1.0
"""

from enum import Enum
from typing import Dict, Any, List, Optional, Union
from pydantic import BaseModel, Field
from datetime import datetime


class ResearchPurpose(str, Enum):
    """The reason a user is doing research.

    Members are ``str`` subclasses, so each one compares equal to its
    raw string value.
    """

    # Understand a topic for personal knowledge.
    LEARN = "learn"
    # Write an article, blog, podcast or video.
    CREATE_CONTENT = "create_content"
    # Choose between options.
    MAKE_DECISION = "make_decision"
    # Compare alternatives or competitors.
    COMPARE = "compare"
    # Find a solution to a problem.
    SOLVE_PROBLEM = "solve_problem"
    # Get statistics, facts or citations.
    FIND_DATA = "find_data"
    # Understand market or industry trends.
    EXPLORE_TRENDS = "explore_trends"
    # Verify claims or information.
    VALIDATE = "validate"
    # Brainstorm content ideas.
    GENERATE_IDEAS = "generate_ideas"


class ContentOutput(str, Enum):
    """The kind of content the research will be turned into."""

    BLOG = "blog"
    PODCAST = "podcast"
    VIDEO = "video"
    SOCIAL_POST = "social_post"
    NEWSLETTER = "newsletter"
    PRESENTATION = "presentation"
    REPORT = "report"
    WHITEPAPER = "whitepaper"
    EMAIL = "email"
    # Catch-all used when no specific output is targeted.
    GENERAL = "general"


class ExpectedDeliverable(str, Enum):
    """The concrete kinds of output a user expects research to produce."""

    # Numbers, data points, percentages.
    KEY_STATISTICS = "key_statistics"
    # Authoritative statements.
    EXPERT_QUOTES = "expert_quotes"
    # Real examples and success stories.
    CASE_STUDIES = "case_studies"
    # Side-by-side analysis.
    COMPARISONS = "comparisons"
    # Market or industry trends.
    TRENDS = "trends"
    # Recommendations and guidelines.
    BEST_PRACTICES = "best_practices"
    # Process / how-to instructions.
    STEP_BY_STEP = "step_by_step"
    # Advantages and disadvantages.
    PROS_CONS = "pros_cons"
    # Clear explanations of concepts.
    DEFINITIONS = "definitions"
    # Authoritative sources.
    CITATIONS = "citations"
    # Concrete examples.
    EXAMPLES = "examples"
    # Future outlook.
    PREDICTIONS = "predictions"


class ResearchDepthLevel(str, Enum):
    """How thorough the research is expected to be."""

    # Quick summary, surface level.
    OVERVIEW = "overview"
    # In-depth analysis.
    DETAILED = "detailed"
    # Comprehensive, expert-level research.
    EXPERT = "expert"


class InputType(str, Enum):
    """The form of user input that was detected."""

    # Simple keywords, e.g. "AI healthcare 2025".
    KEYWORDS = "keywords"
    # A question, e.g. "What are the best AI tools?".
    QUESTION = "question"
    # A goal statement, e.g. "I need to write a blog about...".
    GOAL = "goal"
    # A combination of the forms above.
    MIXED = "mixed"


# ============================================================================
# Structured Deliverable Models
# ============================================================================

class StatisticWithCitation(BaseModel):
    """A single statistic together with the attribution needed to cite it."""

    # The claim itself, plus the bare number when one is available.
    statistic: str = Field(
        ...,
        description="The full statistical statement",
    )
    value: Optional[str] = Field(
        default=None,
        description="The numeric value (e.g., '72%')",
    )
    context: str = Field(
        ...,
        description="Context of when/where this was measured",
    )

    # Where the statistic came from.
    source: str = Field(..., description="Source name/publication")
    url: str = Field(..., description="Source URL")

    # Quality signals; credibility is constrained to the closed range [0, 1].
    credibility: float = Field(
        default=0.8,
        ge=0.0,
        le=1.0,
        description="Credibility score 0-1",
    )
    recency: Optional[str] = Field(
        default=None,
        description="How recent the data is",
    )


class ExpertQuote(BaseModel):
    """A quotation attributed to a named speaker, with its source."""

    quote: str = Field(..., description="The actual quote")

    # Who said it; only the speaker's name is mandatory.
    speaker: str = Field(..., description="Name of the speaker")
    title: Optional[str] = Field(
        default=None,
        description="Title/role of the speaker",
    )
    organization: Optional[str] = Field(
        default=None,
        description="Organization/company",
    )
    context: Optional[str] = Field(
        default=None,
        description="Context of the quote",
    )

    # Where the quote was found.
    source: str = Field(..., description="Source name")
    url: str = Field(..., description="Source URL")


class CaseStudySummary(BaseModel):
    """Condensed challenge / solution / outcome view of one case study."""

    title: str = Field(..., description="Case study title")
    organization: str = Field(..., description="Organization featured")

    # The narrative arc of the case study.
    challenge: str = Field(
        ...,
        description="The challenge/problem faced",
    )
    solution: str = Field(
        ...,
        description="The solution implemented",
    )
    outcome: str = Field(
        ...,
        description="The results achieved",
    )
    key_metrics: List[str] = Field(
        default_factory=list,
        description="Key metrics/numbers",
    )

    # Where the case study was found.
    source: str = Field(..., description="Source name")
    url: str = Field(..., description="Source URL")


class TrendAnalysis(BaseModel):
    """One observed trend with its direction, evidence and sources."""

    trend: str = Field(..., description="The trend description")
    # Free-form string; the description lists the expected vocabulary but
    # the field itself is not restricted to those values.
    direction: str = Field(
        ...,
        description="growing, declining, emerging, stable",
    )
    evidence: List[str] = Field(
        default_factory=list,
        description="Supporting evidence",
    )
    impact: Optional[str] = Field(
        default=None,
        description="Potential impact",
    )
    timeline: Optional[str] = Field(
        default=None,
        description="Timeline of the trend",
    )
    sources: List[str] = Field(
        default_factory=list,
        description="Source URLs",
    )


class ComparisonItem(BaseModel):
    """One of the options being compared."""

    # Only the name is required; everything else is optional detail.
    name: str
    description: Optional[str] = Field(default=None)

    # Qualitative assessment of the option.
    pros: List[str] = Field(default_factory=list)
    cons: List[str] = Field(default_factory=list)

    # String-to-string mapping of feature details for this option.
    features: Dict[str, str] = Field(default_factory=dict)

    rating: Optional[float] = Field(default=None)
    source: Optional[str] = Field(default=None)


class ComparisonTable(BaseModel):
    """A titled comparison of several options against shared criteria."""

    title: str = Field(..., description="Comparison title")
    criteria: List[str] = Field(
        default_factory=list,
        description="Comparison criteria",
    )
    items: List[ComparisonItem] = Field(
        default_factory=list,
        description="Items being compared",
    )

    # Conclusion of the comparison; both parts may be left unset.
    winner: Optional[str] = Field(
        default=None,
        description="Recommended option if applicable",
    )
    verdict: Optional[str] = Field(
        default=None,
        description="Summary verdict",
    )


class ProsCons(BaseModel):
    """Advantages and disadvantages of one subject, with a conclusion."""

    subject: str = Field(
        ...,
        description="What is being analyzed",
    )
    pros: List[str] = Field(
        default_factory=list,
        description="Advantages",
    )
    cons: List[str] = Field(
        default_factory=list,
        description="Disadvantages",
    )
    # Required even when both lists are empty.
    balanced_verdict: str = Field(
        ...,
        description="Balanced conclusion",
    )


class SourceWithRelevance(BaseModel):
    """A source annotated with relevance and credibility scores."""

    # Identification of the source.
    title: str
    url: str
    excerpt: Optional[str] = Field(default=None)

    # Relevance score is constrained to the closed range [0, 1].
    relevance_score: float = Field(default=0.8, ge=0.0, le=1.0)
    relevance_reason: Optional[str] = Field(default=None)

    # Kind of source, e.g. article, research paper, news.
    content_type: Optional[str] = Field(default=None)
    published_date: Optional[str] = Field(default=None)

    # Credibility score is constrained to the closed range [0, 1].
    credibility_score: float = Field(default=0.8, ge=0.0, le=1.0)


# ============================================================================
# Intent Models
# ============================================================================

class ResearchIntent(BaseModel):
    """
    What the user actually wants from their research.
    This is inferred from user input + research persona.

    Enum-typed fields (``purpose``, ``content_output``, ``depth``,
    ``input_type``) are stored as their raw string values because the
    model config sets ``use_enum_values = True``. Their defaults are
    declared as raw values too, so a defaulted field and an explicitly
    supplied one have the same runtime type.
    """

    # Core understanding
    primary_question: str = Field(..., description="The main question to answer")
    secondary_questions: List[str] = Field(
        default_factory=list,
        description="Related questions that should be answered"
    )

    # Purpose classification
    # NOTE: ``use_enum_values`` converts enum members only while validating
    # input, and defaults are not validated. Defaulting to the Enum member
    # would leave a member here while supplied values become plain strings,
    # so the default is given as ``.value``.
    purpose: ResearchPurpose = Field(
        ResearchPurpose.LEARN.value,
        description="Why the user is researching"
    )
    content_output: ContentOutput = Field(
        ContentOutput.GENERAL.value,
        description="What content type will be created"
    )

    # What they need from results
    expected_deliverables: List[ExpectedDeliverable] = Field(
        default_factory=list,
        description="Specific outputs the user expects"
    )

    # Depth and focus
    depth: ResearchDepthLevel = Field(
        ResearchDepthLevel.DETAILED.value,
        description="How deep the research should go"
    )
    focus_areas: List[str] = Field(
        default_factory=list,
        description="Specific aspects to focus on"
    )

    # Constraints
    perspective: Optional[str] = Field(
        None,
        description="Perspective to research from (e.g., 'hospital administrator')"
    )
    time_sensitivity: Optional[str] = Field(
        None,
        description="Time constraint: 'real_time', 'recent', 'historical', 'evergreen'"
    )

    # Detected input type
    input_type: InputType = Field(
        InputType.KEYWORDS.value,
        description="Type of user input detected"
    )

    # Original user input (for reference)
    original_input: str = Field(..., description="The original user input")

    # Confidence in inference (constrained to the closed range [0, 1])
    confidence: float = Field(
        0.8,
        ge=0.0,
        le=1.0,
        description="Confidence in the intent inference"
    )
    needs_clarification: bool = Field(
        False,
        description="True if AI is uncertain and needs user clarification"
    )
    clarifying_questions: List[str] = Field(
        default_factory=list,
        description="Questions to ask user if uncertain"
    )

    class Config:
        # Store enum fields as their raw values rather than Enum members.
        use_enum_values = True


class ResearchQuery(BaseModel):
    """A single search query tagged with the deliverable it targets."""

    query: str = Field(
        ...,
        description="The search query",
    )
    purpose: ExpectedDeliverable = Field(
        ...,
        description="What this query targets",
    )
    provider: str = Field(
        default="exa",
        description="Preferred provider: exa, tavily, google",
    )
    # Integer constrained to the closed range [1, 5].
    priority: int = Field(
        default=1,
        ge=1,
        le=5,
        description="Priority 1-5, higher = more important",
    )
    expected_results: str = Field(
        ...,
        description="What we expect to find with this query",
    )


class IntentInferenceRequest(BaseModel):
    """Payload asking for a research intent to be inferred from user input."""

    user_input: str = Field(
        ...,
        description="User's keywords, question, or goal",
    )
    keywords: List[str] = Field(
        default_factory=list,
        description="Extracted keywords",
    )

    # Context toggles; both default to enabled.
    use_persona: bool = Field(
        default=True,
        description="Use research persona for context",
    )
    use_competitor_data: bool = Field(
        default=True,
        description="Use competitor data for context",
    )


class IntentInferenceResponse(BaseModel):
    """Result of intent inference: the intent plus suggestions built on it."""

    success: bool = True

    # The inferred intent and a summary of how it was understood.
    intent: ResearchIntent
    analysis_summary: str = Field(..., description="AI's understanding of user intent")

    # Suggestions; each list defaults to empty.
    suggested_queries: List[ResearchQuery] = Field(
        description="Generated research queries based on intent",
        default_factory=list,
    )
    suggested_keywords: List[str] = Field(
        description="Enhanced/expanded keywords",
        default_factory=list,
    )
    suggested_angles: List[str] = Field(
        description="Research angles to explore",
        default_factory=list,
    )

    # Free-form dictionaries; this model does not constrain their keys.
    quick_options: List[Dict[str, Any]] = Field(
        description="Quick options for user to confirm/modify intent",
        default_factory=list,
    )


# ============================================================================
# Intent-Driven Research Result
# ============================================================================

class IntentDrivenResearchResult(BaseModel):
    """
    Final research output, grouped by what the user asked for.

    Produced after intent-aware analysis. Only ``primary_answer`` is
    required; every other field has a default.
    """

    success: bool = True

    # ---- Direct answers ----
    primary_answer: str = Field(
        ...,
        description="Direct answer to primary question",
    )
    secondary_answers: Dict[str, str] = Field(
        description="Answers to secondary questions (question → answer)",
        default_factory=dict,
    )

    # ---- Deliverables (populated based on user's expected_deliverables) ----
    statistics: List[StatisticWithCitation] = Field(default_factory=list)
    expert_quotes: List[ExpertQuote] = Field(default_factory=list)
    case_studies: List[CaseStudySummary] = Field(default_factory=list)
    comparisons: List[ComparisonTable] = Field(default_factory=list)
    trends: List[TrendAnalysis] = Field(default_factory=list)
    best_practices: List[str] = Field(default_factory=list)
    step_by_step: List[str] = Field(default_factory=list)
    # A single pros/cons analysis, unlike the list-valued deliverables.
    pros_cons: Optional[ProsCons] = Field(default=None)
    definitions: Dict[str, str] = Field(
        description="Term → definition mappings",
        default_factory=dict,
    )
    examples: List[str] = Field(default_factory=list)
    predictions: List[str] = Field(default_factory=list)

    # ---- Content-ready outputs ----
    executive_summary: str = Field(
        default="",
        description="2-3 sentence summary",
    )
    key_takeaways: List[str] = Field(
        description="5-7 key bullet points",
        default_factory=list,
    )
    suggested_outline: List[str] = Field(
        description="Suggested content outline if creating content",
        default_factory=list,
    )

    # ---- Supporting data ----
    sources: List[SourceWithRelevance] = Field(default_factory=list)
    raw_content: Optional[str] = Field(
        default=None,
        description="Raw content for further processing",
    )

    # ---- Research quality metadata ----
    # Confidence is constrained to the closed range [0, 1].
    confidence: float = Field(default=0.8, ge=0.0, le=1.0)
    gaps_identified: List[str] = Field(
        description="What we couldn't find",
        default_factory=list,
    )
    follow_up_queries: List[str] = Field(
        description="Suggested additional research",
        default_factory=list,
    )

    # ---- Original intent, kept for reference ----
    original_intent: Optional[ResearchIntent] = Field(default=None)

    # ---- Error handling ----
    error_message: Optional[str] = Field(default=None)

    class Config:
        use_enum_values = True