Base code

2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions
--- a/backend/models/research_intent_models.py
+++ b/backend/models/research_intent_models.py
@@ -0,0 +1,355 @@
+"""
+Research Intent Models
+
+Pydantic models for understanding user research intent.
+These models capture what the user actually wants to accomplish from their research,
+enabling targeted query generation and intent-aware result analysis.
+
+Author: ALwrity Team
+Version: 1.0
+"""
+
+from enum import Enum
+from typing import Dict, Any, List, Optional, Union
+from pydantic import BaseModel, Field
+from datetime import datetime
+
+
+class ResearchPurpose(str, Enum):
+    """Why is the user researching?"""
+    LEARN = "learn"  # Understand a topic for personal knowledge
+    CREATE_CONTENT = "create_content"  # Write article/blog/podcast/video
+    MAKE_DECISION = "make_decision"  # Choose between options
+    COMPARE = "compare"  # Compare alternatives/competitors
+    SOLVE_PROBLEM = "solve_problem"  # Find solution to a problem
+    FIND_DATA = "find_data"  # Get statistics/facts/citations
+    EXPLORE_TRENDS = "explore_trends"  # Understand market/industry trends
+    VALIDATE = "validate"  # Verify claims/information
+    GENERATE_IDEAS = "generate_ideas"  # Brainstorm content ideas
+
+
+class ContentOutput(str, Enum):
+    """What content type will be created from this research?"""
+    BLOG = "blog"
+    PODCAST = "podcast"
+    VIDEO = "video"
+    SOCIAL_POST = "social_post"
+    NEWSLETTER = "newsletter"
+    PRESENTATION = "presentation"
+    REPORT = "report"
+    WHITEPAPER = "whitepaper"
+    EMAIL = "email"
+    GENERAL = "general"  # No specific output
+
+
+class ExpectedDeliverable(str, Enum):
+    """What specific outputs the user expects from research."""
+    KEY_STATISTICS = "key_statistics"  # Numbers, data points, percentages
+    EXPERT_QUOTES = "expert_quotes"  # Authoritative statements
+    CASE_STUDIES = "case_studies"  # Real examples and success stories
+    COMPARISONS = "comparisons"  # Side-by-side analysis
+    TRENDS = "trends"  # Market/industry trends
+    BEST_PRACTICES = "best_practices"  # Recommendations and guidelines
+    STEP_BY_STEP = "step_by_step"  # Process/how-to instructions
+    PROS_CONS = "pros_cons"  # Advantages/disadvantages
+    DEFINITIONS = "definitions"  # Clear explanations of concepts
+    CITATIONS = "citations"  # Authoritative sources
+    EXAMPLES = "examples"  # Concrete examples
+    PREDICTIONS = "predictions"  # Future outlook
+
+
+class ResearchDepthLevel(str, Enum):
+    """How deep the research should go."""
+    OVERVIEW = "overview"  # Quick summary, surface level
+    DETAILED = "detailed"  # In-depth analysis
+    EXPERT = "expert"  # Comprehensive, expert-level research
+
+
+class InputType(str, Enum):
+    """Type of user input detected."""
+    KEYWORDS = "keywords"  # Simple keywords: "AI healthcare 2025"
+    QUESTION = "question"  # A question: "What are the best AI tools?"
+    GOAL = "goal"  # Goal statement: "I need to write a blog about..."
+    MIXED = "mixed"  # Combination of above
+
+
+# ============================================================================
+# Structured Deliverable Models
+# ============================================================================
+
+class StatisticWithCitation(BaseModel):
+    """A statistic with full attribution."""
+    statistic: str = Field(..., description="The full statistical statement")
+    value: Optional[str] = Field(None, description="The numeric value (e.g., '72%')")
+    context: str = Field(..., description="Context of when/where this was measured")
+    source: str = Field(..., description="Source name/publication")
+    url: str = Field(..., description="Source URL")
+    credibility: float = Field(0.8, ge=0.0, le=1.0, description="Credibility score 0-1")
+    recency: Optional[str] = Field(None, description="How recent the data is")
+
+
+class ExpertQuote(BaseModel):
+    """A quote from an authoritative source."""
+    quote: str = Field(..., description="The actual quote")
+    speaker: str = Field(..., description="Name of the speaker")
+    title: Optional[str] = Field(None, description="Title/role of the speaker")
+    organization: Optional[str] = Field(None, description="Organization/company")
+    context: Optional[str] = Field(None, description="Context of the quote")
+    source: str = Field(..., description="Source name")
+    url: str = Field(..., description="Source URL")
+
+
+class CaseStudySummary(BaseModel):
+    """Summary of a case study."""
+    title: str = Field(..., description="Case study title")
+    organization: str = Field(..., description="Organization featured")
+    challenge: str = Field(..., description="The challenge/problem faced")
+    solution: str = Field(..., description="The solution implemented")
+    outcome: str = Field(..., description="The results achieved")
+    key_metrics: List[str] = Field(default_factory=list, description="Key metrics/numbers")
+    source: str = Field(..., description="Source name")
+    url: str = Field(..., description="Source URL")
+
+
+class TrendAnalysis(BaseModel):
+    """Analysis of a trend."""
+    trend: str = Field(..., description="The trend description")
+    direction: str = Field(..., description="growing, declining, emerging, stable")
+    evidence: List[str] = Field(default_factory=list, description="Supporting evidence")
+    impact: Optional[str] = Field(None, description="Potential impact")
+    timeline: Optional[str] = Field(None, description="Timeline of the trend")
+    sources: List[str] = Field(default_factory=list, description="Source URLs")
+
+
+class ComparisonItem(BaseModel):
+    """An item in a comparison."""
+    name: str
+    description: Optional[str] = None
+    pros: List[str] = Field(default_factory=list)
+    cons: List[str] = Field(default_factory=list)
+    features: Dict[str, str] = Field(default_factory=dict)
+    rating: Optional[float] = None
+    source: Optional[str] = None
+
+
+class ComparisonTable(BaseModel):
+    """Comparison between options."""
+    title: str = Field(..., description="Comparison title")
+    criteria: List[str] = Field(default_factory=list, description="Comparison criteria")
+    items: List[ComparisonItem] = Field(default_factory=list, description="Items being compared")
+    winner: Optional[str] = Field(None, description="Recommended option if applicable")
+    verdict: Optional[str] = Field(None, description="Summary verdict")
+
+
+class ProsCons(BaseModel):
+    """Pros and cons analysis."""
+    subject: str = Field(..., description="What is being analyzed")
+    pros: List[str] = Field(default_factory=list, description="Advantages")
+    cons: List[str] = Field(default_factory=list, description="Disadvantages")
+    balanced_verdict: str = Field(..., description="Balanced conclusion")
+
+
+class SourceWithRelevance(BaseModel):
+    """A source with relevance information."""
+    title: str
+    url: str
+    excerpt: Optional[str] = None
+    relevance_score: float = Field(0.8, ge=0.0, le=1.0)
+    relevance_reason: Optional[str] = None
+    content_type: Optional[str] = None  # article, research paper, news, etc.
+    published_date: Optional[str] = None
+    credibility_score: float = Field(0.8, ge=0.0, le=1.0)
+
+
+# ============================================================================
+# Intent Models
+# ============================================================================
+
+class ResearchIntent(BaseModel):
+    """
+    What the user actually wants from their research.
+    This is inferred from user input + research persona.
+    """
+    
+    # Core understanding
+    primary_question: str = Field(..., description="The main question to answer")
+    secondary_questions: List[str] = Field(
+        default_factory=list, 
+        description="Related questions that should be answered"
+    )
+    
+    # Purpose classification
+    purpose: ResearchPurpose = Field(
+        ResearchPurpose.LEARN,
+        description="Why the user is researching"
+    )
+    content_output: ContentOutput = Field(
+        ContentOutput.GENERAL,
+        description="What content type will be created"
+    )
+    
+    # What they need from results
+    expected_deliverables: List[ExpectedDeliverable] = Field(
+        default_factory=list,
+        description="Specific outputs the user expects"
+    )
+    
+    # Depth and focus
+    depth: ResearchDepthLevel = Field(
+        ResearchDepthLevel.DETAILED,
+        description="How deep the research should go"
+    )
+    focus_areas: List[str] = Field(
+        default_factory=list,
+        description="Specific aspects to focus on"
+    )
+    
+    # Constraints
+    perspective: Optional[str] = Field(
+        None, 
+        description="Perspective to research from (e.g., 'hospital administrator')"
+    )
+    time_sensitivity: Optional[str] = Field(
+        None,
+        description="Time constraint: 'real_time', 'recent', 'historical', 'evergreen'"
+    )
+    
+    # Detected input type
+    input_type: InputType = Field(
+        InputType.KEYWORDS,
+        description="Type of user input detected"
+    )
+    
+    # Original user input (for reference)
+    original_input: str = Field(..., description="The original user input")
+    
+    # Confidence in inference
+    confidence: float = Field(
+        0.8, 
+        ge=0.0, 
+        le=1.0,
+        description="Confidence in the intent inference"
+    )
+    needs_clarification: bool = Field(
+        False,
+        description="True if AI is uncertain and needs user clarification"
+    )
+    clarifying_questions: List[str] = Field(
+        default_factory=list,
+        description="Questions to ask user if uncertain"
+    )
+    
+    class Config:
+        use_enum_values = True
+
+
+class ResearchQuery(BaseModel):
+    """A targeted research query with purpose."""
+    query: str = Field(..., description="The search query")
+    purpose: ExpectedDeliverable = Field(..., description="What this query targets")
+    provider: str = Field("exa", description="Preferred provider: exa, tavily, google")
+    priority: int = Field(1, ge=1, le=5, description="Priority 1-5, higher = more important")
+    expected_results: str = Field(..., description="What we expect to find with this query")
+
+
+class IntentInferenceRequest(BaseModel):
+    """Request to infer research intent from user input."""
+    user_input: str = Field(..., description="User's keywords, question, or goal")
+    keywords: List[str] = Field(default_factory=list, description="Extracted keywords")
+    use_persona: bool = Field(True, description="Use research persona for context")
+    use_competitor_data: bool = Field(True, description="Use competitor data for context")
+
+
+class IntentInferenceResponse(BaseModel):
+    """Response from intent inference."""
+    success: bool = True
+    intent: ResearchIntent
+    analysis_summary: str = Field(..., description="AI's understanding of user intent")
+    suggested_queries: List[ResearchQuery] = Field(
+        default_factory=list,
+        description="Generated research queries based on intent"
+    )
+    suggested_keywords: List[str] = Field(
+        default_factory=list,
+        description="Enhanced/expanded keywords"
+    )
+    suggested_angles: List[str] = Field(
+        default_factory=list,
+        description="Research angles to explore"
+    )
+    quick_options: List[Dict[str, Any]] = Field(
+        default_factory=list,
+        description="Quick options for user to confirm/modify intent"
+    )
+
+
+# ============================================================================
+# Intent-Driven Research Result
+# ============================================================================
+
+class IntentDrivenResearchResult(BaseModel):
+    """
+    Research results organized by what user needs.
+    This is the final output after intent-aware analysis.
+    """
+    
+    success: bool = True
+    
+    # Direct answers
+    primary_answer: str = Field(..., description="Direct answer to primary question")
+    secondary_answers: Dict[str, str] = Field(
+        default_factory=dict,
+        description="Answers to secondary questions (question → answer)"
+    )
+    
+    # Deliverables (populated based on user's expected_deliverables)
+    statistics: List[StatisticWithCitation] = Field(default_factory=list)
+    expert_quotes: List[ExpertQuote] = Field(default_factory=list)
+    case_studies: List[CaseStudySummary] = Field(default_factory=list)
+    comparisons: List[ComparisonTable] = Field(default_factory=list)
+    trends: List[TrendAnalysis] = Field(default_factory=list)
+    best_practices: List[str] = Field(default_factory=list)
+    step_by_step: List[str] = Field(default_factory=list)
+    pros_cons: Optional[ProsCons] = None
+    definitions: Dict[str, str] = Field(
+        default_factory=dict,
+        description="Term → definition mappings"
+    )
+    examples: List[str] = Field(default_factory=list)
+    predictions: List[str] = Field(default_factory=list)
+    
+    # Content-ready outputs
+    executive_summary: str = Field("", description="2-3 sentence summary")
+    key_takeaways: List[str] = Field(
+        default_factory=list,
+        description="5-7 key bullet points"
+    )
+    suggested_outline: List[str] = Field(
+        default_factory=list,
+        description="Suggested content outline if creating content"
+    )
+    
+    # Supporting data
+    sources: List[SourceWithRelevance] = Field(default_factory=list)
+    raw_content: Optional[str] = Field(None, description="Raw content for further processing")
+    
+    # Research quality metadata
+    confidence: float = Field(0.8, ge=0.0, le=1.0)
+    gaps_identified: List[str] = Field(
+        default_factory=list,
+        description="What we couldn't find"
+    )
+    follow_up_queries: List[str] = Field(
+        default_factory=list,
+        description="Suggested additional research"
+    )
+    
+    # Original intent for reference
+    original_intent: Optional[ResearchIntent] = None
+    
+    # Error handling
+    error_message: Optional[str] = None
+    
+    class Config:
+        use_enum_values = True
+