"""
Research Intent Models

Pydantic models for understanding user research intent.
These models capture what the user actually wants to accomplish from their research,
enabling targeted query generation and intent-aware result analysis.

Author: ALwrity Team
Version: 1.0
"""
|
|
|
|
from enum import Enum
|
|
from typing import Dict, Any, List, Optional, Union
|
|
from pydantic import BaseModel, Field
|
|
from datetime import datetime
|
|
|
|
|
|
class ResearchPurpose(str, Enum):
    """Why is the user researching?

    Members also subclass ``str``, so each member compares equal to its raw
    value (for example ``ResearchPurpose.LEARN == "learn"``).
    """

    LEARN = "learn"                      # Understand a topic for personal knowledge
    CREATE_CONTENT = "create_content"    # Write article/blog/podcast/video
    MAKE_DECISION = "make_decision"      # Choose between options
    COMPARE = "compare"                  # Compare alternatives/competitors
    SOLVE_PROBLEM = "solve_problem"      # Find solution to a problem
    FIND_DATA = "find_data"              # Get statistics/facts/citations
    EXPLORE_TRENDS = "explore_trends"    # Understand market/industry trends
    VALIDATE = "validate"                # Verify claims/information
    GENERATE_IDEAS = "generate_ideas"    # Brainstorm content ideas
|
|
|
|
|
|
class ContentOutput(str, Enum):
    """What content type will be created from this research?

    Members also subclass ``str``, so each member compares equal to its raw
    value.
    """

    BLOG = "blog"
    PODCAST = "podcast"
    VIDEO = "video"
    SOCIAL_POST = "social_post"
    NEWSLETTER = "newsletter"
    PRESENTATION = "presentation"
    REPORT = "report"
    WHITEPAPER = "whitepaper"
    EMAIL = "email"
    GENERAL = "general"  # No specific output
|
|
|
|
|
|
class ExpectedDeliverable(str, Enum):
    """What specific outputs the user expects from research.

    Members also subclass ``str``, so each member compares equal to its raw
    value.
    """

    KEY_STATISTICS = "key_statistics"    # Numbers, data points, percentages
    EXPERT_QUOTES = "expert_quotes"      # Authoritative statements
    CASE_STUDIES = "case_studies"        # Real examples and success stories
    COMPARISONS = "comparisons"          # Side-by-side analysis
    TRENDS = "trends"                    # Market/industry trends
    BEST_PRACTICES = "best_practices"    # Recommendations and guidelines
    STEP_BY_STEP = "step_by_step"        # Process/how-to instructions
    PROS_CONS = "pros_cons"              # Advantages/disadvantages
    DEFINITIONS = "definitions"          # Clear explanations of concepts
    CITATIONS = "citations"              # Authoritative sources
    EXAMPLES = "examples"                # Concrete examples
    PREDICTIONS = "predictions"          # Future outlook
|
|
|
|
|
|
class ResearchDepthLevel(str, Enum):
    """How deep the research should go.

    Members also subclass ``str``, so each member compares equal to its raw
    value.
    """

    OVERVIEW = "overview"    # Quick summary, surface level
    DETAILED = "detailed"    # In-depth analysis
    EXPERT = "expert"        # Comprehensive, expert-level research
|
|
|
|
|
|
class InputType(str, Enum):
    """Type of user input detected.

    Members also subclass ``str``, so each member compares equal to its raw
    value.
    """

    KEYWORDS = "keywords"    # Simple keywords: "AI healthcare 2025"
    QUESTION = "question"    # A question: "What are the best AI tools?"
    GOAL = "goal"            # Goal statement: "I need to write a blog about..."
    MIXED = "mixed"          # Combination of above
|
|
|
|
|
|
# ============================================================================
# Structured Deliverable Models
# ============================================================================
|
|
|
|
class StatisticWithCitation(BaseModel):
    """A statistic with full attribution."""

    # Required: statistic, context, source, url.
    # Optional: value, recency (default None); credibility defaults to 0.8.
    statistic: str = Field(..., description="The full statistical statement")
    value: Optional[str] = Field(None, description="The numeric value (e.g., '72%')")
    context: str = Field(..., description="Context of when/where this was measured")
    source: str = Field(..., description="Source name/publication")
    url: str = Field(..., description="Source URL")
    # Constrained to the closed interval [0.0, 1.0].
    credibility: float = Field(0.8, ge=0.0, le=1.0, description="Credibility score 0-1")
    recency: Optional[str] = Field(None, description="How recent the data is")
|
|
|
|
|
|
class ExpertQuote(BaseModel):
    """A quote from an authoritative source."""

    # Required: quote, speaker, source, url. The remaining fields default to None.
    quote: str = Field(..., description="The actual quote")
    speaker: str = Field(..., description="Name of the speaker")
    title: Optional[str] = Field(None, description="Title/role of the speaker")
    organization: Optional[str] = Field(None, description="Organization/company")
    context: Optional[str] = Field(None, description="Context of the quote")
    source: str = Field(..., description="Source name")
    url: str = Field(..., description="Source URL")
|
|
|
|
|
|
class CaseStudySummary(BaseModel):
    """Summary of a case study."""

    # Every field is required except key_metrics, which defaults to a new empty list.
    title: str = Field(..., description="Case study title")
    organization: str = Field(..., description="Organization featured")
    challenge: str = Field(..., description="The challenge/problem faced")
    solution: str = Field(..., description="The solution implemented")
    outcome: str = Field(..., description="The results achieved")
    key_metrics: List[str] = Field(default_factory=list, description="Key metrics/numbers")
    source: str = Field(..., description="Source name")
    url: str = Field(..., description="Source URL")
|
|
|
|
|
|
class TrendAnalysis(BaseModel):
    """Analysis of a trend."""

    trend: str = Field(..., description="The trend description")
    # Plain string: the values named in the description are documentation only
    # and are not enforced by this model.
    direction: str = Field(..., description="growing, declining, emerging, stable")
    evidence: List[str] = Field(default_factory=list, description="Supporting evidence")
    impact: Optional[str] = Field(None, description="Potential impact")
    timeline: Optional[str] = Field(None, description="Timeline of the trend")
    sources: List[str] = Field(default_factory=list, description="Source URLs")
|
|
|
|
|
|
class ComparisonItem(BaseModel):
    """An item in a comparison."""

    # Only `name` is required; every other field has a default.
    name: str
    description: Optional[str] = None
    pros: List[str] = Field(default_factory=list)
    cons: List[str] = Field(default_factory=list)
    # String-to-string mapping; presumably feature name -> value (confirm with producers).
    features: Dict[str, str] = Field(default_factory=dict)
    # No range constraint is declared here; the rating scale is not defined by this model.
    rating: Optional[float] = None
    source: Optional[str] = None
|
|
|
|
|
|
class ComparisonTable(BaseModel):
    """Comparison between options."""

    # Only `title` is required; list fields default to new empty lists.
    title: str = Field(..., description="Comparison title")
    criteria: List[str] = Field(default_factory=list, description="Comparison criteria")
    items: List[ComparisonItem] = Field(default_factory=list, description="Items being compared")
    # Free-form string; not checked against the names in `items`.
    winner: Optional[str] = Field(None, description="Recommended option if applicable")
    verdict: Optional[str] = Field(None, description="Summary verdict")
|
|
|
|
|
|
class ProsCons(BaseModel):
    """Pros and cons analysis."""

    # `subject` and `balanced_verdict` are required; both lists default to empty.
    subject: str = Field(..., description="What is being analyzed")
    pros: List[str] = Field(default_factory=list, description="Advantages")
    cons: List[str] = Field(default_factory=list, description="Disadvantages")
    balanced_verdict: str = Field(..., description="Balanced conclusion")
|
|
|
|
|
|
class SourceWithRelevance(BaseModel):
    """A source with relevance information."""

    # `title` and `url` are required; everything else has a default.
    title: str
    url: str
    excerpt: Optional[str] = None
    # Constrained to [0.0, 1.0]; defaults to 0.8 when not supplied.
    relevance_score: float = Field(0.8, ge=0.0, le=1.0)
    relevance_reason: Optional[str] = None
    content_type: Optional[str] = None  # article, research paper, news, etc.
    # Kept as a string; no date format is enforced by this model.
    published_date: Optional[str] = None
    # Constrained to [0.0, 1.0]; defaults to 0.8 when not supplied.
    credibility_score: float = Field(0.8, ge=0.0, le=1.0)
|
|
|
|
|
|
# ============================================================================
# Intent Models
# ============================================================================
|
|
|
|
class ResearchIntent(BaseModel):
    """
    What the user actually wants from their research.
    This is inferred from user input + research persona.
    """

    # Required fields: primary_question and original_input. Every other field
    # has a default.
    #
    # NOTE(review): Config.use_enum_values converts enum fields to their raw
    # values during validation. Pydantic does not validate defaults unless
    # configured to, so a defaulted `purpose`/`content_output`/`depth`/
    # `input_type` may remain an Enum member while a supplied value is stored
    # as a plain string. The enums here subclass `str`, so equality checks
    # against raw strings behave the same either way -- confirm no caller
    # relies on `.value`/`.name` or identity checks.

    # Core understanding
    primary_question: str = Field(..., description="The main question to answer")
    secondary_questions: List[str] = Field(
        default_factory=list,
        description="Related questions that should be answered"
    )

    # Purpose classification
    purpose: ResearchPurpose = Field(
        ResearchPurpose.LEARN,
        description="Why the user is researching"
    )
    content_output: ContentOutput = Field(
        ContentOutput.GENERAL,
        description="What content type will be created"
    )

    # What they need from results
    expected_deliverables: List[ExpectedDeliverable] = Field(
        default_factory=list,
        description="Specific outputs the user expects"
    )

    # Depth and focus
    depth: ResearchDepthLevel = Field(
        ResearchDepthLevel.DETAILED,
        description="How deep the research should go"
    )
    focus_areas: List[str] = Field(
        default_factory=list,
        description="Specific aspects to focus on"
    )

    # Constraints
    perspective: Optional[str] = Field(
        None,
        description="Perspective to research from (e.g., 'hospital administrator')"
    )
    # Plain string: the values listed in the description are not enforced here.
    time_sensitivity: Optional[str] = Field(
        None,
        description="Time constraint: 'real_time', 'recent', 'historical', 'evergreen'"
    )

    # Detected input type
    input_type: InputType = Field(
        InputType.KEYWORDS,
        description="Type of user input detected"
    )

    # Original user input (for reference)
    original_input: str = Field(..., description="The original user input")

    # Confidence in inference (constrained to [0.0, 1.0])
    confidence: float = Field(
        0.8,
        ge=0.0,
        le=1.0,
        description="Confidence in the intent inference"
    )
    needs_clarification: bool = Field(
        False,
        description="True if AI is uncertain and needs user clarification"
    )
    clarifying_questions: List[str] = Field(
        default_factory=list,
        description="Questions to ask user if uncertain"
    )

    class Config:
        # Populate enum-typed fields with the enum's value rather than the member.
        use_enum_values = True
|
|
|
|
|
|
class ResearchQuery(BaseModel):
    """A targeted research query with purpose."""

    # Required: query, purpose, expected_results.
    query: str = Field(..., description="The search query")
    purpose: ExpectedDeliverable = Field(..., description="What this query targets")
    # Plain string: the provider names in the description are not enforced here.
    provider: str = Field("exa", description="Preferred provider: exa, tavily, google")
    # Bounded to 1..5. Since higher means more important, the default of 1 is
    # the lowest priority.
    priority: int = Field(1, ge=1, le=5, description="Priority 1-5, higher = more important")
    expected_results: str = Field(..., description="What we expect to find with this query")
|
|
|
|
|
|
class IntentInferenceRequest(BaseModel):
    """Request to infer research intent from user input."""

    # Only `user_input` is required; both context flags default to True.
    user_input: str = Field(..., description="User's keywords, question, or goal")
    keywords: List[str] = Field(default_factory=list, description="Extracted keywords")
    use_persona: bool = Field(True, description="Use research persona for context")
    use_competitor_data: bool = Field(True, description="Use competitor data for context")
|
|
|
|
|
|
class IntentInferenceResponse(BaseModel):
    """Response from intent inference."""

    # Required: intent and analysis_summary. `success` defaults to True and all
    # suggestion lists default to new empty lists.
    success: bool = True
    intent: ResearchIntent
    analysis_summary: str = Field(..., description="AI's understanding of user intent")
    suggested_queries: List[ResearchQuery] = Field(
        default_factory=list,
        description="Generated research queries based on intent"
    )
    suggested_keywords: List[str] = Field(
        default_factory=list,
        description="Enhanced/expanded keywords"
    )
    suggested_angles: List[str] = Field(
        default_factory=list,
        description="Research angles to explore"
    )
    # Untyped dictionaries: the shape of each option is not defined by this model.
    quick_options: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Quick options for user to confirm/modify intent"
    )
|
|
|
|
|
|
# ============================================================================
# Intent-Driven Research Result
# ============================================================================
|
|
|
|
class IntentDrivenResearchResult(BaseModel):
    """
    Research results organized by what user needs.
    This is the final output after intent-aware analysis.
    """

    # `primary_answer` is the only required field; note that it is required
    # even when `success` is False and `error_message` is set.
    success: bool = True

    # Direct answers
    primary_answer: str = Field(..., description="Direct answer to primary question")
    secondary_answers: Dict[str, str] = Field(
        default_factory=dict,
        description="Answers to secondary questions (question → answer)"
    )

    # Deliverables (populated based on user's expected_deliverables)
    statistics: List[StatisticWithCitation] = Field(default_factory=list)
    expert_quotes: List[ExpertQuote] = Field(default_factory=list)
    case_studies: List[CaseStudySummary] = Field(default_factory=list)
    comparisons: List[ComparisonTable] = Field(default_factory=list)
    trends: List[TrendAnalysis] = Field(default_factory=list)
    best_practices: List[str] = Field(default_factory=list)
    step_by_step: List[str] = Field(default_factory=list)
    # A single optional analysis, unlike the list-valued deliverables above.
    pros_cons: Optional[ProsCons] = None
    definitions: Dict[str, str] = Field(
        default_factory=dict,
        description="Term → definition mappings"
    )
    examples: List[str] = Field(default_factory=list)
    predictions: List[str] = Field(default_factory=list)

    # Content-ready outputs
    executive_summary: str = Field("", description="2-3 sentence summary")
    # The "5-7" in the description is guidance only; no length limit is enforced.
    key_takeaways: List[str] = Field(
        default_factory=list,
        description="5-7 key bullet points"
    )
    suggested_outline: List[str] = Field(
        default_factory=list,
        description="Suggested content outline if creating content"
    )

    # Supporting data
    sources: List[SourceWithRelevance] = Field(default_factory=list)
    raw_content: Optional[str] = Field(None, description="Raw content for further processing")

    # Research quality metadata (confidence constrained to [0.0, 1.0])
    confidence: float = Field(0.8, ge=0.0, le=1.0)
    gaps_identified: List[str] = Field(
        default_factory=list,
        description="What we couldn't find"
    )
    follow_up_queries: List[str] = Field(
        default_factory=list,
        description="Suggested additional research"
    )

    # Original intent for reference
    original_intent: Optional[ResearchIntent] = None

    # Error handling
    error_message: Optional[str] = None

    class Config:
        # Populate enum-typed fields with the enum's value rather than the member.
        # NOTE(review): no field declared directly on this model is an Enum;
        # presumably kept for consistency with ResearchIntent -- confirm.
        use_enum_values = True
|
|
|