Base code

This commit is contained in:
Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions

View File

@@ -0,0 +1,355 @@
"""
Research Intent Models
Pydantic models for understanding user research intent.
These models capture what the user actually wants to accomplish from their research,
enabling targeted query generation and intent-aware result analysis.
Author: ALwrity Team
Version: 1.0
"""
from enum import Enum
from typing import Dict, Any, List, Optional, Union
from pydantic import BaseModel, Field
from datetime import datetime
class ResearchPurpose(str, Enum):
"""Why is the user researching?"""
LEARN = "learn" # Understand a topic for personal knowledge
CREATE_CONTENT = "create_content" # Write article/blog/podcast/video
MAKE_DECISION = "make_decision" # Choose between options
COMPARE = "compare" # Compare alternatives/competitors
SOLVE_PROBLEM = "solve_problem" # Find solution to a problem
FIND_DATA = "find_data" # Get statistics/facts/citations
EXPLORE_TRENDS = "explore_trends" # Understand market/industry trends
VALIDATE = "validate" # Verify claims/information
GENERATE_IDEAS = "generate_ideas" # Brainstorm content ideas
class ContentOutput(str, Enum):
"""What content type will be created from this research?"""
BLOG = "blog"
PODCAST = "podcast"
VIDEO = "video"
SOCIAL_POST = "social_post"
NEWSLETTER = "newsletter"
PRESENTATION = "presentation"
REPORT = "report"
WHITEPAPER = "whitepaper"
EMAIL = "email"
GENERAL = "general" # No specific output
class ExpectedDeliverable(str, Enum):
"""What specific outputs the user expects from research."""
KEY_STATISTICS = "key_statistics" # Numbers, data points, percentages
EXPERT_QUOTES = "expert_quotes" # Authoritative statements
CASE_STUDIES = "case_studies" # Real examples and success stories
COMPARISONS = "comparisons" # Side-by-side analysis
TRENDS = "trends" # Market/industry trends
BEST_PRACTICES = "best_practices" # Recommendations and guidelines
STEP_BY_STEP = "step_by_step" # Process/how-to instructions
PROS_CONS = "pros_cons" # Advantages/disadvantages
DEFINITIONS = "definitions" # Clear explanations of concepts
CITATIONS = "citations" # Authoritative sources
EXAMPLES = "examples" # Concrete examples
PREDICTIONS = "predictions" # Future outlook
class ResearchDepthLevel(str, Enum):
"""How deep the research should go."""
OVERVIEW = "overview" # Quick summary, surface level
DETAILED = "detailed" # In-depth analysis
EXPERT = "expert" # Comprehensive, expert-level research
class InputType(str, Enum):
"""Type of user input detected."""
KEYWORDS = "keywords" # Simple keywords: "AI healthcare 2025"
QUESTION = "question" # A question: "What are the best AI tools?"
GOAL = "goal" # Goal statement: "I need to write a blog about..."
MIXED = "mixed" # Combination of above
# ============================================================================
# Structured Deliverable Models
# ============================================================================
class StatisticWithCitation(BaseModel):
"""A statistic with full attribution."""
statistic: str = Field(..., description="The full statistical statement")
value: Optional[str] = Field(None, description="The numeric value (e.g., '72%')")
context: str = Field(..., description="Context of when/where this was measured")
source: str = Field(..., description="Source name/publication")
url: str = Field(..., description="Source URL")
credibility: float = Field(0.8, ge=0.0, le=1.0, description="Credibility score 0-1")
recency: Optional[str] = Field(None, description="How recent the data is")
class ExpertQuote(BaseModel):
"""A quote from an authoritative source."""
quote: str = Field(..., description="The actual quote")
speaker: str = Field(..., description="Name of the speaker")
title: Optional[str] = Field(None, description="Title/role of the speaker")
organization: Optional[str] = Field(None, description="Organization/company")
context: Optional[str] = Field(None, description="Context of the quote")
source: str = Field(..., description="Source name")
url: str = Field(..., description="Source URL")
class CaseStudySummary(BaseModel):
"""Summary of a case study."""
title: str = Field(..., description="Case study title")
organization: str = Field(..., description="Organization featured")
challenge: str = Field(..., description="The challenge/problem faced")
solution: str = Field(..., description="The solution implemented")
outcome: str = Field(..., description="The results achieved")
key_metrics: List[str] = Field(default_factory=list, description="Key metrics/numbers")
source: str = Field(..., description="Source name")
url: str = Field(..., description="Source URL")
class TrendAnalysis(BaseModel):
"""Analysis of a trend."""
trend: str = Field(..., description="The trend description")
direction: str = Field(..., description="growing, declining, emerging, stable")
evidence: List[str] = Field(default_factory=list, description="Supporting evidence")
impact: Optional[str] = Field(None, description="Potential impact")
timeline: Optional[str] = Field(None, description="Timeline of the trend")
sources: List[str] = Field(default_factory=list, description="Source URLs")
class ComparisonItem(BaseModel):
"""An item in a comparison."""
name: str
description: Optional[str] = None
pros: List[str] = Field(default_factory=list)
cons: List[str] = Field(default_factory=list)
features: Dict[str, str] = Field(default_factory=dict)
rating: Optional[float] = None
source: Optional[str] = None
class ComparisonTable(BaseModel):
"""Comparison between options."""
title: str = Field(..., description="Comparison title")
criteria: List[str] = Field(default_factory=list, description="Comparison criteria")
items: List[ComparisonItem] = Field(default_factory=list, description="Items being compared")
winner: Optional[str] = Field(None, description="Recommended option if applicable")
verdict: Optional[str] = Field(None, description="Summary verdict")
class ProsCons(BaseModel):
"""Pros and cons analysis."""
subject: str = Field(..., description="What is being analyzed")
pros: List[str] = Field(default_factory=list, description="Advantages")
cons: List[str] = Field(default_factory=list, description="Disadvantages")
balanced_verdict: str = Field(..., description="Balanced conclusion")
class SourceWithRelevance(BaseModel):
"""A source with relevance information."""
title: str
url: str
excerpt: Optional[str] = None
relevance_score: float = Field(0.8, ge=0.0, le=1.0)
relevance_reason: Optional[str] = None
content_type: Optional[str] = None # article, research paper, news, etc.
published_date: Optional[str] = None
credibility_score: float = Field(0.8, ge=0.0, le=1.0)
# ============================================================================
# Intent Models
# ============================================================================
class ResearchIntent(BaseModel):
"""
What the user actually wants from their research.
This is inferred from user input + research persona.
"""
# Core understanding
primary_question: str = Field(..., description="The main question to answer")
secondary_questions: List[str] = Field(
default_factory=list,
description="Related questions that should be answered"
)
# Purpose classification
purpose: ResearchPurpose = Field(
ResearchPurpose.LEARN,
description="Why the user is researching"
)
content_output: ContentOutput = Field(
ContentOutput.GENERAL,
description="What content type will be created"
)
# What they need from results
expected_deliverables: List[ExpectedDeliverable] = Field(
default_factory=list,
description="Specific outputs the user expects"
)
# Depth and focus
depth: ResearchDepthLevel = Field(
ResearchDepthLevel.DETAILED,
description="How deep the research should go"
)
focus_areas: List[str] = Field(
default_factory=list,
description="Specific aspects to focus on"
)
# Constraints
perspective: Optional[str] = Field(
None,
description="Perspective to research from (e.g., 'hospital administrator')"
)
time_sensitivity: Optional[str] = Field(
None,
description="Time constraint: 'real_time', 'recent', 'historical', 'evergreen'"
)
# Detected input type
input_type: InputType = Field(
InputType.KEYWORDS,
description="Type of user input detected"
)
# Original user input (for reference)
original_input: str = Field(..., description="The original user input")
# Confidence in inference
confidence: float = Field(
0.8,
ge=0.0,
le=1.0,
description="Confidence in the intent inference"
)
needs_clarification: bool = Field(
False,
description="True if AI is uncertain and needs user clarification"
)
clarifying_questions: List[str] = Field(
default_factory=list,
description="Questions to ask user if uncertain"
)
class Config:
use_enum_values = True
class ResearchQuery(BaseModel):
"""A targeted research query with purpose."""
query: str = Field(..., description="The search query")
purpose: ExpectedDeliverable = Field(..., description="What this query targets")
provider: str = Field("exa", description="Preferred provider: exa, tavily, google")
priority: int = Field(1, ge=1, le=5, description="Priority 1-5, higher = more important")
expected_results: str = Field(..., description="What we expect to find with this query")
class IntentInferenceRequest(BaseModel):
"""Request to infer research intent from user input."""
user_input: str = Field(..., description="User's keywords, question, or goal")
keywords: List[str] = Field(default_factory=list, description="Extracted keywords")
use_persona: bool = Field(True, description="Use research persona for context")
use_competitor_data: bool = Field(True, description="Use competitor data for context")
class IntentInferenceResponse(BaseModel):
"""Response from intent inference."""
success: bool = True
intent: ResearchIntent
analysis_summary: str = Field(..., description="AI's understanding of user intent")
suggested_queries: List[ResearchQuery] = Field(
default_factory=list,
description="Generated research queries based on intent"
)
suggested_keywords: List[str] = Field(
default_factory=list,
description="Enhanced/expanded keywords"
)
suggested_angles: List[str] = Field(
default_factory=list,
description="Research angles to explore"
)
quick_options: List[Dict[str, Any]] = Field(
default_factory=list,
description="Quick options for user to confirm/modify intent"
)
# ============================================================================
# Intent-Driven Research Result
# ============================================================================
class IntentDrivenResearchResult(BaseModel):
"""
Research results organized by what user needs.
This is the final output after intent-aware analysis.
"""
success: bool = True
# Direct answers
primary_answer: str = Field(..., description="Direct answer to primary question")
secondary_answers: Dict[str, str] = Field(
default_factory=dict,
description="Answers to secondary questions (question → answer)"
)
# Deliverables (populated based on user's expected_deliverables)
statistics: List[StatisticWithCitation] = Field(default_factory=list)
expert_quotes: List[ExpertQuote] = Field(default_factory=list)
case_studies: List[CaseStudySummary] = Field(default_factory=list)
comparisons: List[ComparisonTable] = Field(default_factory=list)
trends: List[TrendAnalysis] = Field(default_factory=list)
best_practices: List[str] = Field(default_factory=list)
step_by_step: List[str] = Field(default_factory=list)
pros_cons: Optional[ProsCons] = None
definitions: Dict[str, str] = Field(
default_factory=dict,
description="Term → definition mappings"
)
examples: List[str] = Field(default_factory=list)
predictions: List[str] = Field(default_factory=list)
# Content-ready outputs
executive_summary: str = Field("", description="2-3 sentence summary")
key_takeaways: List[str] = Field(
default_factory=list,
description="5-7 key bullet points"
)
suggested_outline: List[str] = Field(
default_factory=list,
description="Suggested content outline if creating content"
)
# Supporting data
sources: List[SourceWithRelevance] = Field(default_factory=list)
raw_content: Optional[str] = Field(None, description="Raw content for further processing")
# Research quality metadata
confidence: float = Field(0.8, ge=0.0, le=1.0)
gaps_identified: List[str] = Field(
default_factory=list,
description="What we couldn't find"
)
follow_up_queries: List[str] = Field(
default_factory=list,
description="Suggested additional research"
)
# Original intent for reference
original_intent: Optional[ResearchIntent] = None
# Error handling
error_message: Optional[str] = None
class Config:
use_enum_values = True