Base code

Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions


@@ -0,0 +1,23 @@
"""
Research Intent Package
This package provides intent-driven research capabilities:
- Intent inference from user input
- Targeted query generation
- Intent-aware result analysis
Author: ALwrity Team
Version: 1.0
"""
from .research_intent_inference import ResearchIntentInference
from .intent_query_generator import IntentQueryGenerator
from .intent_aware_analyzer import IntentAwareAnalyzer
from .intent_prompt_builder import IntentPromptBuilder
__all__ = [
"ResearchIntentInference",
"IntentQueryGenerator",
"IntentAwareAnalyzer",
"IntentPromptBuilder",
]
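# Minimal usage sketch (hypothetical wiring; assumes an async caller and a
# configured LLM backend behind services.llm_providers.main_text_generation):
#
#   inference = ResearchIntentInference()
#   response = await inference.infer_intent("AI in healthcare 2025")
#   plan = await IntentQueryGenerator().generate_queries(response.intent)
#   result = await IntentAwareAnalyzer().analyze(raw_results, response.intent)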


@@ -0,0 +1,547 @@
"""
Intent-Aware Result Analyzer
Analyzes research results based on user intent.
Extracts exactly what the user needs from raw research data.
This is the key innovation - instead of generic analysis,
we analyze results through the lens of what the user wants to accomplish.
Author: ALwrity Team
Version: 1.0
"""
import json
from typing import Dict, Any, List, Optional
from loguru import logger
from models.research_intent_models import (
ResearchIntent,
IntentDrivenResearchResult,
ExpectedDeliverable,
StatisticWithCitation,
ExpertQuote,
CaseStudySummary,
TrendAnalysis,
ComparisonTable,
ComparisonItem,
ProsCons,
SourceWithRelevance,
)
from models.research_persona_models import ResearchPersona
from .intent_prompt_builder import IntentPromptBuilder
class IntentAwareAnalyzer:
"""
Analyzes research results based on user intent.
Instead of generic summaries, this extracts exactly what the user
needs: statistics, quotes, case studies, trends, etc.
"""
def __init__(self):
"""Initialize the analyzer."""
self.prompt_builder = IntentPromptBuilder()
logger.info("IntentAwareAnalyzer initialized")
async def analyze(
self,
raw_results: Dict[str, Any],
intent: ResearchIntent,
research_persona: Optional[ResearchPersona] = None,
) -> IntentDrivenResearchResult:
"""
Analyze raw research results based on user intent.
Args:
raw_results: Raw results from Exa/Tavily/Google
intent: The user's research intent
research_persona: Optional persona for context
Returns:
IntentDrivenResearchResult with extracted deliverables
"""
try:
logger.info(f"Analyzing results for intent: {intent.primary_question[:50]}...")
# Format raw results for analysis
formatted_results = self._format_raw_results(raw_results)
# Build the analysis prompt
prompt = self.prompt_builder.build_intent_aware_analysis_prompt(
raw_results=formatted_results,
intent=intent,
research_persona=research_persona,
)
# Define the expected JSON schema
analysis_schema = self._build_analysis_schema(intent.expected_deliverables)
# Call LLM for analysis
from services.llm_providers.main_text_generation import llm_text_gen
result = llm_text_gen(
prompt=prompt,
json_struct=analysis_schema,
user_id=None
)
if isinstance(result, dict) and "error" in result:
logger.error(f"Intent-aware analysis failed: {result.get('error')}")
return self._create_fallback_result(raw_results, intent)
# Parse and validate the result
analyzed_result = self._parse_analysis_result(result, intent, raw_results)
logger.info(
f"Analysis complete: {len(analyzed_result.key_takeaways)} takeaways, "
f"{len(analyzed_result.statistics)} stats, "
f"{len(analyzed_result.sources)} sources"
)
return analyzed_result
except Exception as e:
logger.error(f"Error in intent-aware analysis: {e}")
return self._create_fallback_result(raw_results, intent)
def _format_raw_results(self, raw_results: Dict[str, Any]) -> str:
"""Format raw research results for LLM analysis."""
formatted_parts = []
# Extract content
content = raw_results.get("content", "")
if content:
formatted_parts.append(f"=== MAIN CONTENT ===\n{content[:8000]}")
# Extract sources with their content
sources = raw_results.get("sources", [])
if sources:
formatted_parts.append("\n=== SOURCES ===")
for i, source in enumerate(sources[:15], 1): # Limit to 15 sources
title = source.get("title", "Untitled")
url = source.get("url", "")
excerpt = source.get("excerpt", source.get("text", source.get("content", "")))
formatted_parts.append(f"\nSource {i}: {title}")
formatted_parts.append(f"URL: {url}")
if excerpt:
formatted_parts.append(f"Content: {excerpt[:500]}")
# Extract grounding metadata if available (from Google)
grounding = raw_results.get("grounding_metadata", {})
if grounding:
formatted_parts.append("\n=== GROUNDING DATA ===")
formatted_parts.append(json.dumps(grounding, indent=2)[:2000])
# Extract any AI answers (from Tavily)
answer = raw_results.get("answer", "")
if answer:
formatted_parts.append(f"\n=== AI-GENERATED ANSWER ===\n{answer}")
return "\n".join(formatted_parts)
def _build_analysis_schema(self, expected_deliverables: List[str]) -> Dict[str, Any]:
"""Build JSON schema based on expected deliverables."""
# Base schema
schema = {
"type": "object",
"properties": {
"primary_answer": {"type": "string"},
"secondary_answers": {
"type": "object",
"additionalProperties": {"type": "string"}
},
"executive_summary": {"type": "string"},
"key_takeaways": {
"type": "array",
"items": {"type": "string"},
"maxItems": 7
},
"confidence": {"type": "number"},
"gaps_identified": {
"type": "array",
"items": {"type": "string"}
},
"follow_up_queries": {
"type": "array",
"items": {"type": "string"}
},
},
"required": ["primary_answer", "executive_summary", "key_takeaways", "confidence"]
}
# Add deliverable-specific properties
if ExpectedDeliverable.KEY_STATISTICS.value in expected_deliverables:
schema["properties"]["statistics"] = {
"type": "array",
"items": {
"type": "object",
"properties": {
"statistic": {"type": "string"},
"value": {"type": "string"},
"context": {"type": "string"},
"source": {"type": "string"},
"url": {"type": "string"},
"credibility": {"type": "number"},
"recency": {"type": "string"}
},
"required": ["statistic", "context", "source", "url"]
}
}
if ExpectedDeliverable.EXPERT_QUOTES.value in expected_deliverables:
schema["properties"]["expert_quotes"] = {
"type": "array",
"items": {
"type": "object",
"properties": {
"quote": {"type": "string"},
"speaker": {"type": "string"},
"title": {"type": "string"},
"organization": {"type": "string"},
"source": {"type": "string"},
"url": {"type": "string"}
},
"required": ["quote", "speaker", "source", "url"]
}
}
if ExpectedDeliverable.CASE_STUDIES.value in expected_deliverables:
schema["properties"]["case_studies"] = {
"type": "array",
"items": {
"type": "object",
"properties": {
"title": {"type": "string"},
"organization": {"type": "string"},
"challenge": {"type": "string"},
"solution": {"type": "string"},
"outcome": {"type": "string"},
"key_metrics": {"type": "array", "items": {"type": "string"}},
"source": {"type": "string"},
"url": {"type": "string"}
},
"required": ["title", "organization", "challenge", "solution", "outcome"]
}
}
if ExpectedDeliverable.TRENDS.value in expected_deliverables:
schema["properties"]["trends"] = {
"type": "array",
"items": {
"type": "object",
"properties": {
"trend": {"type": "string"},
"direction": {"type": "string"},
"evidence": {"type": "array", "items": {"type": "string"}},
"impact": {"type": "string"},
"timeline": {"type": "string"},
"sources": {"type": "array", "items": {"type": "string"}}
},
"required": ["trend", "direction", "evidence"]
}
}
if ExpectedDeliverable.COMPARISONS.value in expected_deliverables:
schema["properties"]["comparisons"] = {
"type": "array",
"items": {
"type": "object",
"properties": {
"title": {"type": "string"},
"criteria": {"type": "array", "items": {"type": "string"}},
"items": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"pros": {"type": "array", "items": {"type": "string"}},
"cons": {"type": "array", "items": {"type": "string"}},
"features": {"type": "object"}
}
}
},
"verdict": {"type": "string"}
}
}
}
if ExpectedDeliverable.PROS_CONS.value in expected_deliverables:
schema["properties"]["pros_cons"] = {
"type": "object",
"properties": {
"subject": {"type": "string"},
"pros": {"type": "array", "items": {"type": "string"}},
"cons": {"type": "array", "items": {"type": "string"}},
"balanced_verdict": {"type": "string"}
}
}
if ExpectedDeliverable.BEST_PRACTICES.value in expected_deliverables:
schema["properties"]["best_practices"] = {
"type": "array",
"items": {"type": "string"}
}
if ExpectedDeliverable.STEP_BY_STEP.value in expected_deliverables:
schema["properties"]["step_by_step"] = {
"type": "array",
"items": {"type": "string"}
}
if ExpectedDeliverable.DEFINITIONS.value in expected_deliverables:
schema["properties"]["definitions"] = {
"type": "object",
"additionalProperties": {"type": "string"}
}
if ExpectedDeliverable.EXAMPLES.value in expected_deliverables:
schema["properties"]["examples"] = {
"type": "array",
"items": {"type": "string"}
}
if ExpectedDeliverable.PREDICTIONS.value in expected_deliverables:
schema["properties"]["predictions"] = {
"type": "array",
"items": {"type": "string"}
}
# Always include sources and suggested outline
schema["properties"]["sources"] = {
"type": "array",
"items": {
"type": "object",
"properties": {
"title": {"type": "string"},
"url": {"type": "string"},
"relevance_score": {"type": "number"},
"relevance_reason": {"type": "string"},
"content_type": {"type": "string"},
"credibility_score": {"type": "number"}
},
"required": ["title", "url"]
}
}
schema["properties"]["suggested_outline"] = {
"type": "array",
"items": {"type": "string"}
}
return schema
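    # Example: for expected_deliverables=["key_statistics"], the schema keeps the
    # base properties (primary_answer, executive_summary, key_takeaways, ...),
    # adds a "statistics" array, and always appends "sources" and
    # "suggested_outline"; deliverables that were not requested add nothing.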
def _parse_analysis_result(
self,
result: Dict[str, Any],
intent: ResearchIntent,
raw_results: Dict[str, Any],
) -> IntentDrivenResearchResult:
"""Parse LLM analysis result into structured format."""
# Parse statistics
statistics = []
for stat in result.get("statistics", []):
try:
statistics.append(StatisticWithCitation(
statistic=stat.get("statistic", ""),
value=stat.get("value"),
context=stat.get("context", ""),
source=stat.get("source", ""),
url=stat.get("url", ""),
credibility=float(stat.get("credibility", 0.8)),
recency=stat.get("recency"),
))
except Exception as e:
logger.warning(f"Failed to parse statistic: {e}")
# Parse expert quotes
expert_quotes = []
for quote in result.get("expert_quotes", []):
try:
expert_quotes.append(ExpertQuote(
quote=quote.get("quote", ""),
speaker=quote.get("speaker", ""),
title=quote.get("title"),
organization=quote.get("organization"),
context=quote.get("context"),
source=quote.get("source", ""),
url=quote.get("url", ""),
))
except Exception as e:
logger.warning(f"Failed to parse expert quote: {e}")
# Parse case studies
case_studies = []
for cs in result.get("case_studies", []):
try:
case_studies.append(CaseStudySummary(
title=cs.get("title", ""),
organization=cs.get("organization", ""),
challenge=cs.get("challenge", ""),
solution=cs.get("solution", ""),
outcome=cs.get("outcome", ""),
key_metrics=cs.get("key_metrics", []),
source=cs.get("source", ""),
url=cs.get("url", ""),
))
except Exception as e:
logger.warning(f"Failed to parse case study: {e}")
# Parse trends
trends = []
for trend in result.get("trends", []):
try:
trends.append(TrendAnalysis(
trend=trend.get("trend", ""),
direction=trend.get("direction", "growing"),
evidence=trend.get("evidence", []),
impact=trend.get("impact"),
timeline=trend.get("timeline"),
sources=trend.get("sources", []),
))
except Exception as e:
logger.warning(f"Failed to parse trend: {e}")
# Parse comparisons
comparisons = []
for comp in result.get("comparisons", []):
try:
items = []
for item in comp.get("items", []):
items.append(ComparisonItem(
name=item.get("name", ""),
description=item.get("description"),
pros=item.get("pros", []),
cons=item.get("cons", []),
features=item.get("features", {}),
rating=item.get("rating"),
source=item.get("source"),
))
comparisons.append(ComparisonTable(
title=comp.get("title", ""),
criteria=comp.get("criteria", []),
items=items,
winner=comp.get("winner"),
verdict=comp.get("verdict"),
))
except Exception as e:
logger.warning(f"Failed to parse comparison: {e}")
# Parse pros/cons
pros_cons = None
pc_data = result.get("pros_cons")
if pc_data:
try:
pros_cons = ProsCons(
subject=pc_data.get("subject", intent.original_input),
pros=pc_data.get("pros", []),
cons=pc_data.get("cons", []),
balanced_verdict=pc_data.get("balanced_verdict", ""),
)
except Exception as e:
logger.warning(f"Failed to parse pros/cons: {e}")
# Parse sources
sources = []
for src in result.get("sources", []):
try:
sources.append(SourceWithRelevance(
title=src.get("title", ""),
url=src.get("url", ""),
excerpt=src.get("excerpt"),
relevance_score=float(src.get("relevance_score", 0.8)),
relevance_reason=src.get("relevance_reason"),
content_type=src.get("content_type"),
published_date=src.get("published_date"),
credibility_score=float(src.get("credibility_score", 0.8)),
))
except Exception as e:
logger.warning(f"Failed to parse source: {e}")
# If no sources from analysis, extract from raw results
if not sources:
sources = self._extract_sources_from_raw(raw_results)
return IntentDrivenResearchResult(
success=True,
primary_answer=result.get("primary_answer", ""),
secondary_answers=result.get("secondary_answers", {}),
statistics=statistics,
expert_quotes=expert_quotes,
case_studies=case_studies,
comparisons=comparisons,
trends=trends,
best_practices=result.get("best_practices", []),
step_by_step=result.get("step_by_step", []),
pros_cons=pros_cons,
definitions=result.get("definitions", {}),
examples=result.get("examples", []),
predictions=result.get("predictions", []),
executive_summary=result.get("executive_summary", ""),
key_takeaways=result.get("key_takeaways", []),
suggested_outline=result.get("suggested_outline", []),
sources=sources,
raw_content=self._format_raw_results(raw_results)[:5000],
confidence=float(result.get("confidence", 0.7)),
gaps_identified=result.get("gaps_identified", []),
follow_up_queries=result.get("follow_up_queries", []),
original_intent=intent,
)
def _extract_sources_from_raw(self, raw_results: Dict[str, Any]) -> List[SourceWithRelevance]:
"""Extract sources from raw results when analysis doesn't provide them."""
sources = []
for src in raw_results.get("sources", [])[:10]:
try:
sources.append(SourceWithRelevance(
title=src.get("title", "Untitled"),
url=src.get("url", ""),
                    excerpt=(src.get("excerpt") or src.get("text") or "")[:200],
relevance_score=0.8,
credibility_score=float(src.get("credibility_score", 0.8)),
))
except Exception as e:
logger.warning(f"Failed to extract source: {e}")
return sources
def _create_fallback_result(
self,
raw_results: Dict[str, Any],
intent: ResearchIntent,
) -> IntentDrivenResearchResult:
"""Create a fallback result when AI analysis fails."""
# Extract basic information from raw results
content = raw_results.get("content", "")
sources = self._extract_sources_from_raw(raw_results)
# Create basic takeaways from content
key_takeaways = []
if content:
            sentences = content.split(". ")[:5]
            # Normalize trailing periods so the final sentence does not end up with ".."
            key_takeaways = [s.strip().rstrip(".") + "." for s in sentences if len(s.strip()) > 20]
return IntentDrivenResearchResult(
success=True,
primary_answer=f"Research findings for: {intent.primary_question}",
secondary_answers={},
executive_summary=content[:300] if content else "Research completed",
key_takeaways=key_takeaways,
sources=sources,
raw_content=self._format_raw_results(raw_results)[:5000],
confidence=0.5,
gaps_identified=[
"AI analysis failed - showing raw results",
"Manual review recommended"
],
follow_up_queries=[],
original_intent=intent,
)
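# Hedged usage sketch (illustrative only; `intent` is assumed to be a
# ResearchIntent produced by ResearchIntentInference, and the raw-result keys
# follow the shape consumed by _format_raw_results above):
#
#   import asyncio
#   analyzer = IntentAwareAnalyzer()
#   raw = {"content": "...", "sources": [{"title": "...", "url": "https://..."}]}
#   result = asyncio.run(analyzer.analyze(raw, intent))
#   print(result.primary_answer, len(result.sources), result.confidence)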


@@ -0,0 +1,627 @@
"""
Intent Prompt Builder
Builds comprehensive AI prompts for:
1. Intent inference from user input
2. Targeted query generation
3. Intent-aware result analysis
Author: ALwrity Team
Version: 1.0
"""
import json
from typing import Dict, Any, List, Optional
from loguru import logger
from models.research_intent_models import (
ResearchIntent,
ResearchPurpose,
ContentOutput,
ExpectedDeliverable,
ResearchDepthLevel,
)
from models.research_persona_models import ResearchPersona
class IntentPromptBuilder:
"""Builds prompts for intent-driven research."""
# Purpose explanations for the AI
PURPOSE_EXPLANATIONS = {
ResearchPurpose.LEARN: "User wants to understand a topic for personal knowledge",
ResearchPurpose.CREATE_CONTENT: "User will create content (blog, video, podcast) from this research",
ResearchPurpose.MAKE_DECISION: "User needs to make a choice/decision based on research",
ResearchPurpose.COMPARE: "User wants to compare alternatives or competitors",
ResearchPurpose.SOLVE_PROBLEM: "User is looking for a solution to a specific problem",
ResearchPurpose.FIND_DATA: "User needs specific statistics, facts, or citations",
ResearchPurpose.EXPLORE_TRENDS: "User wants to understand current/future trends",
ResearchPurpose.VALIDATE: "User wants to verify or fact-check information",
ResearchPurpose.GENERATE_IDEAS: "User wants to brainstorm content ideas",
}
# Deliverable descriptions
DELIVERABLE_DESCRIPTIONS = {
ExpectedDeliverable.KEY_STATISTICS: "Numbers, percentages, data points with citations",
ExpectedDeliverable.EXPERT_QUOTES: "Authoritative quotes from industry experts",
ExpectedDeliverable.CASE_STUDIES: "Real examples and success stories",
ExpectedDeliverable.COMPARISONS: "Side-by-side analysis tables",
ExpectedDeliverable.TRENDS: "Current and emerging industry trends",
ExpectedDeliverable.BEST_PRACTICES: "Recommended approaches and guidelines",
ExpectedDeliverable.STEP_BY_STEP: "Process guides and how-to instructions",
ExpectedDeliverable.PROS_CONS: "Advantages and disadvantages analysis",
ExpectedDeliverable.DEFINITIONS: "Clear explanations of concepts and terms",
ExpectedDeliverable.CITATIONS: "Authoritative sources for reference",
ExpectedDeliverable.EXAMPLES: "Concrete examples to illustrate points",
ExpectedDeliverable.PREDICTIONS: "Future outlook and predictions",
}
def build_intent_inference_prompt(
self,
user_input: str,
keywords: List[str],
research_persona: Optional[ResearchPersona] = None,
competitor_data: Optional[List[Dict]] = None,
industry: Optional[str] = None,
target_audience: Optional[str] = None,
) -> str:
"""
Build prompt for inferring user's research intent.
This prompt analyzes the user's input and determines:
- What they want to accomplish
- What questions they need answered
- What specific deliverables they need
"""
# Build persona context
persona_context = self._build_persona_context(research_persona, industry, target_audience)
# Build competitor context
competitor_context = self._build_competitor_context(competitor_data)
prompt = f"""You are an expert research intent analyzer. Your job is to understand what a content creator REALLY needs from their research.
## USER INPUT
"{user_input}"
{f"KEYWORDS: {', '.join(keywords)}" if keywords else ""}
## USER CONTEXT
{persona_context}
{competitor_context}
## YOUR TASK
Analyze the user's input and infer their research intent. Determine:
1. **INPUT TYPE**: Is this:
- "keywords": Simple topic keywords (e.g., "AI healthcare 2025")
- "question": A specific question (e.g., "What are the best AI tools for healthcare?")
- "goal": A goal statement (e.g., "I need to write a blog about AI in healthcare")
- "mixed": Combination of above
2. **PRIMARY QUESTION**: What is the main question to answer? Convert their input into a clear question.
3. **SECONDARY QUESTIONS**: What related questions should also be answered? (3-5 questions)
4. **PURPOSE**: Why are they researching? Choose ONE:
- "learn": Understand a topic for personal knowledge
- "create_content": Create content (blog, video, podcast)
- "make_decision": Make a choice between options
- "compare": Compare alternatives/competitors
- "solve_problem": Find a solution
- "find_data": Get specific statistics/facts
- "explore_trends": Understand industry trends
- "validate": Verify claims/information
- "generate_ideas": Brainstorm ideas
5. **CONTENT OUTPUT**: What will they create? Choose ONE:
- "blog", "podcast", "video", "social_post", "newsletter", "presentation", "report", "whitepaper", "email", "general"
6. **EXPECTED DELIVERABLES**: What specific outputs do they need? Choose ALL that apply:
- "key_statistics": Numbers, data points
- "expert_quotes": Authoritative quotes
- "case_studies": Real examples
- "comparisons": Side-by-side analysis
- "trends": Industry trends
- "best_practices": Recommendations
- "step_by_step": How-to guides
- "pros_cons": Advantages/disadvantages
- "definitions": Concept explanations
- "citations": Source references
- "examples": Concrete examples
- "predictions": Future outlook
7. **DEPTH**: How deep should the research go?
- "overview": Quick summary
- "detailed": In-depth analysis
- "expert": Comprehensive expert-level
8. **FOCUS AREAS**: What specific aspects should be researched? (2-4 areas)
9. **PERSPECTIVE**: From whose viewpoint? (e.g., "marketing manager", "small business owner")
10. **TIME SENSITIVITY**: Is recency important?
- "real_time": Latest only (past 24-48 hours)
- "recent": Past week/month
- "historical": Include older content
- "evergreen": Timeless content
11. **CONFIDENCE**: How confident are you in this inference? (0.0-1.0)
- If < 0.7, set needs_clarification to true and provide clarifying_questions
## OUTPUT FORMAT
Return a JSON object:
```json
{{
"input_type": "keywords|question|goal|mixed",
"primary_question": "The main question to answer",
"secondary_questions": ["question 1", "question 2", "question 3"],
"purpose": "one of the purpose options",
"content_output": "one of the content options",
"expected_deliverables": ["deliverable1", "deliverable2"],
"depth": "overview|detailed|expert",
"focus_areas": ["area1", "area2"],
"perspective": "target perspective or null",
"time_sensitivity": "real_time|recent|historical|evergreen",
"confidence": 0.85,
"needs_clarification": false,
"clarifying_questions": [],
"analysis_summary": "Brief summary of what the user wants"
}}
```
## IMPORTANT RULES
1. Always convert vague input into a specific primary question
2. Infer deliverables based on purpose (e.g., create_content → statistics + examples)
3. Use persona context to refine perspective and focus areas
4. If input is ambiguous, provide clarifying questions
5. Default to "detailed" depth unless input suggests otherwise
6. For content creation, include relevant deliverables automatically
"""
return prompt
def build_query_generation_prompt(
self,
intent: ResearchIntent,
research_persona: Optional[ResearchPersona] = None,
) -> str:
"""
Build prompt for generating targeted research queries.
Generates multiple queries, each targeting a specific deliverable.
"""
        # Resolve descriptions by enum value so an unexpected deliverable string
        # cannot raise ValueError from ExpectedDeliverable(d)
        deliverable_descriptions = {
            e.value: desc for e, desc in self.DELIVERABLE_DESCRIPTIONS.items()
        }
        deliverables_list = "\n".join(
            f"- {d}: {deliverable_descriptions.get(d, d)}"
            for d in intent.expected_deliverables
        )
        purpose_explanation = {
            p.value: text for p, text in self.PURPOSE_EXPLANATIONS.items()
        }.get(intent.purpose, intent.purpose)
persona_keywords = ""
if research_persona and research_persona.suggested_keywords:
persona_keywords = f"\nSUGGESTED KEYWORDS FROM PERSONA: {', '.join(research_persona.suggested_keywords[:10])}"
prompt = f"""You are a research query optimizer. Generate multiple targeted search queries based on the user's research intent.
## RESEARCH INTENT
PRIMARY QUESTION: {intent.primary_question}
SECONDARY QUESTIONS:
{chr(10).join(f'- {q}' for q in intent.secondary_questions) if intent.secondary_questions else 'None'}
PURPOSE: {intent.purpose} - {purpose_explanation}
CONTENT OUTPUT: {intent.content_output}
EXPECTED DELIVERABLES:
{deliverables_list}
DEPTH: {intent.depth}
FOCUS AREAS: {', '.join(intent.focus_areas) if intent.focus_areas else 'General'}
PERSPECTIVE: {intent.perspective or 'General audience'}
TIME SENSITIVITY: {intent.time_sensitivity or 'No specific requirement'}
{persona_keywords}
## YOUR TASK
Generate 4-8 targeted research queries. Each query should:
1. Target a specific deliverable or question
2. Be optimized for semantic search (Exa/Tavily)
3. Include relevant context for better results
For each query, specify:
- The query string
- What deliverable it targets
- Best provider (exa for semantic/deep, tavily for news/real-time, google for factual)
- Priority (1-5, higher = more important)
- What we expect to find
## OUTPUT FORMAT
Return a JSON object:
```json
{{
"queries": [
{{
"query": "Healthcare AI adoption statistics 2025 hospitals implementation data",
"purpose": "key_statistics",
"provider": "exa",
"priority": 5,
"expected_results": "Statistics on hospital AI adoption rates"
}},
{{
"query": "AI healthcare trends predictions future outlook 2025 2026",
"purpose": "trends",
"provider": "tavily",
"priority": 4,
"expected_results": "Current trends and future predictions in healthcare AI"
}}
],
"enhanced_keywords": ["keyword1", "keyword2", "keyword3"],
"research_angles": [
"Angle 1: Focus on adoption challenges",
"Angle 2: Focus on ROI and outcomes"
]
}}
```
## QUERY OPTIMIZATION RULES
1. For STATISTICS: Include words like "statistics", "data", "percentage", "report", "study"
2. For CASE STUDIES: Include "case study", "success story", "implementation", "example"
3. For TRENDS: Include "trends", "future", "predictions", "emerging", year numbers
4. For EXPERT QUOTES: Include expert names if known, or "expert opinion", "interview"
5. For COMPARISONS: Include "vs", "compare", "comparison", "alternative"
6. For NEWS/REAL-TIME: Use Tavily, include recent year/month
7. For ACADEMIC/DEEP: Use Exa with neural search
"""
return prompt
def build_intent_aware_analysis_prompt(
self,
raw_results: str,
intent: ResearchIntent,
research_persona: Optional[ResearchPersona] = None,
) -> str:
"""
Build prompt for analyzing research results based on user intent.
This is the key prompt that extracts exactly what the user needs.
"""
        # Look up the explanation by enum value so an unexpected purpose string
        # falls back to itself instead of raising ValueError
        purpose_explanation = {
            p.value: text for p, text in self.PURPOSE_EXPLANATIONS.items()
        }.get(intent.purpose, intent.purpose)
deliverables_instructions = self._build_deliverables_instructions(intent.expected_deliverables)
perspective_instruction = ""
if intent.perspective:
perspective_instruction = f"\n**PERSPECTIVE**: Analyze results from the viewpoint of: {intent.perspective}"
prompt = f"""You are a research analyst helping a content creator find exactly what they need. Your job is to analyze raw research results and extract precisely what the user is looking for.
## USER'S RESEARCH INTENT
PRIMARY QUESTION: {intent.primary_question}
SECONDARY QUESTIONS:
{chr(10).join(f'- {q}' for q in intent.secondary_questions) if intent.secondary_questions else 'None specified'}
PURPOSE: {intent.purpose}
{purpose_explanation}
CONTENT OUTPUT: {intent.content_output}
EXPECTED DELIVERABLES: {', '.join(intent.expected_deliverables)}
FOCUS AREAS: {', '.join(intent.focus_areas) if intent.focus_areas else 'General'}
{perspective_instruction}
## RAW RESEARCH RESULTS
{raw_results[:15000]}
## YOUR TASK
Analyze the raw research results and extract EXACTLY what the user needs.
{deliverables_instructions}
## OUTPUT REQUIREMENTS
Provide results in this JSON structure:
```json
{{
"primary_answer": "Direct 2-3 sentence answer to the primary question",
"secondary_answers": {{
"Question 1?": "Answer to question 1",
"Question 2?": "Answer to question 2"
}},
"executive_summary": "2-3 sentence executive summary of all findings",
"key_takeaways": [
"Key takeaway 1 - most important finding",
"Key takeaway 2",
"Key takeaway 3",
"Key takeaway 4",
"Key takeaway 5"
],
"statistics": [
{{
"statistic": "72% of hospitals plan to adopt AI by 2025",
"value": "72%",
"context": "Survey of 500 US hospitals in 2024",
"source": "Healthcare AI Report 2024",
"url": "https://example.com/report",
"credibility": 0.9,
"recency": "2024"
}}
],
"expert_quotes": [
{{
"quote": "AI will revolutionize patient care within 5 years",
"speaker": "Dr. Jane Smith",
"title": "Chief Medical Officer",
"organization": "HealthTech Inc",
"source": "TechCrunch",
"url": "https://example.com/article"
}}
],
"case_studies": [
{{
"title": "Mayo Clinic AI Implementation",
"organization": "Mayo Clinic",
"challenge": "High patient wait times",
"solution": "AI-powered triage system",
"outcome": "40% reduction in wait times",
"key_metrics": ["40% faster triage", "95% patient satisfaction"],
"source": "Healthcare IT News",
"url": "https://example.com"
}}
],
"trends": [
{{
"trend": "AI-assisted diagnostics adoption",
"direction": "growing",
"evidence": ["25% YoY growth", "Major hospital chains investing"],
"impact": "Could reduce misdiagnosis by 30%",
"timeline": "Expected mainstream by 2027",
"sources": ["url1", "url2"]
}}
],
"comparisons": [
{{
"title": "Top AI Healthcare Platforms",
"criteria": ["Cost", "Features", "Support"],
"items": [
{{
"name": "Platform A",
"pros": ["Easy integration", "Good support"],
"cons": ["Higher cost"],
"features": {{"Cost": "$500/month", "Support": "24/7"}}
}}
],
"verdict": "Platform A best for large hospitals"
}}
],
"best_practices": [
"Start with a pilot program before full deployment",
"Ensure staff training is comprehensive"
],
"step_by_step": [
"Step 1: Assess current infrastructure",
"Step 2: Define use cases",
"Step 3: Select vendor"
],
"pros_cons": {{
"subject": "AI in Healthcare",
"pros": ["Improved accuracy", "Cost savings"],
"cons": ["Initial investment", "Training required"],
"balanced_verdict": "Benefits outweigh costs for most hospitals"
}},
"definitions": {{
"Clinical AI": "AI systems designed for medical diagnosis and treatment recommendations"
}},
"examples": [
"Example: Hospital X reduced readmissions by 25% using predictive AI"
],
"predictions": [
"By 2030, AI will assist in 80% of initial diagnoses"
],
"suggested_outline": [
"1. Introduction: The AI Healthcare Revolution",
"2. Current State: Where We Are Today",
"3. Key Statistics and Trends",
"4. Case Studies: Success Stories",
"5. Implementation Guide",
"6. Future Outlook"
],
"sources": [
{{
"title": "Healthcare AI Report 2024",
"url": "https://example.com",
"relevance_score": 0.95,
"relevance_reason": "Directly addresses adoption statistics",
"content_type": "research report",
"credibility_score": 0.9
}}
],
"confidence": 0.85,
"gaps_identified": [
"Specific cost data for small clinics not found",
"Limited information on regulatory challenges"
],
"follow_up_queries": [
"AI healthcare regulations FDA 2025",
"Small clinic AI implementation costs"
]
}}
```
## CRITICAL RULES
1. **ONLY include information directly from the raw results** - do not make up data
2. **ALWAYS include source URLs** for every statistic, quote, and case study
3. **If a deliverable type has no relevant data**, return an empty array for it
4. **Prioritize recency and credibility** when multiple sources conflict
5. **Answer the PRIMARY QUESTION directly** in 2-3 clear sentences
6. **Keep KEY TAKEAWAYS to 5-7 points** - the most important findings
7. **Add to gaps_identified** if expected information is missing
8. **Suggest follow_up_queries** for gaps or incomplete areas
9. **Rate confidence** based on how well results match the user's intent
10. **Include deliverables ONLY if they are in expected_deliverables** or critical to the question
"""
return prompt
def _build_persona_context(
self,
research_persona: Optional[ResearchPersona],
industry: Optional[str],
target_audience: Optional[str],
) -> str:
"""Build persona context section for prompts."""
        if not research_persona and not industry and not target_audience:
            return "No specific persona context available."
context_parts = []
if research_persona:
context_parts.append(f"INDUSTRY: {research_persona.default_industry}")
context_parts.append(f"TARGET AUDIENCE: {research_persona.default_target_audience}")
if research_persona.suggested_keywords:
context_parts.append(f"TYPICAL TOPICS: {', '.join(research_persona.suggested_keywords[:5])}")
if research_persona.research_angles:
context_parts.append(f"RESEARCH ANGLES: {', '.join(research_persona.research_angles[:3])}")
else:
if industry:
context_parts.append(f"INDUSTRY: {industry}")
if target_audience:
context_parts.append(f"TARGET AUDIENCE: {target_audience}")
return "\n".join(context_parts)
def _build_competitor_context(self, competitor_data: Optional[List[Dict]]) -> str:
"""Build competitor context section for prompts."""
if not competitor_data:
return ""
competitor_names = []
for comp in competitor_data[:5]: # Limit to 5
name = comp.get("name") or comp.get("domain") or comp.get("url", "Unknown")
competitor_names.append(name)
if competitor_names:
return f"\nKNOWN COMPETITORS: {', '.join(competitor_names)}"
return ""
def _build_deliverables_instructions(self, expected_deliverables: List[str]) -> str:
"""Build specific extraction instructions for each expected deliverable."""
instructions = ["### EXTRACTION INSTRUCTIONS\n"]
instructions.append("For each requested deliverable, extract the following:\n")
deliverable_instructions = {
ExpectedDeliverable.KEY_STATISTICS: """
**STATISTICS**:
- Extract ALL relevant statistics with exact numbers
- Include source attribution (publication name, URL)
- Note the recency of the data
- Rate credibility based on source authority
- Format: statistic statement, value, context, source, URL, credibility score
""",
ExpectedDeliverable.EXPERT_QUOTES: """
**EXPERT QUOTES**:
- Extract authoritative quotes from named experts
- Include speaker name, title, and organization
- Provide context for the quote
- Include source URL
""",
ExpectedDeliverable.CASE_STUDIES: """
**CASE STUDIES**:
- Summarize each case study: challenge → solution → outcome
- Include key metrics and results
- Name the organization involved
- Provide source URL
""",
ExpectedDeliverable.TRENDS: """
**TRENDS**:
- Identify current and emerging trends
- Note direction: growing, declining, emerging, or stable
- List supporting evidence
- Include timeline predictions if available
- Cite sources
""",
ExpectedDeliverable.COMPARISONS: """
**COMPARISONS**:
- Build comparison tables where applicable
- Define clear comparison criteria
- List pros and cons for each option
- Provide a verdict/recommendation if data supports it
""",
ExpectedDeliverable.BEST_PRACTICES: """
**BEST PRACTICES**:
- Extract recommended approaches
- Provide actionable guidelines
- Order by importance or sequence
""",
ExpectedDeliverable.STEP_BY_STEP: """
**STEP BY STEP**:
- Extract process/how-to instructions
- Number steps clearly
- Include any prerequisites or requirements
""",
ExpectedDeliverable.PROS_CONS: """
**PROS AND CONS**:
- List advantages (pros)
- List disadvantages (cons)
- Provide a balanced verdict
""",
ExpectedDeliverable.DEFINITIONS: """
**DEFINITIONS**:
- Extract clear explanations of key terms and concepts
- Keep definitions concise but comprehensive
""",
ExpectedDeliverable.EXAMPLES: """
**EXAMPLES**:
- Extract concrete examples that illustrate key points
- Include real-world applications
""",
ExpectedDeliverable.PREDICTIONS: """
**PREDICTIONS**:
- Extract future outlook and predictions
- Note the source and their track record if known
- Include timeframes where mentioned
""",
ExpectedDeliverable.CITATIONS: """
**CITATIONS**:
- List all authoritative sources with URLs
- Rate credibility and relevance
- Note content type (research, news, opinion, etc.)
""",
}
for deliverable in expected_deliverables:
try:
d_enum = ExpectedDeliverable(deliverable)
if d_enum in deliverable_instructions:
instructions.append(deliverable_instructions[d_enum])
except ValueError:
pass
return "\n".join(instructions)


@@ -0,0 +1,387 @@
"""
Intent Query Generator
Generates multiple targeted research queries based on user intent.
Each query targets a specific deliverable or question.
Author: ALwrity Team
Version: 1.0
"""
import json
from typing import Dict, Any, List, Optional
from loguru import logger
from models.research_intent_models import (
ResearchIntent,
ResearchQuery,
ExpectedDeliverable,
ResearchPurpose,
)
from models.research_persona_models import ResearchPersona
from .intent_prompt_builder import IntentPromptBuilder
class IntentQueryGenerator:
"""
Generates targeted research queries based on user intent.
    Instead of running a single generic search, this generates multiple
    queries, each targeting a specific deliverable or question.
"""
def __init__(self):
"""Initialize the query generator."""
self.prompt_builder = IntentPromptBuilder()
logger.info("IntentQueryGenerator initialized")
async def generate_queries(
self,
intent: ResearchIntent,
research_persona: Optional[ResearchPersona] = None,
) -> Dict[str, Any]:
"""
Generate targeted research queries based on intent.
Args:
intent: The inferred research intent
research_persona: Optional persona for context
Returns:
Dict with queries, enhanced_keywords, and research_angles
"""
try:
logger.info(f"Generating queries for: {intent.primary_question[:50]}...")
# Build the query generation prompt
prompt = self.prompt_builder.build_query_generation_prompt(
intent=intent,
research_persona=research_persona,
)
# Define the expected JSON schema
query_schema = {
"type": "object",
"properties": {
"queries": {
"type": "array",
"items": {
"type": "object",
"properties": {
"query": {"type": "string"},
"purpose": {"type": "string"},
"provider": {"type": "string"},
"priority": {"type": "integer"},
"expected_results": {"type": "string"}
},
"required": ["query", "purpose", "provider", "priority", "expected_results"]
}
},
"enhanced_keywords": {"type": "array", "items": {"type": "string"}},
"research_angles": {"type": "array", "items": {"type": "string"}}
},
"required": ["queries", "enhanced_keywords", "research_angles"]
}
# Call LLM for query generation
from services.llm_providers.main_text_generation import llm_text_gen
result = llm_text_gen(
prompt=prompt,
json_struct=query_schema,
user_id=None
)
if isinstance(result, dict) and "error" in result:
logger.error(f"Query generation failed: {result.get('error')}")
return self._create_fallback_queries(intent)
# Parse queries
queries = self._parse_queries(result.get("queries", []))
# Ensure we have queries for all expected deliverables
queries = self._ensure_deliverable_coverage(queries, intent)
# Sort by priority
queries.sort(key=lambda q: q.priority, reverse=True)
logger.info(f"Generated {len(queries)} targeted queries")
return {
"queries": queries,
"enhanced_keywords": result.get("enhanced_keywords", []),
"research_angles": result.get("research_angles", []),
}
except Exception as e:
logger.error(f"Error generating queries: {e}")
return self._create_fallback_queries(intent)
def _parse_queries(self, raw_queries: List[Dict]) -> List[ResearchQuery]:
"""Parse raw query data into ResearchQuery objects."""
queries = []
for q in raw_queries:
try:
# Validate purpose
purpose_str = q.get("purpose", "key_statistics")
try:
purpose = ExpectedDeliverable(purpose_str)
except ValueError:
purpose = ExpectedDeliverable.KEY_STATISTICS
query = ResearchQuery(
query=q.get("query", ""),
purpose=purpose,
provider=q.get("provider", "exa"),
priority=min(max(int(q.get("priority", 3)), 1), 5), # Clamp 1-5
expected_results=q.get("expected_results", ""),
)
queries.append(query)
except Exception as e:
logger.warning(f"Failed to parse query: {e}")
continue
return queries
def _ensure_deliverable_coverage(
self,
queries: List[ResearchQuery],
intent: ResearchIntent,
) -> List[ResearchQuery]:
"""Ensure we have queries for all expected deliverables."""
# Get deliverables already covered
covered = set(q.purpose.value for q in queries)
# Check for missing deliverables
for deliverable in intent.expected_deliverables:
if deliverable not in covered:
# Generate a query for this deliverable
query = self._generate_query_for_deliverable(
deliverable=deliverable,
intent=intent,
)
queries.append(query)
return queries
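    # Example: if intent.expected_deliverables is ["trends", "case_studies"] and
    # the LLM returned only a "trends" query, a templated "case_studies" query is
    # appended so every requested deliverable is covered by at least one query.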
def _generate_query_for_deliverable(
self,
deliverable: str,
intent: ResearchIntent,
) -> ResearchQuery:
"""Generate a query targeting a specific deliverable."""
        # Use the original user input as the topic seed for templated queries
        topic = intent.original_input
# Query templates by deliverable type
templates = {
ExpectedDeliverable.KEY_STATISTICS.value: {
"query": f"{topic} statistics data report study",
"provider": "exa",
"priority": 5,
"expected": "Statistical data and research findings",
},
ExpectedDeliverable.EXPERT_QUOTES.value: {
"query": f"{topic} expert opinion interview insights",
"provider": "exa",
"priority": 4,
"expected": "Expert opinions and authoritative quotes",
},
ExpectedDeliverable.CASE_STUDIES.value: {
"query": f"{topic} case study success story implementation example",
"provider": "exa",
"priority": 4,
"expected": "Real-world case studies and examples",
},
ExpectedDeliverable.TRENDS.value: {
"query": f"{topic} trends 2025 future predictions emerging",
"provider": "tavily",
"priority": 4,
"expected": "Current trends and future predictions",
},
ExpectedDeliverable.COMPARISONS.value: {
"query": f"{topic} comparison vs versus alternatives",
"provider": "exa",
"priority": 4,
"expected": "Comparison and alternative options",
},
ExpectedDeliverable.BEST_PRACTICES.value: {
"query": f"{topic} best practices recommendations guidelines",
"provider": "exa",
"priority": 3,
"expected": "Best practices and recommendations",
},
ExpectedDeliverable.STEP_BY_STEP.value: {
"query": f"{topic} how to guide tutorial steps",
"provider": "exa",
"priority": 3,
"expected": "Step-by-step guides and tutorials",
},
ExpectedDeliverable.PROS_CONS.value: {
"query": f"{topic} advantages disadvantages pros cons benefits",
"provider": "exa",
"priority": 3,
"expected": "Pros, cons, and trade-offs",
},
ExpectedDeliverable.DEFINITIONS.value: {
"query": f"what is {topic} definition explained",
"provider": "exa",
"priority": 3,
"expected": "Clear definitions and explanations",
},
ExpectedDeliverable.EXAMPLES.value: {
"query": f"{topic} examples real world applications",
"provider": "exa",
"priority": 3,
"expected": "Real-world examples and applications",
},
ExpectedDeliverable.PREDICTIONS.value: {
"query": f"{topic} future outlook predictions 2025 2030",
"provider": "tavily",
"priority": 4,
"expected": "Future predictions and outlook",
},
ExpectedDeliverable.CITATIONS.value: {
"query": f"{topic} research paper study academic",
"provider": "exa",
"priority": 4,
"expected": "Authoritative academic sources",
},
}
template = templates.get(deliverable, {
"query": f"{topic}",
"provider": "exa",
"priority": 3,
"expected": "General information",
})
return ResearchQuery(
query=template["query"],
            purpose=(
                ExpectedDeliverable(deliverable)
                if deliverable in {e.value for e in ExpectedDeliverable}
                else ExpectedDeliverable.KEY_STATISTICS
            ),
provider=template["provider"],
priority=template["priority"],
expected_results=template["expected"],
)
def _create_fallback_queries(self, intent: ResearchIntent) -> Dict[str, Any]:
"""Create fallback queries when AI generation fails."""
topic = intent.original_input
# Generate basic queries for each expected deliverable
queries = []
for deliverable in intent.expected_deliverables[:5]: # Limit to 5
query = self._generate_query_for_deliverable(deliverable, intent)
queries.append(query)
# Add a general query if we have none
if not queries:
queries.append(ResearchQuery(
query=topic,
purpose=ExpectedDeliverable.KEY_STATISTICS,
provider="exa",
priority=5,
expected_results="General information and insights",
))
return {
"queries": queries,
"enhanced_keywords": topic.split()[:10],
"research_angles": [
f"Overview of {topic}",
f"Latest trends in {topic}",
],
}
class QueryOptimizer:
"""
Optimizes queries for different research providers.
Different providers have different strengths:
- Exa: Semantic search, good for deep research
- Tavily: Real-time search, good for news/trends
- Google: Factual search, good for basic info
"""
@staticmethod
def optimize_for_exa(query: str, intent: ResearchIntent) -> Dict[str, Any]:
"""Optimize query and parameters for Exa."""
# Determine best Exa settings based on deliverable
deliverables = intent.expected_deliverables
# Determine category
category = None
if ExpectedDeliverable.CITATIONS.value in deliverables:
category = "research paper"
elif ExpectedDeliverable.TRENDS.value in deliverables:
category = "news"
elif intent.purpose == ResearchPurpose.COMPARE.value:
category = "company"
# Determine search type
search_type = "neural" # Default to neural for semantic understanding
if ExpectedDeliverable.TRENDS.value in deliverables:
search_type = "auto" # Auto is better for time-sensitive queries
# Number of results
num_results = 10
if intent.depth == "expert":
num_results = 20
elif intent.depth == "overview":
num_results = 5
return {
"query": query,
"type": search_type,
"category": category,
"num_results": num_results,
"text": True,
"highlights": True,
}
@staticmethod
def optimize_for_tavily(query: str, intent: ResearchIntent) -> Dict[str, Any]:
"""Optimize query and parameters for Tavily."""
deliverables = intent.expected_deliverables
# Determine topic
topic = "general"
if ExpectedDeliverable.TRENDS.value in deliverables:
topic = "news"
# Determine search depth
search_depth = "basic"
if intent.depth in ["detailed", "expert"]:
search_depth = "advanced"
# Include answer for factual queries
include_answer = False
if ExpectedDeliverable.DEFINITIONS.value in deliverables:
include_answer = "advanced"
elif ExpectedDeliverable.KEY_STATISTICS.value in deliverables:
include_answer = "basic"
# Time range for trends
time_range = None
if intent.time_sensitivity == "real_time":
time_range = "day"
elif intent.time_sensitivity == "recent":
time_range = "week"
elif ExpectedDeliverable.TRENDS.value in deliverables:
time_range = "month"
return {
"query": query,
"topic": topic,
"search_depth": search_depth,
"include_answer": include_answer,
"time_range": time_range,
"max_results": 10,
}
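# Worked example (values follow the branches above): for an intent with
# depth="expert", expected_deliverables=["trends"], and no time_sensitivity,
# optimize_for_exa returns {"type": "auto", "category": "news",
# "num_results": 20, "text": True, "highlights": True} plus the query, and
# optimize_for_tavily returns topic="news", search_depth="advanced",
# include_answer=False, time_range="month", max_results=10.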


@@ -0,0 +1,378 @@
"""
Research Intent Inference Service
Analyzes user input to understand their research intent.
Uses AI to infer:
- What the user wants to accomplish
- What questions need answering
- What deliverables they expect
Author: ALwrity Team
Version: 1.0
"""
import json
from typing import Dict, Any, List, Optional
from loguru import logger
from models.research_intent_models import (
ResearchIntent,
ResearchPurpose,
ContentOutput,
ExpectedDeliverable,
ResearchDepthLevel,
InputType,
IntentInferenceRequest,
IntentInferenceResponse,
ResearchQuery,
)
from models.research_persona_models import ResearchPersona
from .intent_prompt_builder import IntentPromptBuilder
class ResearchIntentInference:
"""
Infers user research intent from minimal input.
    Instead of asking the user to fill out a formal questionnaire,
    this service uses AI to understand what they really want.
"""
def __init__(self):
"""Initialize the intent inference service."""
self.prompt_builder = IntentPromptBuilder()
logger.info("ResearchIntentInference initialized")
async def infer_intent(
self,
user_input: str,
keywords: Optional[List[str]] = None,
research_persona: Optional[ResearchPersona] = None,
competitor_data: Optional[List[Dict]] = None,
industry: Optional[str] = None,
target_audience: Optional[str] = None,
) -> IntentInferenceResponse:
"""
Analyze user input and infer their research intent.
Args:
user_input: User's keywords, question, or goal
keywords: Extracted keywords (optional)
research_persona: User's research persona (optional)
competitor_data: Competitor analysis data (optional)
industry: Industry context (optional)
target_audience: Target audience context (optional)
Returns:
IntentInferenceResponse with inferred intent and suggested queries
"""
try:
logger.info(f"Inferring intent for: {user_input[:100]}...")
keywords = keywords or []
# Build the inference prompt
prompt = self.prompt_builder.build_intent_inference_prompt(
user_input=user_input,
keywords=keywords,
research_persona=research_persona,
competitor_data=competitor_data,
industry=industry,
target_audience=target_audience,
)
# Define the expected JSON schema
intent_schema = {
"type": "object",
"properties": {
"input_type": {"type": "string", "enum": ["keywords", "question", "goal", "mixed"]},
"primary_question": {"type": "string"},
"secondary_questions": {"type": "array", "items": {"type": "string"}},
"purpose": {"type": "string"},
"content_output": {"type": "string"},
"expected_deliverables": {"type": "array", "items": {"type": "string"}},
"depth": {"type": "string", "enum": ["overview", "detailed", "expert"]},
"focus_areas": {"type": "array", "items": {"type": "string"}},
"perspective": {"type": "string"},
"time_sensitivity": {"type": "string"},
"confidence": {"type": "number"},
"needs_clarification": {"type": "boolean"},
"clarifying_questions": {"type": "array", "items": {"type": "string"}},
"analysis_summary": {"type": "string"}
},
"required": [
"input_type", "primary_question", "purpose", "content_output",
"expected_deliverables", "depth", "confidence", "analysis_summary"
]
}
# Call LLM for intent inference
from services.llm_providers.main_text_generation import llm_text_gen
result = llm_text_gen(
prompt=prompt,
json_struct=intent_schema,
user_id=None
)
if isinstance(result, dict) and "error" in result:
logger.error(f"Intent inference failed: {result.get('error')}")
return self._create_fallback_response(user_input, keywords)
# Parse and validate the result
intent = self._parse_intent_result(result, user_input)
# Generate quick options for UI
quick_options = self._generate_quick_options(intent, result)
# Create response
response = IntentInferenceResponse(
success=True,
intent=intent,
analysis_summary=result.get("analysis_summary", "Research intent analyzed"),
suggested_queries=[], # Will be populated by query generator
suggested_keywords=self._extract_keywords_from_input(user_input, keywords),
suggested_angles=result.get("focus_areas", []),
quick_options=quick_options,
)
logger.info(f"Intent inferred: purpose={intent.purpose}, confidence={intent.confidence}")
return response
except Exception as e:
logger.error(f"Error inferring intent: {e}")
return self._create_fallback_response(user_input, keywords or [])
def _parse_intent_result(self, result: Dict[str, Any], user_input: str) -> ResearchIntent:
"""Parse LLM result into ResearchIntent model."""
# Map string values to enums safely
input_type = self._safe_enum(InputType, result.get("input_type", "keywords"), InputType.KEYWORDS)
purpose = self._safe_enum(ResearchPurpose, result.get("purpose", "learn"), ResearchPurpose.LEARN)
content_output = self._safe_enum(ContentOutput, result.get("content_output", "general"), ContentOutput.GENERAL)
depth = self._safe_enum(ResearchDepthLevel, result.get("depth", "detailed"), ResearchDepthLevel.DETAILED)
# Parse expected deliverables
raw_deliverables = result.get("expected_deliverables", [])
expected_deliverables = []
for d in raw_deliverables:
try:
expected_deliverables.append(ExpectedDeliverable(d).value)
except ValueError:
# Skip invalid deliverables
pass
# Ensure we have at least some deliverables
if not expected_deliverables:
expected_deliverables = self._infer_deliverables_from_purpose(purpose)
return ResearchIntent(
primary_question=result.get("primary_question", user_input),
secondary_questions=result.get("secondary_questions", []),
purpose=purpose.value,
content_output=content_output.value,
expected_deliverables=expected_deliverables,
depth=depth.value,
focus_areas=result.get("focus_areas", []),
perspective=result.get("perspective"),
time_sensitivity=result.get("time_sensitivity"),
input_type=input_type.value,
original_input=user_input,
confidence=float(result.get("confidence", 0.7)),
needs_clarification=result.get("needs_clarification", False),
clarifying_questions=result.get("clarifying_questions", []),
)
def _safe_enum(self, enum_class, value: str, default):
"""Safely convert string to enum, returning default if invalid."""
try:
return enum_class(value)
except ValueError:
return default
def _infer_deliverables_from_purpose(self, purpose: ResearchPurpose) -> List[str]:
"""Infer expected deliverables based on research purpose."""
purpose_deliverables = {
ResearchPurpose.LEARN: [
ExpectedDeliverable.DEFINITIONS.value,
ExpectedDeliverable.EXAMPLES.value,
ExpectedDeliverable.KEY_STATISTICS.value,
],
ResearchPurpose.CREATE_CONTENT: [
ExpectedDeliverable.KEY_STATISTICS.value,
ExpectedDeliverable.EXPERT_QUOTES.value,
ExpectedDeliverable.EXAMPLES.value,
ExpectedDeliverable.CASE_STUDIES.value,
],
ResearchPurpose.MAKE_DECISION: [
ExpectedDeliverable.PROS_CONS.value,
ExpectedDeliverable.COMPARISONS.value,
ExpectedDeliverable.BEST_PRACTICES.value,
],
ResearchPurpose.COMPARE: [
ExpectedDeliverable.COMPARISONS.value,
ExpectedDeliverable.PROS_CONS.value,
ExpectedDeliverable.KEY_STATISTICS.value,
],
ResearchPurpose.SOLVE_PROBLEM: [
ExpectedDeliverable.STEP_BY_STEP.value,
ExpectedDeliverable.BEST_PRACTICES.value,
ExpectedDeliverable.CASE_STUDIES.value,
],
ResearchPurpose.FIND_DATA: [
ExpectedDeliverable.KEY_STATISTICS.value,
ExpectedDeliverable.CITATIONS.value,
],
ResearchPurpose.EXPLORE_TRENDS: [
ExpectedDeliverable.TRENDS.value,
ExpectedDeliverable.PREDICTIONS.value,
ExpectedDeliverable.KEY_STATISTICS.value,
],
ResearchPurpose.VALIDATE: [
ExpectedDeliverable.CITATIONS.value,
ExpectedDeliverable.KEY_STATISTICS.value,
ExpectedDeliverable.EXPERT_QUOTES.value,
],
ResearchPurpose.GENERATE_IDEAS: [
ExpectedDeliverable.EXAMPLES.value,
ExpectedDeliverable.TRENDS.value,
ExpectedDeliverable.CASE_STUDIES.value,
],
}
return purpose_deliverables.get(purpose, [ExpectedDeliverable.KEY_STATISTICS.value])
def _generate_quick_options(self, intent: ResearchIntent, result: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Generate quick options for UI confirmation."""
options = []
# Purpose option
options.append({
"id": "purpose",
"label": "Research Purpose",
"value": intent.purpose,
"display": self._purpose_display(intent.purpose),
"alternatives": [p.value for p in ResearchPurpose],
"confidence": result.get("confidence", 0.7),
})
# Content output option
if intent.content_output != ContentOutput.GENERAL.value:
options.append({
"id": "content_output",
"label": "Content Type",
"value": intent.content_output,
"display": intent.content_output.replace("_", " ").title(),
"alternatives": [c.value for c in ContentOutput],
"confidence": result.get("confidence", 0.7),
})
# Deliverables option
options.append({
"id": "deliverables",
"label": "What I'll Find",
"value": intent.expected_deliverables,
"display": [d.replace("_", " ").title() for d in intent.expected_deliverables[:4]],
"alternatives": [d.value for d in ExpectedDeliverable],
"confidence": result.get("confidence", 0.7),
"multi_select": True,
})
# Depth option
options.append({
"id": "depth",
"label": "Research Depth",
"value": intent.depth,
"display": intent.depth.title(),
"alternatives": [d.value for d in ResearchDepthLevel],
"confidence": result.get("confidence", 0.7),
})
return options
def _purpose_display(self, purpose: str) -> str:
"""Get display-friendly purpose text."""
display_map = {
"learn": "Understand this topic",
"create_content": "Create content about this",
"make_decision": "Make a decision",
"compare": "Compare options",
"solve_problem": "Solve a problem",
"find_data": "Find specific data",
"explore_trends": "Explore trends",
"validate": "Validate information",
"generate_ideas": "Generate ideas",
}
return display_map.get(purpose, purpose.replace("_", " ").title())
def _extract_keywords_from_input(self, user_input: str, keywords: List[str]) -> List[str]:
"""Extract and enhance keywords from user input."""
# Start with provided keywords
extracted = list(keywords) if keywords else []
# Simple extraction from input (split on common delimiters)
words = user_input.lower().replace(",", " ").replace(";", " ").split()
# Filter out common words
stop_words = {
"the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
"have", "has", "had", "do", "does", "did", "will", "would", "could",
"should", "may", "might", "must", "shall", "can", "need", "dare",
"to", "of", "in", "for", "on", "with", "at", "by", "from", "up",
"about", "into", "through", "during", "before", "after", "above",
"below", "between", "under", "again", "further", "then", "once",
"here", "there", "when", "where", "why", "how", "all", "each",
"few", "more", "most", "other", "some", "such", "no", "nor", "not",
"only", "own", "same", "so", "than", "too", "very", "just", "and",
"but", "if", "or", "because", "as", "until", "while", "i", "we",
"you", "they", "what", "which", "who", "whom", "this", "that",
"these", "those", "am", "want", "write", "blog", "post", "article",
}
for word in words:
if word not in stop_words and len(word) > 2 and word not in extracted:
extracted.append(word)
return extracted[:15] # Limit to 15 keywords
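    # Worked example: _extract_keywords_from_input("I want to write a blog about
    # AI in healthcare", []) returns ["healthcare"]: stop words ("want", "write",
    # "blog", ...) are dropped, and the len(word) > 2 filter also drops
    # two-letter terms such as "ai".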
def _create_fallback_response(self, user_input: str, keywords: List[str]) -> IntentInferenceResponse:
"""Create a fallback response when AI inference fails."""
# Create a basic intent from the input
fallback_intent = ResearchIntent(
primary_question=f"What are the key insights about: {user_input}?",
secondary_questions=[
f"What are the latest trends in {user_input}?",
f"What are best practices for {user_input}?",
],
purpose=ResearchPurpose.LEARN.value,
content_output=ContentOutput.GENERAL.value,
expected_deliverables=[
ExpectedDeliverable.KEY_STATISTICS.value,
ExpectedDeliverable.EXAMPLES.value,
ExpectedDeliverable.BEST_PRACTICES.value,
],
depth=ResearchDepthLevel.DETAILED.value,
focus_areas=[],
input_type=InputType.KEYWORDS.value,
original_input=user_input,
confidence=0.5,
needs_clarification=True,
clarifying_questions=[
"What type of content are you creating?",
"What specific aspects are you most interested in?",
],
)
return IntentInferenceResponse(
success=True, # Still return success, just with lower confidence
intent=fallback_intent,
analysis_summary=f"Basic research analysis for: {user_input}",
suggested_queries=[],
suggested_keywords=keywords,
suggested_angles=[],
quick_options=[],
)
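# Hedged usage sketch (illustrative; in the app this is awaited from an async
# route rather than driven by asyncio.run):
#
#   import asyncio
#   service = ResearchIntentInference()
#   resp = asyncio.run(service.infer_intent("compare CRM tools for startups"))
#   if resp.intent.needs_clarification:
#       print(resp.intent.clarifying_questions)
#   else:
#       print(resp.intent.primary_question, resp.intent.expected_deliverables)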