Base code
This commit is contained in:
23
backend/services/research/intent/__init__.py
Normal file
23
backend/services/research/intent/__init__.py
Normal file
@@ -0,0 +1,23 @@
"""
Research Intent Package

This package provides intent-driven research capabilities:
- Intent inference from user input
- Targeted query generation
- Intent-aware result analysis

Author: ALwrity Team
Version: 1.0
"""

from .research_intent_inference import ResearchIntentInference
from .intent_query_generator import IntentQueryGenerator
from .intent_aware_analyzer import IntentAwareAnalyzer
from .intent_prompt_builder import IntentPromptBuilder

# Public API of the intent package: the four pipeline stages
# (infer intent -> generate queries -> analyze results, plus the
# shared prompt builder used by the other three).
__all__ = [
    "ResearchIntentInference",
    "IntentQueryGenerator",
    "IntentAwareAnalyzer",
    "IntentPromptBuilder",
]
||||
547
backend/services/research/intent/intent_aware_analyzer.py
Normal file
547
backend/services/research/intent/intent_aware_analyzer.py
Normal file
@@ -0,0 +1,547 @@
|
||||
"""
|
||||
Intent-Aware Result Analyzer
|
||||
|
||||
Analyzes research results based on user intent.
|
||||
Extracts exactly what the user needs from raw research data.
|
||||
|
||||
This is the key innovation - instead of generic analysis,
|
||||
we analyze results through the lens of what the user wants to accomplish.
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 1.0
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from models.research_intent_models import (
|
||||
ResearchIntent,
|
||||
IntentDrivenResearchResult,
|
||||
ExpectedDeliverable,
|
||||
StatisticWithCitation,
|
||||
ExpertQuote,
|
||||
CaseStudySummary,
|
||||
TrendAnalysis,
|
||||
ComparisonTable,
|
||||
ComparisonItem,
|
||||
ProsCons,
|
||||
SourceWithRelevance,
|
||||
)
|
||||
from models.research_persona_models import ResearchPersona
|
||||
from .intent_prompt_builder import IntentPromptBuilder
|
||||
|
||||
|
||||
class IntentAwareAnalyzer:
    """
    Analyzes research results based on user intent.

    Instead of generic summaries, this extracts exactly what the user
    needs: statistics, quotes, case studies, trends, etc.
    """

    def __init__(self):
        """Initialize the analyzer with a prompt builder for analysis prompts."""
        self.prompt_builder = IntentPromptBuilder()
        logger.info("IntentAwareAnalyzer initialized")

    async def analyze(
        self,
        raw_results: Dict[str, Any],
        intent: ResearchIntent,
        research_persona: Optional[ResearchPersona] = None,
    ) -> IntentDrivenResearchResult:
        """
        Analyze raw research results based on user intent.

        Args:
            raw_results: Raw results from Exa/Tavily/Google
            intent: The user's research intent
            research_persona: Optional persona for context

        Returns:
            IntentDrivenResearchResult with extracted deliverables.
            Never raises: any failure degrades to a fallback result
            built directly from the raw provider output.
        """
        try:
            logger.info(f"Analyzing results for intent: {intent.primary_question[:50]}...")

            # Flatten provider output into one text blob for the LLM
            formatted_results = self._format_raw_results(raw_results)

            # Build the analysis prompt
            prompt = self.prompt_builder.build_intent_aware_analysis_prompt(
                raw_results=formatted_results,
                intent=intent,
                research_persona=research_persona,
            )

            # Define the expected JSON schema (shape depends on requested deliverables)
            analysis_schema = self._build_analysis_schema(intent.expected_deliverables)

            # Imported lazily so module import does not pull in the LLM stack
            from services.llm_providers.main_text_generation import llm_text_gen

            result = llm_text_gen(
                prompt=prompt,
                json_struct=analysis_schema,
                user_id=None
            )

            # llm_text_gen signals failure via an "error" key rather than raising
            if isinstance(result, dict) and "error" in result:
                logger.error(f"Intent-aware analysis failed: {result.get('error')}")
                return self._create_fallback_result(raw_results, intent)

            # Parse and validate the result
            analyzed_result = self._parse_analysis_result(result, intent, raw_results)

            logger.info(
                f"Analysis complete: {len(analyzed_result.key_takeaways)} takeaways, "
                f"{len(analyzed_result.statistics)} stats, "
                f"{len(analyzed_result.sources)} sources"
            )

            return analyzed_result

        except Exception as e:
            # Broad catch is intentional: analysis is best-effort and must
            # always return a usable (fallback) result to the caller.
            logger.error(f"Error in intent-aware analysis: {e}")
            return self._create_fallback_result(raw_results, intent)

    def _format_raw_results(self, raw_results: Dict[str, Any]) -> str:
        """Format raw research results into a sectioned text blob for LLM analysis.

        Sections (emitted only when present in raw_results): main content,
        per-source excerpts (capped at 15 sources / 500 chars each),
        grounding metadata (Google), and an AI-generated answer (Tavily).
        Returns "" when raw_results contains none of these.
        """

        formatted_parts = []

        # Extract content (truncated to keep the prompt within token limits)
        content = raw_results.get("content", "")
        if content:
            formatted_parts.append(f"=== MAIN CONTENT ===\n{content[:8000]}")

        # Extract sources with their content
        sources = raw_results.get("sources", [])
        if sources:
            formatted_parts.append("\n=== SOURCES ===")
            for i, source in enumerate(sources[:15], 1):  # Limit to 15 sources
                title = source.get("title", "Untitled")
                url = source.get("url", "")
                # Providers disagree on the field name for the snippet
                excerpt = source.get("excerpt", source.get("text", source.get("content", "")))

                formatted_parts.append(f"\nSource {i}: {title}")
                formatted_parts.append(f"URL: {url}")
                if excerpt:
                    formatted_parts.append(f"Content: {excerpt[:500]}")

        # Extract grounding metadata if available (from Google)
        grounding = raw_results.get("grounding_metadata", {})
        if grounding:
            formatted_parts.append("\n=== GROUNDING DATA ===")
            # default=str: provider metadata may hold non-JSON-serializable
            # values (datetimes, custom objects); stringify instead of raising.
            formatted_parts.append(json.dumps(grounding, indent=2, default=str)[:2000])

        # Extract any AI answers (from Tavily)
        answer = raw_results.get("answer", "")
        if answer:
            formatted_parts.append(f"\n=== AI-GENERATED ANSWER ===\n{answer}")

        return "\n".join(formatted_parts)

    def _build_analysis_schema(self, expected_deliverables: List[str]) -> Dict[str, Any]:
        """Build the JSON schema for the LLM response.

        A base schema (answer / summary / takeaways / confidence) is always
        present; deliverable-specific properties are added only for the
        deliverables the intent actually requested, so the LLM is not asked
        to fabricate sections the user does not need.
        """

        # Base schema - always required regardless of deliverables
        schema = {
            "type": "object",
            "properties": {
                "primary_answer": {"type": "string"},
                "secondary_answers": {
                    "type": "object",
                    "additionalProperties": {"type": "string"}
                },
                "executive_summary": {"type": "string"},
                "key_takeaways": {
                    "type": "array",
                    "items": {"type": "string"},
                    "maxItems": 7
                },
                "confidence": {"type": "number"},
                "gaps_identified": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "follow_up_queries": {
                    "type": "array",
                    "items": {"type": "string"}
                },
            },
            "required": ["primary_answer", "executive_summary", "key_takeaways", "confidence"]
        }

        # Add deliverable-specific properties
        if ExpectedDeliverable.KEY_STATISTICS.value in expected_deliverables:
            schema["properties"]["statistics"] = {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "statistic": {"type": "string"},
                        "value": {"type": "string"},
                        "context": {"type": "string"},
                        "source": {"type": "string"},
                        "url": {"type": "string"},
                        "credibility": {"type": "number"},
                        "recency": {"type": "string"}
                    },
                    "required": ["statistic", "context", "source", "url"]
                }
            }

        if ExpectedDeliverable.EXPERT_QUOTES.value in expected_deliverables:
            schema["properties"]["expert_quotes"] = {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "quote": {"type": "string"},
                        "speaker": {"type": "string"},
                        "title": {"type": "string"},
                        "organization": {"type": "string"},
                        "source": {"type": "string"},
                        "url": {"type": "string"}
                    },
                    "required": ["quote", "speaker", "source", "url"]
                }
            }

        if ExpectedDeliverable.CASE_STUDIES.value in expected_deliverables:
            schema["properties"]["case_studies"] = {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "title": {"type": "string"},
                        "organization": {"type": "string"},
                        "challenge": {"type": "string"},
                        "solution": {"type": "string"},
                        "outcome": {"type": "string"},
                        "key_metrics": {"type": "array", "items": {"type": "string"}},
                        "source": {"type": "string"},
                        "url": {"type": "string"}
                    },
                    "required": ["title", "organization", "challenge", "solution", "outcome"]
                }
            }

        if ExpectedDeliverable.TRENDS.value in expected_deliverables:
            schema["properties"]["trends"] = {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "trend": {"type": "string"},
                        "direction": {"type": "string"},
                        "evidence": {"type": "array", "items": {"type": "string"}},
                        "impact": {"type": "string"},
                        "timeline": {"type": "string"},
                        "sources": {"type": "array", "items": {"type": "string"}}
                    },
                    "required": ["trend", "direction", "evidence"]
                }
            }

        if ExpectedDeliverable.COMPARISONS.value in expected_deliverables:
            schema["properties"]["comparisons"] = {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "title": {"type": "string"},
                        "criteria": {"type": "array", "items": {"type": "string"}},
                        "items": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "name": {"type": "string"},
                                    "pros": {"type": "array", "items": {"type": "string"}},
                                    "cons": {"type": "array", "items": {"type": "string"}},
                                    "features": {"type": "object"}
                                }
                            }
                        },
                        "verdict": {"type": "string"}
                    }
                }
            }

        if ExpectedDeliverable.PROS_CONS.value in expected_deliverables:
            schema["properties"]["pros_cons"] = {
                "type": "object",
                "properties": {
                    "subject": {"type": "string"},
                    "pros": {"type": "array", "items": {"type": "string"}},
                    "cons": {"type": "array", "items": {"type": "string"}},
                    "balanced_verdict": {"type": "string"}
                }
            }

        if ExpectedDeliverable.BEST_PRACTICES.value in expected_deliverables:
            schema["properties"]["best_practices"] = {
                "type": "array",
                "items": {"type": "string"}
            }

        if ExpectedDeliverable.STEP_BY_STEP.value in expected_deliverables:
            schema["properties"]["step_by_step"] = {
                "type": "array",
                "items": {"type": "string"}
            }

        if ExpectedDeliverable.DEFINITIONS.value in expected_deliverables:
            schema["properties"]["definitions"] = {
                "type": "object",
                "additionalProperties": {"type": "string"}
            }

        if ExpectedDeliverable.EXAMPLES.value in expected_deliverables:
            schema["properties"]["examples"] = {
                "type": "array",
                "items": {"type": "string"}
            }

        if ExpectedDeliverable.PREDICTIONS.value in expected_deliverables:
            schema["properties"]["predictions"] = {
                "type": "array",
                "items": {"type": "string"}
            }

        # Always include sources and suggested outline
        schema["properties"]["sources"] = {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "title": {"type": "string"},
                    "url": {"type": "string"},
                    "relevance_score": {"type": "number"},
                    "relevance_reason": {"type": "string"},
                    "content_type": {"type": "string"},
                    "credibility_score": {"type": "number"}
                },
                "required": ["title", "url"]
            }
        }

        schema["properties"]["suggested_outline"] = {
            "type": "array",
            "items": {"type": "string"}
        }

        return schema

    def _parse_analysis_result(
        self,
        result: Dict[str, Any],
        intent: ResearchIntent,
        raw_results: Dict[str, Any],
    ) -> IntentDrivenResearchResult:
        """Parse LLM analysis result into a structured IntentDrivenResearchResult.

        Each deliverable list is parsed item-by-item; a malformed item is
        logged and skipped rather than failing the whole parse. LLMs may
        emit explicit JSON nulls for absent sections, so every
        ``result.get(...)`` used as an iterable is guarded with ``or []``.
        """

        # Parse statistics
        statistics = []
        for stat in result.get("statistics", []) or []:
            try:
                statistics.append(StatisticWithCitation(
                    statistic=stat.get("statistic", ""),
                    value=stat.get("value"),
                    context=stat.get("context", ""),
                    source=stat.get("source", ""),
                    url=stat.get("url", ""),
                    credibility=float(stat.get("credibility", 0.8)),
                    recency=stat.get("recency"),
                ))
            except Exception as e:
                logger.warning(f"Failed to parse statistic: {e}")

        # Parse expert quotes
        expert_quotes = []
        for quote in result.get("expert_quotes", []) or []:
            try:
                expert_quotes.append(ExpertQuote(
                    quote=quote.get("quote", ""),
                    speaker=quote.get("speaker", ""),
                    title=quote.get("title"),
                    organization=quote.get("organization"),
                    context=quote.get("context"),
                    source=quote.get("source", ""),
                    url=quote.get("url", ""),
                ))
            except Exception as e:
                logger.warning(f"Failed to parse expert quote: {e}")

        # Parse case studies
        case_studies = []
        for cs in result.get("case_studies", []) or []:
            try:
                case_studies.append(CaseStudySummary(
                    title=cs.get("title", ""),
                    organization=cs.get("organization", ""),
                    challenge=cs.get("challenge", ""),
                    solution=cs.get("solution", ""),
                    outcome=cs.get("outcome", ""),
                    key_metrics=cs.get("key_metrics", []),
                    source=cs.get("source", ""),
                    url=cs.get("url", ""),
                ))
            except Exception as e:
                logger.warning(f"Failed to parse case study: {e}")

        # Parse trends
        trends = []
        for trend in result.get("trends", []) or []:
            try:
                trends.append(TrendAnalysis(
                    trend=trend.get("trend", ""),
                    direction=trend.get("direction", "growing"),
                    evidence=trend.get("evidence", []),
                    impact=trend.get("impact"),
                    timeline=trend.get("timeline"),
                    sources=trend.get("sources", []),
                ))
            except Exception as e:
                logger.warning(f"Failed to parse trend: {e}")

        # Parse comparisons (nested: each table has its own item list)
        comparisons = []
        for comp in result.get("comparisons", []) or []:
            try:
                items = []
                for item in comp.get("items", []) or []:
                    items.append(ComparisonItem(
                        name=item.get("name", ""),
                        description=item.get("description"),
                        pros=item.get("pros", []),
                        cons=item.get("cons", []),
                        features=item.get("features", {}),
                        rating=item.get("rating"),
                        source=item.get("source"),
                    ))
                comparisons.append(ComparisonTable(
                    title=comp.get("title", ""),
                    criteria=comp.get("criteria", []),
                    items=items,
                    winner=comp.get("winner"),
                    verdict=comp.get("verdict"),
                ))
            except Exception as e:
                logger.warning(f"Failed to parse comparison: {e}")

        # Parse pros/cons (single object, not a list)
        pros_cons = None
        pc_data = result.get("pros_cons")
        if pc_data:
            try:
                pros_cons = ProsCons(
                    subject=pc_data.get("subject", intent.original_input),
                    pros=pc_data.get("pros", []),
                    cons=pc_data.get("cons", []),
                    balanced_verdict=pc_data.get("balanced_verdict", ""),
                )
            except Exception as e:
                logger.warning(f"Failed to parse pros/cons: {e}")

        # Parse sources
        sources = []
        for src in result.get("sources", []) or []:
            try:
                sources.append(SourceWithRelevance(
                    title=src.get("title", ""),
                    url=src.get("url", ""),
                    excerpt=src.get("excerpt"),
                    relevance_score=float(src.get("relevance_score", 0.8)),
                    relevance_reason=src.get("relevance_reason"),
                    content_type=src.get("content_type"),
                    published_date=src.get("published_date"),
                    credibility_score=float(src.get("credibility_score", 0.8)),
                ))
            except Exception as e:
                logger.warning(f"Failed to parse source: {e}")

        # If no sources from analysis, extract from raw results
        if not sources:
            sources = self._extract_sources_from_raw(raw_results)

        return IntentDrivenResearchResult(
            success=True,
            primary_answer=result.get("primary_answer", ""),
            secondary_answers=result.get("secondary_answers", {}) or {},
            statistics=statistics,
            expert_quotes=expert_quotes,
            case_studies=case_studies,
            comparisons=comparisons,
            trends=trends,
            best_practices=result.get("best_practices", []) or [],
            step_by_step=result.get("step_by_step", []) or [],
            pros_cons=pros_cons,
            definitions=result.get("definitions", {}) or {},
            examples=result.get("examples", []) or [],
            predictions=result.get("predictions", []) or [],
            executive_summary=result.get("executive_summary", ""),
            key_takeaways=result.get("key_takeaways", []) or [],
            suggested_outline=result.get("suggested_outline", []) or [],
            sources=sources,
            raw_content=self._format_raw_results(raw_results)[:5000],
            confidence=float(result.get("confidence", 0.7)),
            gaps_identified=result.get("gaps_identified", []) or [],
            follow_up_queries=result.get("follow_up_queries", []) or [],
            original_intent=intent,
        )

    def _extract_sources_from_raw(self, raw_results: Dict[str, Any]) -> List[SourceWithRelevance]:
        """Extract up to 10 sources from raw results when analysis doesn't provide them.

        Relevance defaults to 0.8 since no per-source analysis happened.
        """

        sources = []
        for src in raw_results.get("sources", [])[:10]:
            try:
                # "or" chain (not nested defaults) so explicit nulls don't
                # reach the [:200] slice and drop the source with a warning
                sources.append(SourceWithRelevance(
                    title=src.get("title", "Untitled"),
                    url=src.get("url", ""),
                    excerpt=(src.get("excerpt") or src.get("text") or "")[:200],
                    relevance_score=0.8,
                    credibility_score=float(src.get("credibility_score", 0.8)),
                ))
            except Exception as e:
                logger.warning(f"Failed to extract source: {e}")

        return sources

    def _create_fallback_result(
        self,
        raw_results: Dict[str, Any],
        intent: ResearchIntent,
    ) -> IntentDrivenResearchResult:
        """Create a degraded-but-usable result when AI analysis fails.

        Takeaways are naively derived from the first sentences of the raw
        content; gaps_identified flags the failure so the UI can surface it.
        """

        # Extract basic information from raw results
        content = raw_results.get("content", "")
        sources = self._extract_sources_from_raw(raw_results)

        # Create basic takeaways from content
        key_takeaways = []
        if content:
            sentences = content.split(". ")[:5]
            # rstrip(".") first so a fragment that kept its final period
            # doesn't come out as "sentence.."
            key_takeaways = [s.strip().rstrip(".") + "." for s in sentences if len(s) > 20]

        return IntentDrivenResearchResult(
            success=True,
            primary_answer=f"Research findings for: {intent.primary_question}",
            secondary_answers={},
            executive_summary=content[:300] if content else "Research completed",
            key_takeaways=key_takeaways,
            sources=sources,
            raw_content=self._format_raw_results(raw_results)[:5000],
            confidence=0.5,
            gaps_identified=[
                "AI analysis failed - showing raw results",
                "Manual review recommended"
            ],
            follow_up_queries=[],
            original_intent=intent,
        )
|
||||
627
backend/services/research/intent/intent_prompt_builder.py
Normal file
627
backend/services/research/intent/intent_prompt_builder.py
Normal file
@@ -0,0 +1,627 @@
|
||||
"""
|
||||
Intent Prompt Builder
|
||||
|
||||
Builds comprehensive AI prompts for:
|
||||
1. Intent inference from user input
|
||||
2. Targeted query generation
|
||||
3. Intent-aware result analysis
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 1.0
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from models.research_intent_models import (
|
||||
ResearchIntent,
|
||||
ResearchPurpose,
|
||||
ContentOutput,
|
||||
ExpectedDeliverable,
|
||||
ResearchDepthLevel,
|
||||
)
|
||||
from models.research_persona_models import ResearchPersona
|
||||
|
||||
|
||||
class IntentPromptBuilder:
|
||||
"""Builds prompts for intent-driven research."""
|
||||
|
||||
# Purpose explanations for the AI
# Maps each ResearchPurpose enum member to a one-line explanation that is
# interpolated into prompts (see build_query_generation_prompt).
PURPOSE_EXPLANATIONS = {
    ResearchPurpose.LEARN: "User wants to understand a topic for personal knowledge",
    ResearchPurpose.CREATE_CONTENT: "User will create content (blog, video, podcast) from this research",
    ResearchPurpose.MAKE_DECISION: "User needs to make a choice/decision based on research",
    ResearchPurpose.COMPARE: "User wants to compare alternatives or competitors",
    ResearchPurpose.SOLVE_PROBLEM: "User is looking for a solution to a specific problem",
    ResearchPurpose.FIND_DATA: "User needs specific statistics, facts, or citations",
    ResearchPurpose.EXPLORE_TRENDS: "User wants to understand current/future trends",
    ResearchPurpose.VALIDATE: "User wants to verify or fact-check information",
    ResearchPurpose.GENERATE_IDEAS: "User wants to brainstorm content ideas",
}

# Deliverable descriptions
# Maps each ExpectedDeliverable enum member to the short description shown
# to the LLM when listing which outputs the research should produce.
DELIVERABLE_DESCRIPTIONS = {
    ExpectedDeliverable.KEY_STATISTICS: "Numbers, percentages, data points with citations",
    ExpectedDeliverable.EXPERT_QUOTES: "Authoritative quotes from industry experts",
    ExpectedDeliverable.CASE_STUDIES: "Real examples and success stories",
    ExpectedDeliverable.COMPARISONS: "Side-by-side analysis tables",
    ExpectedDeliverable.TRENDS: "Current and emerging industry trends",
    ExpectedDeliverable.BEST_PRACTICES: "Recommended approaches and guidelines",
    ExpectedDeliverable.STEP_BY_STEP: "Process guides and how-to instructions",
    ExpectedDeliverable.PROS_CONS: "Advantages and disadvantages analysis",
    ExpectedDeliverable.DEFINITIONS: "Clear explanations of concepts and terms",
    ExpectedDeliverable.CITATIONS: "Authoritative sources for reference",
    ExpectedDeliverable.EXAMPLES: "Concrete examples to illustrate points",
    ExpectedDeliverable.PREDICTIONS: "Future outlook and predictions",
}
|
||||
|
||||
def build_intent_inference_prompt(
    self,
    user_input: str,
    keywords: List[str],
    research_persona: Optional[ResearchPersona] = None,
    competitor_data: Optional[List[Dict]] = None,
    industry: Optional[str] = None,
    target_audience: Optional[str] = None,
) -> str:
    """
    Build prompt for inferring user's research intent.

    This prompt analyzes the user's input and determines:
    - What they want to accomplish
    - What questions they need answered
    - What specific deliverables they need

    Args:
        user_input: Raw text the user typed (keywords, question, or goal).
        keywords: Optional extracted keywords to include in the prompt.
        research_persona: Optional persona used to build the user-context section.
        competitor_data: Optional competitor records for additional context.
        industry: Optional industry hint passed into the persona context.
        target_audience: Optional audience hint passed into the persona context.

    Returns:
        The full prompt string instructing the LLM to emit the intent JSON.
    """

    # Build persona context (helper defined elsewhere in this class)
    persona_context = self._build_persona_context(research_persona, industry, target_audience)

    # Build competitor context (helper defined elsewhere in this class)
    competitor_context = self._build_competitor_context(competitor_data)

    # NOTE: the enumerated option values below must stay in sync with the
    # ResearchPurpose / ContentOutput / ExpectedDeliverable /
    # ResearchDepthLevel enums in models.research_intent_models.
    prompt = f"""You are an expert research intent analyzer. Your job is to understand what a content creator REALLY needs from their research.

## USER INPUT
"{user_input}"

{f"KEYWORDS: {', '.join(keywords)}" if keywords else ""}

## USER CONTEXT
{persona_context}

{competitor_context}

## YOUR TASK

Analyze the user's input and infer their research intent. Determine:

1. **INPUT TYPE**: Is this:
   - "keywords": Simple topic keywords (e.g., "AI healthcare 2025")
   - "question": A specific question (e.g., "What are the best AI tools for healthcare?")
   - "goal": A goal statement (e.g., "I need to write a blog about AI in healthcare")
   - "mixed": Combination of above

2. **PRIMARY QUESTION**: What is the main question to answer? Convert their input into a clear question.

3. **SECONDARY QUESTIONS**: What related questions should also be answered? (3-5 questions)

4. **PURPOSE**: Why are they researching? Choose ONE:
   - "learn": Understand a topic for personal knowledge
   - "create_content": Create content (blog, video, podcast)
   - "make_decision": Make a choice between options
   - "compare": Compare alternatives/competitors
   - "solve_problem": Find a solution
   - "find_data": Get specific statistics/facts
   - "explore_trends": Understand industry trends
   - "validate": Verify claims/information
   - "generate_ideas": Brainstorm ideas

5. **CONTENT OUTPUT**: What will they create? Choose ONE:
   - "blog", "podcast", "video", "social_post", "newsletter", "presentation", "report", "whitepaper", "email", "general"

6. **EXPECTED DELIVERABLES**: What specific outputs do they need? Choose ALL that apply:
   - "key_statistics": Numbers, data points
   - "expert_quotes": Authoritative quotes
   - "case_studies": Real examples
   - "comparisons": Side-by-side analysis
   - "trends": Industry trends
   - "best_practices": Recommendations
   - "step_by_step": How-to guides
   - "pros_cons": Advantages/disadvantages
   - "definitions": Concept explanations
   - "citations": Source references
   - "examples": Concrete examples
   - "predictions": Future outlook

7. **DEPTH**: How deep should the research go?
   - "overview": Quick summary
   - "detailed": In-depth analysis
   - "expert": Comprehensive expert-level

8. **FOCUS AREAS**: What specific aspects should be researched? (2-4 areas)

9. **PERSPECTIVE**: From whose viewpoint? (e.g., "marketing manager", "small business owner")

10. **TIME SENSITIVITY**: Is recency important?
    - "real_time": Latest only (past 24-48 hours)
    - "recent": Past week/month
    - "historical": Include older content
    - "evergreen": Timeless content

11. **CONFIDENCE**: How confident are you in this inference? (0.0-1.0)
    - If < 0.7, set needs_clarification to true and provide clarifying_questions

## OUTPUT FORMAT

Return a JSON object:
```json
{{
  "input_type": "keywords|question|goal|mixed",
  "primary_question": "The main question to answer",
  "secondary_questions": ["question 1", "question 2", "question 3"],
  "purpose": "one of the purpose options",
  "content_output": "one of the content options",
  "expected_deliverables": ["deliverable1", "deliverable2"],
  "depth": "overview|detailed|expert",
  "focus_areas": ["area1", "area2"],
  "perspective": "target perspective or null",
  "time_sensitivity": "real_time|recent|historical|evergreen",
  "confidence": 0.85,
  "needs_clarification": false,
  "clarifying_questions": [],
  "analysis_summary": "Brief summary of what the user wants"
}}
```

## IMPORTANT RULES

1. Always convert vague input into a specific primary question
2. Infer deliverables based on purpose (e.g., create_content → statistics + examples)
3. Use persona context to refine perspective and focus areas
4. If input is ambiguous, provide clarifying questions
5. Default to "detailed" depth unless input suggests otherwise
6. For content creation, include relevant deliverables automatically
"""

    return prompt
|
||||
|
||||
def build_query_generation_prompt(
    self,
    intent: ResearchIntent,
    research_persona: Optional[ResearchPersona] = None,
) -> str:
    """
    Build prompt for generating targeted research queries.

    Generates multiple queries, each targeting a specific deliverable.

    Args:
        intent: Inferred research intent whose questions, purpose,
            deliverables, depth, focus areas, perspective and time
            sensitivity are interpolated into the prompt.
        research_persona: Optional persona; its suggested_keywords
            (first 10) are appended to the intent section when present.

    Returns:
        The full prompt string instructing the LLM to emit 4-8 queries
        plus enhanced keywords and research angles as JSON.
    """

    # One "- <deliverable>: <description>" line per requested deliverable;
    # falls back to the raw value when the enum has no description.
    deliverables_list = "\n".join([
        f"- {d}: {self.DELIVERABLE_DESCRIPTIONS.get(ExpectedDeliverable(d), d)}"
        for d in intent.expected_deliverables
    ])

    persona_keywords = ""
    if research_persona and research_persona.suggested_keywords:
        persona_keywords = f"\nSUGGESTED KEYWORDS FROM PERSONA: {', '.join(research_persona.suggested_keywords[:10])}"

    # chr(10) is "\n" — backslashes are not allowed inside f-string
    # expressions on older Python versions.
    prompt = f"""You are a research query optimizer. Generate multiple targeted search queries based on the user's research intent.

## RESEARCH INTENT

PRIMARY QUESTION: {intent.primary_question}

SECONDARY QUESTIONS:
{chr(10).join(f'- {q}' for q in intent.secondary_questions) if intent.secondary_questions else 'None'}

PURPOSE: {intent.purpose} - {self.PURPOSE_EXPLANATIONS.get(ResearchPurpose(intent.purpose), intent.purpose)}

CONTENT OUTPUT: {intent.content_output}

EXPECTED DELIVERABLES:
{deliverables_list}

DEPTH: {intent.depth}

FOCUS AREAS: {', '.join(intent.focus_areas) if intent.focus_areas else 'General'}

PERSPECTIVE: {intent.perspective or 'General audience'}

TIME SENSITIVITY: {intent.time_sensitivity or 'No specific requirement'}
{persona_keywords}

## YOUR TASK

Generate 4-8 targeted research queries. Each query should:
1. Target a specific deliverable or question
2. Be optimized for semantic search (Exa/Tavily)
3. Include relevant context for better results

For each query, specify:
- The query string
- What deliverable it targets
- Best provider (exa for semantic/deep, tavily for news/real-time, google for factual)
- Priority (1-5, higher = more important)
- What we expect to find

## OUTPUT FORMAT

Return a JSON object:
```json
{{
  "queries": [
    {{
      "query": "Healthcare AI adoption statistics 2025 hospitals implementation data",
      "purpose": "key_statistics",
      "provider": "exa",
      "priority": 5,
      "expected_results": "Statistics on hospital AI adoption rates"
    }},
    {{
      "query": "AI healthcare trends predictions future outlook 2025 2026",
      "purpose": "trends",
      "provider": "tavily",
      "priority": 4,
      "expected_results": "Current trends and future predictions in healthcare AI"
    }}
  ],
  "enhanced_keywords": ["keyword1", "keyword2", "keyword3"],
  "research_angles": [
    "Angle 1: Focus on adoption challenges",
    "Angle 2: Focus on ROI and outcomes"
  ]
}}
```

## QUERY OPTIMIZATION RULES

1. For STATISTICS: Include words like "statistics", "data", "percentage", "report", "study"
2. For CASE STUDIES: Include "case study", "success story", "implementation", "example"
3. For TRENDS: Include "trends", "future", "predictions", "emerging", year numbers
4. For EXPERT QUOTES: Include expert names if known, or "expert opinion", "interview"
5. For COMPARISONS: Include "vs", "compare", "comparison", "alternative"
6. For NEWS/REAL-TIME: Use Tavily, include recent year/month
7. For ACADEMIC/DEEP: Use Exa with neural search
"""

    return prompt
|
||||
|
||||
def build_intent_aware_analysis_prompt(
|
||||
self,
|
||||
raw_results: str,
|
||||
intent: ResearchIntent,
|
||||
research_persona: Optional[ResearchPersona] = None,
|
||||
) -> str:
|
||||
"""
|
||||
Build prompt for analyzing research results based on user intent.
|
||||
|
||||
This is the key prompt that extracts exactly what the user needs.
|
||||
"""
|
||||
|
||||
purpose_explanation = self.PURPOSE_EXPLANATIONS.get(
|
||||
ResearchPurpose(intent.purpose),
|
||||
intent.purpose
|
||||
)
|
||||
|
||||
deliverables_instructions = self._build_deliverables_instructions(intent.expected_deliverables)
|
||||
|
||||
perspective_instruction = ""
|
||||
if intent.perspective:
|
||||
perspective_instruction = f"\n**PERSPECTIVE**: Analyze results from the viewpoint of: {intent.perspective}"
|
||||
|
||||
prompt = f"""You are a research analyst helping a content creator find exactly what they need. Your job is to analyze raw research results and extract precisely what the user is looking for.
|
||||
|
||||
## USER'S RESEARCH INTENT
|
||||
|
||||
PRIMARY QUESTION: {intent.primary_question}
|
||||
|
||||
SECONDARY QUESTIONS:
|
||||
{chr(10).join(f'- {q}' for q in intent.secondary_questions) if intent.secondary_questions else 'None specified'}
|
||||
|
||||
PURPOSE: {intent.purpose}
|
||||
→ {purpose_explanation}
|
||||
|
||||
CONTENT OUTPUT: {intent.content_output}
|
||||
|
||||
EXPECTED DELIVERABLES: {', '.join(intent.expected_deliverables)}
|
||||
|
||||
FOCUS AREAS: {', '.join(intent.focus_areas) if intent.focus_areas else 'General'}
|
||||
{perspective_instruction}
|
||||
|
||||
## RAW RESEARCH RESULTS
|
||||
|
||||
{raw_results[:15000]} # Truncated for token limits
|
||||
|
||||
## YOUR TASK
|
||||
|
||||
Analyze the raw research results and extract EXACTLY what the user needs.
|
||||
|
||||
{deliverables_instructions}
|
||||
|
||||
## OUTPUT REQUIREMENTS
|
||||
|
||||
Provide results in this JSON structure:
|
||||
|
||||
```json
|
||||
{{
|
||||
"primary_answer": "Direct 2-3 sentence answer to the primary question",
|
||||
"secondary_answers": {{
|
||||
"Question 1?": "Answer to question 1",
|
||||
"Question 2?": "Answer to question 2"
|
||||
}},
|
||||
"executive_summary": "2-3 sentence executive summary of all findings",
|
||||
"key_takeaways": [
|
||||
"Key takeaway 1 - most important finding",
|
||||
"Key takeaway 2",
|
||||
"Key takeaway 3",
|
||||
"Key takeaway 4",
|
||||
"Key takeaway 5"
|
||||
],
|
||||
"statistics": [
|
||||
{{
|
||||
"statistic": "72% of hospitals plan to adopt AI by 2025",
|
||||
"value": "72%",
|
||||
"context": "Survey of 500 US hospitals in 2024",
|
||||
"source": "Healthcare AI Report 2024",
|
||||
"url": "https://example.com/report",
|
||||
"credibility": 0.9,
|
||||
"recency": "2024"
|
||||
}}
|
||||
],
|
||||
"expert_quotes": [
|
||||
{{
|
||||
"quote": "AI will revolutionize patient care within 5 years",
|
||||
"speaker": "Dr. Jane Smith",
|
||||
"title": "Chief Medical Officer",
|
||||
"organization": "HealthTech Inc",
|
||||
"source": "TechCrunch",
|
||||
"url": "https://example.com/article"
|
||||
}}
|
||||
],
|
||||
"case_studies": [
|
||||
{{
|
||||
"title": "Mayo Clinic AI Implementation",
|
||||
"organization": "Mayo Clinic",
|
||||
"challenge": "High patient wait times",
|
||||
"solution": "AI-powered triage system",
|
||||
"outcome": "40% reduction in wait times",
|
||||
"key_metrics": ["40% faster triage", "95% patient satisfaction"],
|
||||
"source": "Healthcare IT News",
|
||||
"url": "https://example.com"
|
||||
}}
|
||||
],
|
||||
"trends": [
|
||||
{{
|
||||
"trend": "AI-assisted diagnostics adoption",
|
||||
"direction": "growing",
|
||||
"evidence": ["25% YoY growth", "Major hospital chains investing"],
|
||||
"impact": "Could reduce misdiagnosis by 30%",
|
||||
"timeline": "Expected mainstream by 2027",
|
||||
"sources": ["url1", "url2"]
|
||||
}}
|
||||
],
|
||||
"comparisons": [
|
||||
{{
|
||||
"title": "Top AI Healthcare Platforms",
|
||||
"criteria": ["Cost", "Features", "Support"],
|
||||
"items": [
|
||||
{{
|
||||
"name": "Platform A",
|
||||
"pros": ["Easy integration", "Good support"],
|
||||
"cons": ["Higher cost"],
|
||||
"features": {{"Cost": "$500/month", "Support": "24/7"}}
|
||||
}}
|
||||
],
|
||||
"verdict": "Platform A best for large hospitals"
|
||||
}}
|
||||
],
|
||||
"best_practices": [
|
||||
"Start with a pilot program before full deployment",
|
||||
"Ensure staff training is comprehensive"
|
||||
],
|
||||
"step_by_step": [
|
||||
"Step 1: Assess current infrastructure",
|
||||
"Step 2: Define use cases",
|
||||
"Step 3: Select vendor"
|
||||
],
|
||||
"pros_cons": {{
|
||||
"subject": "AI in Healthcare",
|
||||
"pros": ["Improved accuracy", "Cost savings"],
|
||||
"cons": ["Initial investment", "Training required"],
|
||||
"balanced_verdict": "Benefits outweigh costs for most hospitals"
|
||||
}},
|
||||
"definitions": {{
|
||||
"Clinical AI": "AI systems designed for medical diagnosis and treatment recommendations"
|
||||
}},
|
||||
"examples": [
|
||||
"Example: Hospital X reduced readmissions by 25% using predictive AI"
|
||||
],
|
||||
"predictions": [
|
||||
"By 2030, AI will assist in 80% of initial diagnoses"
|
||||
],
|
||||
"suggested_outline": [
|
||||
"1. Introduction: The AI Healthcare Revolution",
|
||||
"2. Current State: Where We Are Today",
|
||||
"3. Key Statistics and Trends",
|
||||
"4. Case Studies: Success Stories",
|
||||
"5. Implementation Guide",
|
||||
"6. Future Outlook"
|
||||
],
|
||||
"sources": [
|
||||
{{
|
||||
"title": "Healthcare AI Report 2024",
|
||||
"url": "https://example.com",
|
||||
"relevance_score": 0.95,
|
||||
"relevance_reason": "Directly addresses adoption statistics",
|
||||
"content_type": "research report",
|
||||
"credibility_score": 0.9
|
||||
}}
|
||||
],
|
||||
"confidence": 0.85,
|
||||
"gaps_identified": [
|
||||
"Specific cost data for small clinics not found",
|
||||
"Limited information on regulatory challenges"
|
||||
],
|
||||
"follow_up_queries": [
|
||||
"AI healthcare regulations FDA 2025",
|
||||
"Small clinic AI implementation costs"
|
||||
]
|
||||
}}
|
||||
```
|
||||
|
||||
## CRITICAL RULES
|
||||
|
||||
1. **ONLY include information directly from the raw results** - do not make up data
|
||||
2. **ALWAYS include source URLs** for every statistic, quote, and case study
|
||||
3. **If a deliverable type has no relevant data**, return an empty array for it
|
||||
4. **Prioritize recency and credibility** when multiple sources conflict
|
||||
5. **Answer the PRIMARY QUESTION directly** in 2-3 clear sentences
|
||||
6. **Keep KEY TAKEAWAYS to 5-7 points** - the most important findings
|
||||
7. **Add to gaps_identified** if expected information is missing
|
||||
8. **Suggest follow_up_queries** for gaps or incomplete areas
|
||||
9. **Rate confidence** based on how well results match the user's intent
|
||||
10. **Include deliverables ONLY if they are in expected_deliverables** or critical to the question
|
||||
"""
|
||||
|
||||
return prompt
|
||||
|
||||
def _build_persona_context(
|
||||
self,
|
||||
research_persona: Optional[ResearchPersona],
|
||||
industry: Optional[str],
|
||||
target_audience: Optional[str],
|
||||
) -> str:
|
||||
"""Build persona context section for prompts."""
|
||||
|
||||
if not research_persona and not industry:
|
||||
return "No specific persona context available."
|
||||
|
||||
context_parts = []
|
||||
|
||||
if research_persona:
|
||||
context_parts.append(f"INDUSTRY: {research_persona.default_industry}")
|
||||
context_parts.append(f"TARGET AUDIENCE: {research_persona.default_target_audience}")
|
||||
if research_persona.suggested_keywords:
|
||||
context_parts.append(f"TYPICAL TOPICS: {', '.join(research_persona.suggested_keywords[:5])}")
|
||||
if research_persona.research_angles:
|
||||
context_parts.append(f"RESEARCH ANGLES: {', '.join(research_persona.research_angles[:3])}")
|
||||
else:
|
||||
if industry:
|
||||
context_parts.append(f"INDUSTRY: {industry}")
|
||||
if target_audience:
|
||||
context_parts.append(f"TARGET AUDIENCE: {target_audience}")
|
||||
|
||||
return "\n".join(context_parts)
|
||||
|
||||
def _build_competitor_context(self, competitor_data: Optional[List[Dict]]) -> str:
|
||||
"""Build competitor context section for prompts."""
|
||||
|
||||
if not competitor_data:
|
||||
return ""
|
||||
|
||||
competitor_names = []
|
||||
for comp in competitor_data[:5]: # Limit to 5
|
||||
name = comp.get("name") or comp.get("domain") or comp.get("url", "Unknown")
|
||||
competitor_names.append(name)
|
||||
|
||||
if competitor_names:
|
||||
return f"\nKNOWN COMPETITORS: {', '.join(competitor_names)}"
|
||||
|
||||
return ""
|
||||
|
||||
def _build_deliverables_instructions(self, expected_deliverables: List[str]) -> str:
|
||||
"""Build specific extraction instructions for each expected deliverable."""
|
||||
|
||||
instructions = ["### EXTRACTION INSTRUCTIONS\n"]
|
||||
instructions.append("For each requested deliverable, extract the following:\n")
|
||||
|
||||
deliverable_instructions = {
|
||||
ExpectedDeliverable.KEY_STATISTICS: """
|
||||
**STATISTICS**:
|
||||
- Extract ALL relevant statistics with exact numbers
|
||||
- Include source attribution (publication name, URL)
|
||||
- Note the recency of the data
|
||||
- Rate credibility based on source authority
|
||||
- Format: statistic statement, value, context, source, URL, credibility score
|
||||
""",
|
||||
ExpectedDeliverable.EXPERT_QUOTES: """
|
||||
**EXPERT QUOTES**:
|
||||
- Extract authoritative quotes from named experts
|
||||
- Include speaker name, title, and organization
|
||||
- Provide context for the quote
|
||||
- Include source URL
|
||||
""",
|
||||
ExpectedDeliverable.CASE_STUDIES: """
|
||||
**CASE STUDIES**:
|
||||
- Summarize each case study: challenge → solution → outcome
|
||||
- Include key metrics and results
|
||||
- Name the organization involved
|
||||
- Provide source URL
|
||||
""",
|
||||
ExpectedDeliverable.TRENDS: """
|
||||
**TRENDS**:
|
||||
- Identify current and emerging trends
|
||||
- Note direction: growing, declining, emerging, or stable
|
||||
- List supporting evidence
|
||||
- Include timeline predictions if available
|
||||
- Cite sources
|
||||
""",
|
||||
ExpectedDeliverable.COMPARISONS: """
|
||||
**COMPARISONS**:
|
||||
- Build comparison tables where applicable
|
||||
- Define clear comparison criteria
|
||||
- List pros and cons for each option
|
||||
- Provide a verdict/recommendation if data supports it
|
||||
""",
|
||||
ExpectedDeliverable.BEST_PRACTICES: """
|
||||
**BEST PRACTICES**:
|
||||
- Extract recommended approaches
|
||||
- Provide actionable guidelines
|
||||
- Order by importance or sequence
|
||||
""",
|
||||
ExpectedDeliverable.STEP_BY_STEP: """
|
||||
**STEP BY STEP**:
|
||||
- Extract process/how-to instructions
|
||||
- Number steps clearly
|
||||
- Include any prerequisites or requirements
|
||||
""",
|
||||
ExpectedDeliverable.PROS_CONS: """
|
||||
**PROS AND CONS**:
|
||||
- List advantages (pros)
|
||||
- List disadvantages (cons)
|
||||
- Provide a balanced verdict
|
||||
""",
|
||||
ExpectedDeliverable.DEFINITIONS: """
|
||||
**DEFINITIONS**:
|
||||
- Extract clear explanations of key terms and concepts
|
||||
- Keep definitions concise but comprehensive
|
||||
""",
|
||||
ExpectedDeliverable.EXAMPLES: """
|
||||
**EXAMPLES**:
|
||||
- Extract concrete examples that illustrate key points
|
||||
- Include real-world applications
|
||||
""",
|
||||
ExpectedDeliverable.PREDICTIONS: """
|
||||
**PREDICTIONS**:
|
||||
- Extract future outlook and predictions
|
||||
- Note the source and their track record if known
|
||||
- Include timeframes where mentioned
|
||||
""",
|
||||
ExpectedDeliverable.CITATIONS: """
|
||||
**CITATIONS**:
|
||||
- List all authoritative sources with URLs
|
||||
- Rate credibility and relevance
|
||||
- Note content type (research, news, opinion, etc.)
|
||||
""",
|
||||
}
|
||||
|
||||
for deliverable in expected_deliverables:
|
||||
try:
|
||||
d_enum = ExpectedDeliverable(deliverable)
|
||||
if d_enum in deliverable_instructions:
|
||||
instructions.append(deliverable_instructions[d_enum])
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
return "\n".join(instructions)
|
||||
387
backend/services/research/intent/intent_query_generator.py
Normal file
387
backend/services/research/intent/intent_query_generator.py
Normal file
@@ -0,0 +1,387 @@
|
||||
"""
|
||||
Intent Query Generator
|
||||
|
||||
Generates multiple targeted research queries based on user intent.
|
||||
Each query targets a specific deliverable or question.
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 1.0
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from models.research_intent_models import (
|
||||
ResearchIntent,
|
||||
ResearchQuery,
|
||||
ExpectedDeliverable,
|
||||
ResearchPurpose,
|
||||
)
|
||||
from models.research_persona_models import ResearchPersona
|
||||
from .intent_prompt_builder import IntentPromptBuilder
|
||||
|
||||
|
||||
class IntentQueryGenerator:
    """
    Generates targeted research queries based on user intent.

    Instead of a single generic search, generates multiple queries
    each targeting a specific deliverable or question.
    """

    def __init__(self) -> None:
        """Initialize the query generator."""
        self.prompt_builder = IntentPromptBuilder()
        logger.info("IntentQueryGenerator initialized")

    async def generate_queries(
        self,
        intent: ResearchIntent,
        research_persona: Optional[ResearchPersona] = None,
    ) -> Dict[str, Any]:
        """
        Generate targeted research queries based on intent.

        Args:
            intent: The inferred research intent
            research_persona: Optional persona for context

        Returns:
            Dict with queries, enhanced_keywords, and research_angles.
            Falls back to template-based queries if the LLM call fails.
        """
        try:
            logger.info(f"Generating queries for: {intent.primary_question[:50]}...")

            # Build the query generation prompt
            prompt = self.prompt_builder.build_query_generation_prompt(
                intent=intent,
                research_persona=research_persona,
            )

            # JSON schema the LLM response must conform to.
            query_schema = {
                "type": "object",
                "properties": {
                    "queries": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "query": {"type": "string"},
                                "purpose": {"type": "string"},
                                "provider": {"type": "string"},
                                "priority": {"type": "integer"},
                                "expected_results": {"type": "string"}
                            },
                            "required": ["query", "purpose", "provider", "priority", "expected_results"]
                        }
                    },
                    "enhanced_keywords": {"type": "array", "items": {"type": "string"}},
                    "research_angles": {"type": "array", "items": {"type": "string"}}
                },
                "required": ["queries", "enhanced_keywords", "research_angles"]
            }

            # Imported lazily, matching the original code's function-scope import.
            # NOTE(review): llm_text_gen is invoked synchronously inside an async
            # method — confirm it is non-blocking or wrap it in an executor.
            from services.llm_providers.main_text_generation import llm_text_gen

            result = llm_text_gen(
                prompt=prompt,
                json_struct=query_schema,
                user_id=None
            )

            if isinstance(result, dict) and "error" in result:
                logger.error(f"Query generation failed: {result.get('error')}")
                return self._create_fallback_queries(intent)

            # Parse queries, dropping any malformed entries.
            queries = self._parse_queries(result.get("queries", []))

            # Guarantee at least one query per expected deliverable.
            queries = self._ensure_deliverable_coverage(queries, intent)

            # Highest-priority queries first.
            queries.sort(key=lambda q: q.priority, reverse=True)

            logger.info(f"Generated {len(queries)} targeted queries")

            return {
                "queries": queries,
                "enhanced_keywords": result.get("enhanced_keywords", []),
                "research_angles": result.get("research_angles", []),
            }

        except Exception as e:
            logger.error(f"Error generating queries: {e}")
            return self._create_fallback_queries(intent)

    def _parse_queries(self, raw_queries: List[Dict]) -> List[ResearchQuery]:
        """Parse raw query data into ResearchQuery objects.

        Malformed entries are logged and skipped rather than failing the batch.
        """
        queries = []
        for q in raw_queries:
            try:
                # Validate purpose; unknown values fall back to KEY_STATISTICS.
                purpose_str = q.get("purpose", "key_statistics")
                try:
                    purpose = ExpectedDeliverable(purpose_str)
                except ValueError:
                    purpose = ExpectedDeliverable.KEY_STATISTICS

                query = ResearchQuery(
                    query=q.get("query", ""),
                    purpose=purpose,
                    provider=q.get("provider", "exa"),
                    priority=min(max(int(q.get("priority", 3)), 1), 5),  # Clamp 1-5
                    expected_results=q.get("expected_results", ""),
                )
                queries.append(query)
            except Exception as e:
                logger.warning(f"Failed to parse query: {e}")
                continue

        return queries

    def _ensure_deliverable_coverage(
        self,
        queries: List[ResearchQuery],
        intent: ResearchIntent,
    ) -> List[ResearchQuery]:
        """Ensure we have queries for all expected deliverables.

        Appends a template-based query for each deliverable the LLM batch
        did not already cover.
        """
        # Deliverables already covered by the generated queries.
        covered = set(q.purpose.value for q in queries)

        for deliverable in intent.expected_deliverables:
            if deliverable not in covered:
                query = self._generate_query_for_deliverable(
                    deliverable=deliverable,
                    intent=intent,
                )
                queries.append(query)
                # BUG FIX: mark as covered so duplicated entries in
                # expected_deliverables don't produce duplicate queries.
                covered.add(deliverable)

        return queries

    def _generate_query_for_deliverable(
        self,
        deliverable: str,
        intent: ResearchIntent,
    ) -> ResearchQuery:
        """Generate a template-based query targeting a specific deliverable."""

        # Use the user's original input as the topic seed.
        topic = intent.original_input

        # Query templates by deliverable type.
        templates = {
            ExpectedDeliverable.KEY_STATISTICS.value: {
                "query": f"{topic} statistics data report study",
                "provider": "exa",
                "priority": 5,
                "expected": "Statistical data and research findings",
            },
            ExpectedDeliverable.EXPERT_QUOTES.value: {
                "query": f"{topic} expert opinion interview insights",
                "provider": "exa",
                "priority": 4,
                "expected": "Expert opinions and authoritative quotes",
            },
            ExpectedDeliverable.CASE_STUDIES.value: {
                "query": f"{topic} case study success story implementation example",
                "provider": "exa",
                "priority": 4,
                "expected": "Real-world case studies and examples",
            },
            ExpectedDeliverable.TRENDS.value: {
                "query": f"{topic} trends 2025 future predictions emerging",
                "provider": "tavily",
                "priority": 4,
                "expected": "Current trends and future predictions",
            },
            ExpectedDeliverable.COMPARISONS.value: {
                "query": f"{topic} comparison vs versus alternatives",
                "provider": "exa",
                "priority": 4,
                "expected": "Comparison and alternative options",
            },
            ExpectedDeliverable.BEST_PRACTICES.value: {
                "query": f"{topic} best practices recommendations guidelines",
                "provider": "exa",
                "priority": 3,
                "expected": "Best practices and recommendations",
            },
            ExpectedDeliverable.STEP_BY_STEP.value: {
                "query": f"{topic} how to guide tutorial steps",
                "provider": "exa",
                "priority": 3,
                "expected": "Step-by-step guides and tutorials",
            },
            ExpectedDeliverable.PROS_CONS.value: {
                "query": f"{topic} advantages disadvantages pros cons benefits",
                "provider": "exa",
                "priority": 3,
                "expected": "Pros, cons, and trade-offs",
            },
            ExpectedDeliverable.DEFINITIONS.value: {
                "query": f"what is {topic} definition explained",
                "provider": "exa",
                "priority": 3,
                "expected": "Clear definitions and explanations",
            },
            ExpectedDeliverable.EXAMPLES.value: {
                "query": f"{topic} examples real world applications",
                "provider": "exa",
                "priority": 3,
                "expected": "Real-world examples and applications",
            },
            ExpectedDeliverable.PREDICTIONS.value: {
                "query": f"{topic} future outlook predictions 2025 2030",
                "provider": "tavily",
                "priority": 4,
                "expected": "Future predictions and outlook",
            },
            ExpectedDeliverable.CITATIONS.value: {
                "query": f"{topic} research paper study academic",
                "provider": "exa",
                "priority": 4,
                "expected": "Authoritative academic sources",
            },
        }

        template = templates.get(deliverable, {
            "query": f"{topic}",
            "provider": "exa",
            "priority": 3,
            "expected": "General information",
        })

        # EAFP enum conversion (consistent with _parse_queries) instead of
        # rebuilding [e.value for e in ExpectedDeliverable] per call.
        try:
            purpose = ExpectedDeliverable(deliverable)
        except ValueError:
            purpose = ExpectedDeliverable.KEY_STATISTICS

        return ResearchQuery(
            query=template["query"],
            purpose=purpose,
            provider=template["provider"],
            priority=template["priority"],
            expected_results=template["expected"],
        )

    def _create_fallback_queries(self, intent: ResearchIntent) -> Dict[str, Any]:
        """Create fallback queries when AI generation fails.

        Builds template-based queries for the first 5 expected deliverables,
        plus one general query if none were produced.
        """
        topic = intent.original_input

        # Generate basic queries for each expected deliverable.
        queries = []
        for deliverable in intent.expected_deliverables[:5]:  # Limit to 5
            query = self._generate_query_for_deliverable(deliverable, intent)
            queries.append(query)

        # Add a general query if we have none.
        if not queries:
            queries.append(ResearchQuery(
                query=topic,
                purpose=ExpectedDeliverable.KEY_STATISTICS,
                provider="exa",
                priority=5,
                expected_results="General information and insights",
            ))

        return {
            "queries": queries,
            "enhanced_keywords": topic.split()[:10],
            "research_angles": [
                f"Overview of {topic}",
                f"Latest trends in {topic}",
            ],
        }
|
||||
|
||||
|
||||
class QueryOptimizer:
    """
    Optimizes queries for different research providers.

    Different providers have different strengths:
    - Exa: Semantic search, good for deep research
    - Tavily: Real-time search, good for news/trends
    - Google: Factual search, good for basic info
    """

    @staticmethod
    def optimize_for_exa(query: str, intent: ResearchIntent) -> Dict[str, Any]:
        """Optimize query and parameters for Exa."""

        wanted = intent.expected_deliverables
        wants_trends = ExpectedDeliverable.TRENDS.value in wanted

        # Pick a category hint when the deliverables suggest one.
        if ExpectedDeliverable.CITATIONS.value in wanted:
            category = "research paper"
        elif wants_trends:
            category = "news"
        elif intent.purpose == ResearchPurpose.COMPARE.value:
            category = "company"
        else:
            category = None

        # Neural search for semantic depth; auto handles time-sensitive queries.
        search_type = "auto" if wants_trends else "neural"

        # Scale result count with requested research depth (default 10).
        depth_to_results = {"expert": 20, "overview": 5}
        num_results = depth_to_results.get(intent.depth, 10)

        return {
            "query": query,
            "type": search_type,
            "category": category,
            "num_results": num_results,
            "text": True,
            "highlights": True,
        }

    @staticmethod
    def optimize_for_tavily(query: str, intent: ResearchIntent) -> Dict[str, Any]:
        """Optimize query and parameters for Tavily."""

        wanted = intent.expected_deliverables
        wants_trends = ExpectedDeliverable.TRENDS.value in wanted

        # News topic for trend research, general otherwise.
        topic = "news" if wants_trends else "general"

        # Deeper crawls for detailed/expert research.
        search_depth = "advanced" if intent.depth in ["detailed", "expert"] else "basic"

        # Request a synthesized answer only for factual deliverables.
        if ExpectedDeliverable.DEFINITIONS.value in wanted:
            include_answer = "advanced"
        elif ExpectedDeliverable.KEY_STATISTICS.value in wanted:
            include_answer = "basic"
        else:
            include_answer = False

        # Narrow the time window for time-sensitive or trend queries.
        if intent.time_sensitivity == "real_time":
            time_range = "day"
        elif intent.time_sensitivity == "recent":
            time_range = "week"
        elif wants_trends:
            time_range = "month"
        else:
            time_range = None

        return {
            "query": query,
            "topic": topic,
            "search_depth": search_depth,
            "include_answer": include_answer,
            "time_range": time_range,
            "max_results": 10,
        }
|
||||
378
backend/services/research/intent/research_intent_inference.py
Normal file
378
backend/services/research/intent/research_intent_inference.py
Normal file
@@ -0,0 +1,378 @@
|
||||
"""
|
||||
Research Intent Inference Service
|
||||
|
||||
Analyzes user input to understand their research intent.
|
||||
Uses AI to infer:
|
||||
- What the user wants to accomplish
|
||||
- What questions need answering
|
||||
- What deliverables they expect
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 1.0
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from models.research_intent_models import (
|
||||
ResearchIntent,
|
||||
ResearchPurpose,
|
||||
ContentOutput,
|
||||
ExpectedDeliverable,
|
||||
ResearchDepthLevel,
|
||||
InputType,
|
||||
IntentInferenceRequest,
|
||||
IntentInferenceResponse,
|
||||
ResearchQuery,
|
||||
)
|
||||
from models.research_persona_models import ResearchPersona
|
||||
from .intent_prompt_builder import IntentPromptBuilder
|
||||
|
||||
|
||||
class ResearchIntentInference:
|
||||
"""
|
||||
Infers user research intent from minimal input.
|
||||
|
||||
Instead of asking a formal questionnaire, this service
|
||||
uses AI to understand what the user really wants.
|
||||
"""
|
||||
|
||||
    def __init__(self) -> None:
        """Initialize the intent inference service."""
        # Prompt construction is delegated to a dedicated builder; this class
        # handles orchestration and result parsing only.
        self.prompt_builder = IntentPromptBuilder()
        logger.info("ResearchIntentInference initialized")
|
||||
|
||||
    async def infer_intent(
        self,
        user_input: str,
        keywords: Optional[List[str]] = None,
        research_persona: Optional[ResearchPersona] = None,
        competitor_data: Optional[List[Dict]] = None,
        industry: Optional[str] = None,
        target_audience: Optional[str] = None,
    ) -> IntentInferenceResponse:
        """
        Analyze user input and infer their research intent.

        Builds an inference prompt, asks the LLM for a structured intent
        JSON, parses it into a ResearchIntent, and wraps it in a response.
        On any LLM error or exception a fallback response is returned
        instead of raising.

        Args:
            user_input: User's keywords, question, or goal
            keywords: Extracted keywords (optional)
            research_persona: User's research persona (optional)
            competitor_data: Competitor analysis data (optional)
            industry: Industry context (optional)
            target_audience: Target audience context (optional)

        Returns:
            IntentInferenceResponse with inferred intent and suggested queries
        """
        try:
            logger.info(f"Inferring intent for: {user_input[:100]}...")

            keywords = keywords or []

            # Build the inference prompt
            prompt = self.prompt_builder.build_intent_inference_prompt(
                user_input=user_input,
                keywords=keywords,
                research_persona=research_persona,
                competitor_data=competitor_data,
                industry=industry,
                target_audience=target_audience,
            )

            # JSON schema the LLM response must conform to; enum-like fields
            # are validated again in _parse_intent_result, so unknown string
            # values here do not crash parsing.
            intent_schema = {
                "type": "object",
                "properties": {
                    "input_type": {"type": "string", "enum": ["keywords", "question", "goal", "mixed"]},
                    "primary_question": {"type": "string"},
                    "secondary_questions": {"type": "array", "items": {"type": "string"}},
                    "purpose": {"type": "string"},
                    "content_output": {"type": "string"},
                    "expected_deliverables": {"type": "array", "items": {"type": "string"}},
                    "depth": {"type": "string", "enum": ["overview", "detailed", "expert"]},
                    "focus_areas": {"type": "array", "items": {"type": "string"}},
                    "perspective": {"type": "string"},
                    "time_sensitivity": {"type": "string"},
                    "confidence": {"type": "number"},
                    "needs_clarification": {"type": "boolean"},
                    "clarifying_questions": {"type": "array", "items": {"type": "string"}},
                    "analysis_summary": {"type": "string"}
                },
                "required": [
                    "input_type", "primary_question", "purpose", "content_output",
                    "expected_deliverables", "depth", "confidence", "analysis_summary"
                ]
            }

            # Call LLM for intent inference (function-scope import, presumably
            # to avoid a circular import at module load — TODO confirm).
            # NOTE(review): llm_text_gen is called synchronously inside an
            # async method — confirm it does not block the event loop.
            from services.llm_providers.main_text_generation import llm_text_gen

            result = llm_text_gen(
                prompt=prompt,
                json_struct=intent_schema,
                user_id=None
            )

            # The LLM wrapper signals failure via an "error" key rather than
            # raising; fall back to a heuristic response in that case.
            if isinstance(result, dict) and "error" in result:
                logger.error(f"Intent inference failed: {result.get('error')}")
                return self._create_fallback_response(user_input, keywords)

            # Parse and validate the result
            intent = self._parse_intent_result(result, user_input)

            # Generate quick options for UI
            quick_options = self._generate_quick_options(intent, result)

            # Create response
            response = IntentInferenceResponse(
                success=True,
                intent=intent,
                analysis_summary=result.get("analysis_summary", "Research intent analyzed"),
                suggested_queries=[],  # Will be populated by query generator
                suggested_keywords=self._extract_keywords_from_input(user_input, keywords),
                suggested_angles=result.get("focus_areas", []),
                quick_options=quick_options,
            )

            logger.info(f"Intent inferred: purpose={intent.purpose}, confidence={intent.confidence}")
            return response

        except Exception as e:
            # Broad catch is deliberate: inference is best-effort and must
            # always yield a usable (fallback) response.
            logger.error(f"Error inferring intent: {e}")
            return self._create_fallback_response(user_input, keywords or [])
|
||||
|
||||
def _parse_intent_result(self, result: Dict[str, Any], user_input: str) -> ResearchIntent:
|
||||
"""Parse LLM result into ResearchIntent model."""
|
||||
|
||||
# Map string values to enums safely
|
||||
input_type = self._safe_enum(InputType, result.get("input_type", "keywords"), InputType.KEYWORDS)
|
||||
purpose = self._safe_enum(ResearchPurpose, result.get("purpose", "learn"), ResearchPurpose.LEARN)
|
||||
content_output = self._safe_enum(ContentOutput, result.get("content_output", "general"), ContentOutput.GENERAL)
|
||||
depth = self._safe_enum(ResearchDepthLevel, result.get("depth", "detailed"), ResearchDepthLevel.DETAILED)
|
||||
|
||||
# Parse expected deliverables
|
||||
raw_deliverables = result.get("expected_deliverables", [])
|
||||
expected_deliverables = []
|
||||
for d in raw_deliverables:
|
||||
try:
|
||||
expected_deliverables.append(ExpectedDeliverable(d).value)
|
||||
except ValueError:
|
||||
# Skip invalid deliverables
|
||||
pass
|
||||
|
||||
# Ensure we have at least some deliverables
|
||||
if not expected_deliverables:
|
||||
expected_deliverables = self._infer_deliverables_from_purpose(purpose)
|
||||
|
||||
return ResearchIntent(
|
||||
primary_question=result.get("primary_question", user_input),
|
||||
secondary_questions=result.get("secondary_questions", []),
|
||||
purpose=purpose.value,
|
||||
content_output=content_output.value,
|
||||
expected_deliverables=expected_deliverables,
|
||||
depth=depth.value,
|
||||
focus_areas=result.get("focus_areas", []),
|
||||
perspective=result.get("perspective"),
|
||||
time_sensitivity=result.get("time_sensitivity"),
|
||||
input_type=input_type.value,
|
||||
original_input=user_input,
|
||||
confidence=float(result.get("confidence", 0.7)),
|
||||
needs_clarification=result.get("needs_clarification", False),
|
||||
clarifying_questions=result.get("clarifying_questions", []),
|
||||
)
|
||||
|
||||
def _safe_enum(self, enum_class, value: str, default):
|
||||
"""Safely convert string to enum, returning default if invalid."""
|
||||
try:
|
||||
return enum_class(value)
|
||||
except ValueError:
|
||||
return default
|
||||
|
||||
def _infer_deliverables_from_purpose(self, purpose: ResearchPurpose) -> List[str]:
    """Map a research purpose to a sensible default set of deliverables.

    Used as a fallback when the LLM response contains no valid
    expected deliverables.

    Args:
        purpose: The inferred research purpose.

    Returns:
        A list of ExpectedDeliverable string values for that purpose;
        key statistics alone when the purpose is unrecognized.
    """

    D = ExpectedDeliverable  # local alias keeps the table readable
    defaults_by_purpose = {
        ResearchPurpose.LEARN: (D.DEFINITIONS, D.EXAMPLES, D.KEY_STATISTICS),
        ResearchPurpose.CREATE_CONTENT: (D.KEY_STATISTICS, D.EXPERT_QUOTES, D.EXAMPLES, D.CASE_STUDIES),
        ResearchPurpose.MAKE_DECISION: (D.PROS_CONS, D.COMPARISONS, D.BEST_PRACTICES),
        ResearchPurpose.COMPARE: (D.COMPARISONS, D.PROS_CONS, D.KEY_STATISTICS),
        ResearchPurpose.SOLVE_PROBLEM: (D.STEP_BY_STEP, D.BEST_PRACTICES, D.CASE_STUDIES),
        ResearchPurpose.FIND_DATA: (D.KEY_STATISTICS, D.CITATIONS),
        ResearchPurpose.EXPLORE_TRENDS: (D.TRENDS, D.PREDICTIONS, D.KEY_STATISTICS),
        ResearchPurpose.VALIDATE: (D.CITATIONS, D.KEY_STATISTICS, D.EXPERT_QUOTES),
        ResearchPurpose.GENERATE_IDEAS: (D.EXAMPLES, D.TRENDS, D.CASE_STUDIES),
    }

    members = defaults_by_purpose.get(purpose, (D.KEY_STATISTICS,))
    return [member.value for member in members]
def _generate_quick_options(self, intent: ResearchIntent, result: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Build the quick-confirmation option cards shown in the UI.

    Each option exposes the inferred value, a display-friendly label,
    and the alternatives the user can switch to. The content-type card
    is only included when a specific (non-general) type was inferred.

    Args:
        intent: The parsed research intent.
        result: Raw LLM payload (its confidence score is echoed on each card).

    Returns:
        A list of option dicts for UI confirmation.
    """

    confidence = result.get("confidence", 0.7)  # shared by every card

    options: List[Dict[str, Any]] = [
        {
            "id": "purpose",
            "label": "Research Purpose",
            "value": intent.purpose,
            "display": self._purpose_display(intent.purpose),
            "alternatives": [p.value for p in ResearchPurpose],
            "confidence": confidence,
        }
    ]

    # Only surface the content-type card for a specific inferred type.
    if intent.content_output != ContentOutput.GENERAL.value:
        options.append({
            "id": "content_output",
            "label": "Content Type",
            "value": intent.content_output,
            "display": intent.content_output.replace("_", " ").title(),
            "alternatives": [c.value for c in ContentOutput],
            "confidence": confidence,
        })

    options.append({
        "id": "deliverables",
        "label": "What I'll Find",
        "value": intent.expected_deliverables,
        # Show at most four deliverables, prettified for display.
        "display": [d.replace("_", " ").title() for d in intent.expected_deliverables[:4]],
        "alternatives": [d.value for d in ExpectedDeliverable],
        "confidence": confidence,
        "multi_select": True,
    })

    options.append({
        "id": "depth",
        "label": "Research Depth",
        "value": intent.depth,
        "display": intent.depth.title(),
        "alternatives": [d.value for d in ResearchDepthLevel],
        "confidence": confidence,
    })

    return options
def _purpose_display(self, purpose: str) -> str:
|
||||
"""Get display-friendly purpose text."""
|
||||
display_map = {
|
||||
"learn": "Understand this topic",
|
||||
"create_content": "Create content about this",
|
||||
"make_decision": "Make a decision",
|
||||
"compare": "Compare options",
|
||||
"solve_problem": "Solve a problem",
|
||||
"find_data": "Find specific data",
|
||||
"explore_trends": "Explore trends",
|
||||
"validate": "Validate information",
|
||||
"generate_ideas": "Generate ideas",
|
||||
}
|
||||
return display_map.get(purpose, purpose.replace("_", " ").title())
|
||||
|
||||
def _extract_keywords_from_input(self, user_input: str, keywords: List[str]) -> List[str]:
|
||||
"""Extract and enhance keywords from user input."""
|
||||
|
||||
# Start with provided keywords
|
||||
extracted = list(keywords) if keywords else []
|
||||
|
||||
# Simple extraction from input (split on common delimiters)
|
||||
words = user_input.lower().replace(",", " ").replace(";", " ").split()
|
||||
|
||||
# Filter out common words
|
||||
stop_words = {
|
||||
"the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
|
||||
"have", "has", "had", "do", "does", "did", "will", "would", "could",
|
||||
"should", "may", "might", "must", "shall", "can", "need", "dare",
|
||||
"to", "of", "in", "for", "on", "with", "at", "by", "from", "up",
|
||||
"about", "into", "through", "during", "before", "after", "above",
|
||||
"below", "between", "under", "again", "further", "then", "once",
|
||||
"here", "there", "when", "where", "why", "how", "all", "each",
|
||||
"few", "more", "most", "other", "some", "such", "no", "nor", "not",
|
||||
"only", "own", "same", "so", "than", "too", "very", "just", "and",
|
||||
"but", "if", "or", "because", "as", "until", "while", "i", "we",
|
||||
"you", "they", "what", "which", "who", "whom", "this", "that",
|
||||
"these", "those", "am", "want", "write", "blog", "post", "article",
|
||||
}
|
||||
|
||||
for word in words:
|
||||
if word not in stop_words and len(word) > 2 and word not in extracted:
|
||||
extracted.append(word)
|
||||
|
||||
return extracted[:15] # Limit to 15 keywords
|
||||
|
||||
def _create_fallback_response(self, user_input: str, keywords: List[str]) -> IntentInferenceResponse:
    """Build a conservative default response when AI inference fails.

    The fallback intent assumes a generic "learn" purpose at detailed
    depth, flags that clarification is needed, and reports lowered
    confidence so downstream consumers can treat it accordingly.

    Args:
        user_input: The original user text inference failed on.
        keywords: Keywords to pass through as suggestions.

    Returns:
        An IntentInferenceResponse marked successful, but with low
        confidence and clarifying questions attached.
    """

    secondary_questions = [
        f"What are the latest trends in {user_input}?",
        f"What are best practices for {user_input}?",
    ]
    default_deliverables = [
        ExpectedDeliverable.KEY_STATISTICS.value,
        ExpectedDeliverable.EXAMPLES.value,
        ExpectedDeliverable.BEST_PRACTICES.value,
    ]
    clarifying_questions = [
        "What type of content are you creating?",
        "What specific aspects are you most interested in?",
    ]

    fallback_intent = ResearchIntent(
        primary_question=f"What are the key insights about: {user_input}?",
        secondary_questions=secondary_questions,
        purpose=ResearchPurpose.LEARN.value,
        content_output=ContentOutput.GENERAL.value,
        expected_deliverables=default_deliverables,
        depth=ResearchDepthLevel.DETAILED.value,
        focus_areas=[],
        input_type=InputType.KEYWORDS.value,
        original_input=user_input,
        confidence=0.5,
        needs_clarification=True,
        clarifying_questions=clarifying_questions,
    )

    return IntentInferenceResponse(
        success=True,  # Still return success, just with lower confidence
        intent=fallback_intent,
        analysis_summary=f"Basic research analysis for: {user_input}",
        suggested_queries=[],
        suggested_keywords=keywords,
        suggested_angles=[],
        quick_options=[],
    )
Reference in New Issue
Block a user