Added video studio router and endpoints. Added research router and endpoints. Added youtube router and endpoints. Added onboarding utils router and endpoints, along with the onboarding utils service, models, routes, and utils.
This commit is contained in:
23
backend/services/research/intent/__init__.py
Normal file
23
backend/services/research/intent/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""
|
||||
Research Intent Package
|
||||
|
||||
This package provides intent-driven research capabilities:
|
||||
- Intent inference from user input
|
||||
- Targeted query generation
|
||||
- Intent-aware result analysis
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 1.0
|
||||
"""
|
||||
|
||||
from .research_intent_inference import ResearchIntentInference
|
||||
from .intent_query_generator import IntentQueryGenerator
|
||||
from .intent_aware_analyzer import IntentAwareAnalyzer
|
||||
from .intent_prompt_builder import IntentPromptBuilder
|
||||
|
||||
__all__ = [
|
||||
"ResearchIntentInference",
|
||||
"IntentQueryGenerator",
|
||||
"IntentAwareAnalyzer",
|
||||
"IntentPromptBuilder",
|
||||
]
|
||||
547
backend/services/research/intent/intent_aware_analyzer.py
Normal file
547
backend/services/research/intent/intent_aware_analyzer.py
Normal file
@@ -0,0 +1,547 @@
|
||||
"""
|
||||
Intent-Aware Result Analyzer
|
||||
|
||||
Analyzes research results based on user intent.
|
||||
Extracts exactly what the user needs from raw research data.
|
||||
|
||||
This is the key innovation - instead of generic analysis,
|
||||
we analyze results through the lens of what the user wants to accomplish.
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 1.0
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from models.research_intent_models import (
|
||||
ResearchIntent,
|
||||
IntentDrivenResearchResult,
|
||||
ExpectedDeliverable,
|
||||
StatisticWithCitation,
|
||||
ExpertQuote,
|
||||
CaseStudySummary,
|
||||
TrendAnalysis,
|
||||
ComparisonTable,
|
||||
ComparisonItem,
|
||||
ProsCons,
|
||||
SourceWithRelevance,
|
||||
)
|
||||
from models.research_persona_models import ResearchPersona
|
||||
from .intent_prompt_builder import IntentPromptBuilder
|
||||
|
||||
|
||||
class IntentAwareAnalyzer:
|
||||
"""
|
||||
Analyzes research results based on user intent.
|
||||
|
||||
Instead of generic summaries, this extracts exactly what the user
|
||||
needs: statistics, quotes, case studies, trends, etc.
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Create the analyzer together with the prompt builder it relies on."""
    # Prompt construction is delegated to a dedicated builder instance.
    builder = IntentPromptBuilder()
    self.prompt_builder = builder
    logger.info("IntentAwareAnalyzer initialized")
|
||||
|
||||
async def analyze(
    self,
    raw_results: Dict[str, Any],
    intent: ResearchIntent,
    research_persona: Optional[ResearchPersona] = None,
) -> IntentDrivenResearchResult:
    """
    Analyze raw research results based on user intent.

    The raw results are flattened to text, embedded in an intent-aware
    prompt, and sent to the LLM together with a JSON schema derived from
    ``intent.expected_deliverables``. The structured answer is then parsed
    into an ``IntentDrivenResearchResult``.

    Args:
        raw_results: Raw results from Exa/Tavily/Google
        intent: The user's research intent
        research_persona: Optional persona for context

    Returns:
        IntentDrivenResearchResult with extracted deliverables. When the
        LLM reports an error, returns something that is not a JSON object,
        or any step raises, the result built by ``_create_fallback_result``
        is returned instead.
    """
    try:
        logger.info(f"Analyzing results for intent: {intent.primary_question[:50]}...")

        # Format raw results for analysis
        formatted_results = self._format_raw_results(raw_results)

        # Build the analysis prompt
        prompt = self.prompt_builder.build_intent_aware_analysis_prompt(
            raw_results=formatted_results,
            intent=intent,
            research_persona=research_persona,
        )

        # Define the expected JSON schema
        analysis_schema = self._build_analysis_schema(intent.expected_deliverables)

        # Call LLM for analysis (function-local import, kept as in the original)
        from services.llm_providers.main_text_generation import llm_text_gen

        result = llm_text_gen(
            prompt=prompt,
            json_struct=analysis_schema,
            user_id=None
        )

        # NOTE(review): presumably some providers return the JSON as text
        # rather than a parsed dict - decode it instead of discarding it.
        if isinstance(result, str):
            try:
                result = json.loads(result)
            except ValueError as decode_error:
                logger.error(f"Intent-aware analysis returned non-JSON text: {decode_error}")
                return self._create_fallback_result(raw_results, intent)

        # Anything that is not a JSON object cannot be parsed below; fall
        # back explicitly instead of relying on an AttributeError.
        if not isinstance(result, dict):
            logger.error(
                f"Intent-aware analysis returned unexpected type: {type(result).__name__}"
            )
            return self._create_fallback_result(raw_results, intent)

        if "error" in result:
            logger.error(f"Intent-aware analysis failed: {result.get('error')}")
            return self._create_fallback_result(raw_results, intent)

        # Parse and validate the result
        analyzed_result = self._parse_analysis_result(result, intent, raw_results)

        logger.info(
            f"Analysis complete: {len(analyzed_result.key_takeaways)} takeaways, "
            f"{len(analyzed_result.statistics)} stats, "
            f"{len(analyzed_result.sources)} sources"
        )

        return analyzed_result

    except Exception as e:
        # Deliberate best-effort boundary: never let analysis errors reach
        # the caller, but keep the traceback in the log for diagnosis.
        logger.exception(f"Error in intent-aware analysis: {e}")
        return self._create_fallback_result(raw_results, intent)
|
||||
|
||||
def _format_raw_results(self, raw_results: Dict[str, Any]) -> str:
|
||||
"""Format raw research results for LLM analysis."""
|
||||
|
||||
formatted_parts = []
|
||||
|
||||
# Extract content
|
||||
content = raw_results.get("content", "")
|
||||
if content:
|
||||
formatted_parts.append(f"=== MAIN CONTENT ===\n{content[:8000]}")
|
||||
|
||||
# Extract sources with their content
|
||||
sources = raw_results.get("sources", [])
|
||||
if sources:
|
||||
formatted_parts.append("\n=== SOURCES ===")
|
||||
for i, source in enumerate(sources[:15], 1): # Limit to 15 sources
|
||||
title = source.get("title", "Untitled")
|
||||
url = source.get("url", "")
|
||||
excerpt = source.get("excerpt", source.get("text", source.get("content", "")))
|
||||
|
||||
formatted_parts.append(f"\nSource {i}: {title}")
|
||||
formatted_parts.append(f"URL: {url}")
|
||||
if excerpt:
|
||||
formatted_parts.append(f"Content: {excerpt[:500]}")
|
||||
|
||||
# Extract grounding metadata if available (from Google)
|
||||
grounding = raw_results.get("grounding_metadata", {})
|
||||
if grounding:
|
||||
formatted_parts.append("\n=== GROUNDING DATA ===")
|
||||
formatted_parts.append(json.dumps(grounding, indent=2)[:2000])
|
||||
|
||||
# Extract any AI answers (from Tavily)
|
||||
answer = raw_results.get("answer", "")
|
||||
if answer:
|
||||
formatted_parts.append(f"\n=== AI-GENERATED ANSWER ===\n{answer}")
|
||||
|
||||
return "\n".join(formatted_parts)
|
||||
|
||||
def _build_analysis_schema(self, expected_deliverables: List[str]) -> Dict[str, Any]:
    """Assemble the JSON schema that the LLM analysis response must follow.

    The core answer fields are always present. One extra property is added
    for every deliverable whose enum value appears in
    ``expected_deliverables``; ``sources`` and ``suggested_outline`` are
    appended unconditionally at the end.
    """

    # Small factories so that every property receives its own fresh dict.
    def text():
        return {"type": "string"}

    def number():
        return {"type": "number"}

    def text_list():
        return {"type": "array", "items": text()}

    def text_map():
        return {"type": "object", "additionalProperties": text()}

    def record_list(fields, required=None):
        record = {"type": "object", "properties": fields}
        if required is not None:
            record["required"] = required
        return {"type": "array", "items": record}

    def statistics_schema():
        return record_list(
            {
                "statistic": text(),
                "value": text(),
                "context": text(),
                "source": text(),
                "url": text(),
                "credibility": number(),
                "recency": text(),
            },
            ["statistic", "context", "source", "url"],
        )

    def expert_quotes_schema():
        return record_list(
            {
                "quote": text(),
                "speaker": text(),
                "title": text(),
                "organization": text(),
                "source": text(),
                "url": text(),
            },
            ["quote", "speaker", "source", "url"],
        )

    def case_studies_schema():
        return record_list(
            {
                "title": text(),
                "organization": text(),
                "challenge": text(),
                "solution": text(),
                "outcome": text(),
                "key_metrics": text_list(),
                "source": text(),
                "url": text(),
            },
            ["title", "organization", "challenge", "solution", "outcome"],
        )

    def trends_schema():
        return record_list(
            {
                "trend": text(),
                "direction": text(),
                "evidence": text_list(),
                "impact": text(),
                "timeline": text(),
                "sources": text_list(),
            },
            ["trend", "direction", "evidence"],
        )

    def comparisons_schema():
        compared_items = record_list(
            {
                "name": text(),
                "pros": text_list(),
                "cons": text_list(),
                "features": {"type": "object"},
            }
        )
        return record_list(
            {
                "title": text(),
                "criteria": text_list(),
                "items": compared_items,
                "verdict": text(),
            }
        )

    def pros_cons_schema():
        return {
            "type": "object",
            "properties": {
                "subject": text(),
                "pros": text_list(),
                "cons": text_list(),
                "balanced_verdict": text(),
            },
        }

    # Fields every analysis must be able to carry.
    properties = {
        "primary_answer": text(),
        "secondary_answers": text_map(),
        "executive_summary": text(),
        "key_takeaways": {"type": "array", "items": text(), "maxItems": 7},
        "confidence": number(),
        "gaps_identified": text_list(),
        "follow_up_queries": text_list(),
    }
    schema = {
        "type": "object",
        "properties": properties,
        "required": ["primary_answer", "executive_summary", "key_takeaways", "confidence"],
    }

    # Deliverable -> (property name, schema factory), in emission order.
    optional_properties = (
        (ExpectedDeliverable.KEY_STATISTICS, "statistics", statistics_schema),
        (ExpectedDeliverable.EXPERT_QUOTES, "expert_quotes", expert_quotes_schema),
        (ExpectedDeliverable.CASE_STUDIES, "case_studies", case_studies_schema),
        (ExpectedDeliverable.TRENDS, "trends", trends_schema),
        (ExpectedDeliverable.COMPARISONS, "comparisons", comparisons_schema),
        (ExpectedDeliverable.PROS_CONS, "pros_cons", pros_cons_schema),
        (ExpectedDeliverable.BEST_PRACTICES, "best_practices", text_list),
        (ExpectedDeliverable.STEP_BY_STEP, "step_by_step", text_list),
        (ExpectedDeliverable.DEFINITIONS, "definitions", text_map),
        (ExpectedDeliverable.EXAMPLES, "examples", text_list),
        (ExpectedDeliverable.PREDICTIONS, "predictions", text_list),
    )
    for deliverable, property_name, make_schema in optional_properties:
        if deliverable.value in expected_deliverables:
            properties[property_name] = make_schema()

    # Sources and the suggested outline are requested for every intent.
    properties["sources"] = record_list(
        {
            "title": text(),
            "url": text(),
            "relevance_score": number(),
            "relevance_reason": text(),
            "content_type": text(),
            "credibility_score": number(),
        },
        ["title", "url"],
    )
    properties["suggested_outline"] = text_list()

    return schema
|
||||
|
||||
def _parse_analysis_result(
    self,
    result: Dict[str, Any],
    intent: ResearchIntent,
    raw_results: Dict[str, Any],
) -> IntentDrivenResearchResult:
    """Parse LLM analysis result into structured format.

    Each deliverable collection is converted item by item; an item that
    cannot be converted is logged and skipped so one malformed entry does
    not discard the rest. Keys that are missing or JSON ``null`` are
    treated as empty, and scores that are null or non-numeric fall back to
    their defaults instead of aborting the parse.

    Args:
        result: Decoded JSON object returned by the LLM.
        intent: Intent the analysis was run for; stored on the result and
            used as the default pros/cons subject.
        raw_results: Raw provider payload, used for the source fallback
            and for the ``raw_content`` excerpt.

    Returns:
        An ``IntentDrivenResearchResult`` with ``success=True``.
    """

    def as_float(value: Any, default: float) -> float:
        # LLM output may carry null or free text where a number is expected.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def entries(key: str) -> List[Any]:
        # A key present with a null value must behave like a missing key.
        return result.get(key) or []

    # Parse statistics
    statistics = []
    for stat in entries("statistics"):
        try:
            statistics.append(StatisticWithCitation(
                statistic=stat.get("statistic", ""),
                value=stat.get("value"),
                context=stat.get("context", ""),
                source=stat.get("source", ""),
                url=stat.get("url", ""),
                credibility=as_float(stat.get("credibility"), 0.8),
                recency=stat.get("recency"),
            ))
        except Exception as e:
            logger.warning(f"Failed to parse statistic: {e}")

    # Parse expert quotes
    expert_quotes = []
    for quote in entries("expert_quotes"):
        try:
            expert_quotes.append(ExpertQuote(
                quote=quote.get("quote", ""),
                speaker=quote.get("speaker", ""),
                title=quote.get("title"),
                organization=quote.get("organization"),
                context=quote.get("context"),
                source=quote.get("source", ""),
                url=quote.get("url", ""),
            ))
        except Exception as e:
            logger.warning(f"Failed to parse expert quote: {e}")

    # Parse case studies
    case_studies = []
    for cs in entries("case_studies"):
        try:
            case_studies.append(CaseStudySummary(
                title=cs.get("title", ""),
                organization=cs.get("organization", ""),
                challenge=cs.get("challenge", ""),
                solution=cs.get("solution", ""),
                outcome=cs.get("outcome", ""),
                key_metrics=cs.get("key_metrics", []),
                source=cs.get("source", ""),
                url=cs.get("url", ""),
            ))
        except Exception as e:
            logger.warning(f"Failed to parse case study: {e}")

    # Parse trends
    trends = []
    for trend in entries("trends"):
        try:
            trends.append(TrendAnalysis(
                trend=trend.get("trend", ""),
                direction=trend.get("direction", "growing"),
                evidence=trend.get("evidence", []),
                impact=trend.get("impact"),
                timeline=trend.get("timeline"),
                sources=trend.get("sources", []),
            ))
        except Exception as e:
            logger.warning(f"Failed to parse trend: {e}")

    # Parse comparisons (a failure in any item drops the whole table)
    comparisons = []
    for comp in entries("comparisons"):
        try:
            items = []
            for item in comp.get("items", []):
                items.append(ComparisonItem(
                    name=item.get("name", ""),
                    description=item.get("description"),
                    pros=item.get("pros", []),
                    cons=item.get("cons", []),
                    features=item.get("features", {}),
                    rating=item.get("rating"),
                    source=item.get("source"),
                ))
            comparisons.append(ComparisonTable(
                title=comp.get("title", ""),
                criteria=comp.get("criteria", []),
                items=items,
                winner=comp.get("winner"),
                verdict=comp.get("verdict"),
            ))
        except Exception as e:
            logger.warning(f"Failed to parse comparison: {e}")

    # Parse pros/cons
    pros_cons = None
    pc_data = result.get("pros_cons")
    if pc_data:
        try:
            pros_cons = ProsCons(
                subject=pc_data.get("subject", intent.original_input),
                pros=pc_data.get("pros", []),
                cons=pc_data.get("cons", []),
                balanced_verdict=pc_data.get("balanced_verdict", ""),
            )
        except Exception as e:
            logger.warning(f"Failed to parse pros/cons: {e}")

    # Parse sources
    sources = []
    for src in entries("sources"):
        try:
            sources.append(SourceWithRelevance(
                title=src.get("title", ""),
                url=src.get("url", ""),
                excerpt=src.get("excerpt"),
                relevance_score=as_float(src.get("relevance_score"), 0.8),
                relevance_reason=src.get("relevance_reason"),
                content_type=src.get("content_type"),
                published_date=src.get("published_date"),
                credibility_score=as_float(src.get("credibility_score"), 0.8),
            ))
        except Exception as e:
            logger.warning(f"Failed to parse source: {e}")

    # If no sources from analysis, extract from raw results
    if not sources:
        sources = self._extract_sources_from_raw(raw_results)

    return IntentDrivenResearchResult(
        success=True,
        primary_answer=result.get("primary_answer") or "",
        secondary_answers=result.get("secondary_answers") or {},
        statistics=statistics,
        expert_quotes=expert_quotes,
        case_studies=case_studies,
        comparisons=comparisons,
        trends=trends,
        best_practices=entries("best_practices"),
        step_by_step=entries("step_by_step"),
        pros_cons=pros_cons,
        definitions=result.get("definitions") or {},
        examples=entries("examples"),
        predictions=entries("predictions"),
        executive_summary=result.get("executive_summary") or "",
        key_takeaways=entries("key_takeaways"),
        suggested_outline=entries("suggested_outline"),
        sources=sources,
        raw_content=self._format_raw_results(raw_results)[:5000],
        confidence=as_float(result.get("confidence"), 0.7),
        gaps_identified=entries("gaps_identified"),
        follow_up_queries=entries("follow_up_queries"),
        original_intent=intent,
    )
|
||||
|
||||
def _extract_sources_from_raw(self, raw_results: Dict[str, Any]) -> List[SourceWithRelevance]:
    """Extract sources from raw results when analysis doesn't provide them.

    Converts at most the first 10 entries of ``raw_results["sources"]``.
    Each source gets a fixed relevance score of 0.8 and an excerpt of up
    to 200 characters. Entries that cannot be converted are logged and
    skipped.

    Args:
        raw_results: Raw provider payload; a missing or null ``sources``
            value yields an empty list.

    Returns:
        The converted sources, possibly empty.
    """
    sources = []
    # "sources": None must not raise - this method also runs on the
    # fallback path, where nothing above it catches the error.
    for src in (raw_results.get("sources") or [])[:10]:
        try:
            # Same body-field fallback order as _format_raw_results; a key
            # present with None/"" no longer blocks the alternatives.
            excerpt = (
                src.get("excerpt")
                or src.get("text")
                or src.get("content")
                or ""
            )
            sources.append(SourceWithRelevance(
                title=src.get("title", "Untitled"),
                url=src.get("url", ""),
                excerpt=excerpt[:200],
                relevance_score=0.8,
                credibility_score=float(src.get("credibility_score", 0.8)),
            ))
        except Exception as e:
            # Best effort by design: skip the malformed entry, keep the rest.
            logger.warning(f"Failed to extract source: {e}")

    return sources
|
||||
|
||||
def _create_fallback_result(
    self,
    raw_results: Dict[str, Any],
    intent: ResearchIntent,
) -> IntentDrivenResearchResult:
    """Create a fallback result when AI analysis fails.

    No LLM is involved: the summary is the first 300 characters of
    ``raw_results["content"]``, the takeaways are taken from its first
    five ``". "``-separated sentences (those longer than 20 characters),
    and the sources come from ``_extract_sources_from_raw``.

    Args:
        raw_results: Raw provider payload.
        intent: Intent the research was run for.

    Returns:
        An ``IntentDrivenResearchResult`` with ``confidence=0.5`` whose
        ``gaps_identified`` records that AI analysis failed. ``success``
        stays ``True``, as in the original implementation.
    """
    # Extract basic information from raw results
    content = raw_results.get("content", "")
    sources = self._extract_sources_from_raw(raw_results)

    # Create basic takeaways from content
    key_takeaways = []
    if content:
        for sentence in content.split(". ")[:5]:
            if len(sentence) <= 20:
                continue
            takeaway = sentence.strip()
            # Splitting on ". " leaves the final sentence's own terminator
            # in place; only add a period when none is there already.
            if not takeaway.endswith((".", "!", "?")):
                takeaway += "."
            key_takeaways.append(takeaway)

    return IntentDrivenResearchResult(
        success=True,
        primary_answer=f"Research findings for: {intent.primary_question}",
        secondary_answers={},
        executive_summary=content[:300] if content else "Research completed",
        key_takeaways=key_takeaways,
        sources=sources,
        raw_content=self._format_raw_results(raw_results)[:5000],
        confidence=0.5,
        gaps_identified=[
            "AI analysis failed - showing raw results",
            "Manual review recommended"
        ],
        follow_up_queries=[],
        original_intent=intent,
    )
|
||||
627
backend/services/research/intent/intent_prompt_builder.py
Normal file
627
backend/services/research/intent/intent_prompt_builder.py
Normal file
@@ -0,0 +1,627 @@
|
||||
"""
|
||||
Intent Prompt Builder
|
||||
|
||||
Builds comprehensive AI prompts for:
|
||||
1. Intent inference from user input
|
||||
2. Targeted query generation
|
||||
3. Intent-aware result analysis
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 1.0
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from models.research_intent_models import (
|
||||
ResearchIntent,
|
||||
ResearchPurpose,
|
||||
ContentOutput,
|
||||
ExpectedDeliverable,
|
||||
ResearchDepthLevel,
|
||||
)
|
||||
from models.research_persona_models import ResearchPersona
|
||||
|
||||
|
||||
class IntentPromptBuilder:
|
||||
"""Builds prompts for intent-driven research."""
|
||||
|
||||
# Purpose explanations for the AI
|
||||
PURPOSE_EXPLANATIONS = {
|
||||
ResearchPurpose.LEARN: "User wants to understand a topic for personal knowledge",
|
||||
ResearchPurpose.CREATE_CONTENT: "User will create content (blog, video, podcast) from this research",
|
||||
ResearchPurpose.MAKE_DECISION: "User needs to make a choice/decision based on research",
|
||||
ResearchPurpose.COMPARE: "User wants to compare alternatives or competitors",
|
||||
ResearchPurpose.SOLVE_PROBLEM: "User is looking for a solution to a specific problem",
|
||||
ResearchPurpose.FIND_DATA: "User needs specific statistics, facts, or citations",
|
||||
ResearchPurpose.EXPLORE_TRENDS: "User wants to understand current/future trends",
|
||||
ResearchPurpose.VALIDATE: "User wants to verify or fact-check information",
|
||||
ResearchPurpose.GENERATE_IDEAS: "User wants to brainstorm content ideas",
|
||||
}
|
||||
|
||||
# Deliverable descriptions
|
||||
DELIVERABLE_DESCRIPTIONS = {
|
||||
ExpectedDeliverable.KEY_STATISTICS: "Numbers, percentages, data points with citations",
|
||||
ExpectedDeliverable.EXPERT_QUOTES: "Authoritative quotes from industry experts",
|
||||
ExpectedDeliverable.CASE_STUDIES: "Real examples and success stories",
|
||||
ExpectedDeliverable.COMPARISONS: "Side-by-side analysis tables",
|
||||
ExpectedDeliverable.TRENDS: "Current and emerging industry trends",
|
||||
ExpectedDeliverable.BEST_PRACTICES: "Recommended approaches and guidelines",
|
||||
ExpectedDeliverable.STEP_BY_STEP: "Process guides and how-to instructions",
|
||||
ExpectedDeliverable.PROS_CONS: "Advantages and disadvantages analysis",
|
||||
ExpectedDeliverable.DEFINITIONS: "Clear explanations of concepts and terms",
|
||||
ExpectedDeliverable.CITATIONS: "Authoritative sources for reference",
|
||||
ExpectedDeliverable.EXAMPLES: "Concrete examples to illustrate points",
|
||||
ExpectedDeliverable.PREDICTIONS: "Future outlook and predictions",
|
||||
}
|
||||
|
||||
def build_intent_inference_prompt(
    self,
    user_input: str,
    keywords: List[str],
    research_persona: Optional[ResearchPersona] = None,
    competitor_data: Optional[List[Dict]] = None,
    industry: Optional[str] = None,
    target_audience: Optional[str] = None,
) -> str:
    """
    Assemble the LLM prompt that infers a user's research intent.

    The prompt asks the model to classify the input and to report the
    primary/secondary questions, purpose, content output, expected
    deliverables, depth, focus areas, perspective, time sensitivity and
    a confidence score, all as a single JSON object.

    Args:
        user_input: Raw text typed by the user; quoted verbatim in the prompt.
        keywords: Keywords rendered as a "KEYWORDS: a, b" line; the line is
            left empty when the list is empty.
        research_persona: Optional persona forwarded to the persona helper.
        competitor_data: Optional competitor dicts forwarded to the
            competitor helper.
        industry: Optional industry forwarded to the persona helper.
        target_audience: Optional audience forwarded to the persona helper.

    Returns:
        The complete prompt text.
    """
    # Both context sections come from the private helpers on this class.
    persona_section = self._build_persona_context(research_persona, industry, target_audience)
    competitor_section = self._build_competitor_context(competitor_data)

    # Only emit the keywords line when there is something to show.
    keywords_line = ""
    if keywords:
        keywords_line = "KEYWORDS: " + ", ".join(keywords)

    return f"""You are an expert research intent analyzer. Your job is to understand what a content creator REALLY needs from their research.

## USER INPUT
"{user_input}"

{keywords_line}

## USER CONTEXT
{persona_section}

{competitor_section}

## YOUR TASK

Analyze the user's input and infer their research intent. Determine:

1. **INPUT TYPE**: Is this:
- "keywords": Simple topic keywords (e.g., "AI healthcare 2025")
- "question": A specific question (e.g., "What are the best AI tools for healthcare?")
- "goal": A goal statement (e.g., "I need to write a blog about AI in healthcare")
- "mixed": Combination of above

2. **PRIMARY QUESTION**: What is the main question to answer? Convert their input into a clear question.

3. **SECONDARY QUESTIONS**: What related questions should also be answered? (3-5 questions)

4. **PURPOSE**: Why are they researching? Choose ONE:
- "learn": Understand a topic for personal knowledge
- "create_content": Create content (blog, video, podcast)
- "make_decision": Make a choice between options
- "compare": Compare alternatives/competitors
- "solve_problem": Find a solution
- "find_data": Get specific statistics/facts
- "explore_trends": Understand industry trends
- "validate": Verify claims/information
- "generate_ideas": Brainstorm ideas

5. **CONTENT OUTPUT**: What will they create? Choose ONE:
- "blog", "podcast", "video", "social_post", "newsletter", "presentation", "report", "whitepaper", "email", "general"

6. **EXPECTED DELIVERABLES**: What specific outputs do they need? Choose ALL that apply:
- "key_statistics": Numbers, data points
- "expert_quotes": Authoritative quotes
- "case_studies": Real examples
- "comparisons": Side-by-side analysis
- "trends": Industry trends
- "best_practices": Recommendations
- "step_by_step": How-to guides
- "pros_cons": Advantages/disadvantages
- "definitions": Concept explanations
- "citations": Source references
- "examples": Concrete examples
- "predictions": Future outlook

7. **DEPTH**: How deep should the research go?
- "overview": Quick summary
- "detailed": In-depth analysis
- "expert": Comprehensive expert-level

8. **FOCUS AREAS**: What specific aspects should be researched? (2-4 areas)

9. **PERSPECTIVE**: From whose viewpoint? (e.g., "marketing manager", "small business owner")

10. **TIME SENSITIVITY**: Is recency important?
- "real_time": Latest only (past 24-48 hours)
- "recent": Past week/month
- "historical": Include older content
- "evergreen": Timeless content

11. **CONFIDENCE**: How confident are you in this inference? (0.0-1.0)
- If < 0.7, set needs_clarification to true and provide clarifying_questions

## OUTPUT FORMAT

Return a JSON object:
```json
{{
"input_type": "keywords|question|goal|mixed",
"primary_question": "The main question to answer",
"secondary_questions": ["question 1", "question 2", "question 3"],
"purpose": "one of the purpose options",
"content_output": "one of the content options",
"expected_deliverables": ["deliverable1", "deliverable2"],
"depth": "overview|detailed|expert",
"focus_areas": ["area1", "area2"],
"perspective": "target perspective or null",
"time_sensitivity": "real_time|recent|historical|evergreen",
"confidence": 0.85,
"needs_clarification": false,
"clarifying_questions": [],
"analysis_summary": "Brief summary of what the user wants"
}}
```

## IMPORTANT RULES

1. Always convert vague input into a specific primary question
2. Infer deliverables based on purpose (e.g., create_content → statistics + examples)
3. Use persona context to refine perspective and focus areas
4. If input is ambiguous, provide clarifying questions
5. Default to "detailed" depth unless input suggests otherwise
6. For content creation, include relevant deliverables automatically
"""
|
||||
|
||||
def build_query_generation_prompt(
    self,
    intent: ResearchIntent,
    research_persona: Optional[ResearchPersona] = None,
) -> str:
    """
    Build prompt for generating targeted research queries.

    The prompt lists the inferred intent (questions, purpose, deliverables,
    depth, focus areas, perspective, time sensitivity) and asks the model
    for 4-8 queries, each targeting a specific deliverable, as JSON.

    Args:
        intent: The inferred research intent to describe in the prompt.
        research_persona: Optional persona; up to its first 10
            suggested keywords are appended as a hint line.

    Returns:
        The complete prompt text.
    """

    def describe(enum_cls, descriptions, raw_value):
        """Return the description for raw_value, or raw_value if it is unknown."""
        try:
            return descriptions.get(enum_cls(raw_value), raw_value)
        except ValueError:
            # Enum lookup by value raises for unknown values; the .get()
            # default alone never covered that case. Fall back to the raw
            # value, matching the tolerance of _build_deliverables_instructions.
            return raw_value

    deliverables_list = "\n".join(
        f"- {d}: {describe(ExpectedDeliverable, self.DELIVERABLE_DESCRIPTIONS, d)}"
        for d in (intent.expected_deliverables or [])
    )

    persona_keywords = ""
    if research_persona and research_persona.suggested_keywords:
        persona_keywords = (
            "\nSUGGESTED KEYWORDS FROM PERSONA: "
            + ", ".join(research_persona.suggested_keywords[:10])
        )

    if intent.secondary_questions:
        secondary_questions = "\n".join(f"- {q}" for q in intent.secondary_questions)
    else:
        secondary_questions = "None"

    purpose_explanation = describe(ResearchPurpose, self.PURPOSE_EXPLANATIONS, intent.purpose)
    focus_areas = ", ".join(intent.focus_areas) if intent.focus_areas else "General"

    prompt = f"""You are a research query optimizer. Generate multiple targeted search queries based on the user's research intent.

## RESEARCH INTENT

PRIMARY QUESTION: {intent.primary_question}

SECONDARY QUESTIONS:
{secondary_questions}

PURPOSE: {intent.purpose} - {purpose_explanation}

CONTENT OUTPUT: {intent.content_output}

EXPECTED DELIVERABLES:
{deliverables_list}

DEPTH: {intent.depth}

FOCUS AREAS: {focus_areas}

PERSPECTIVE: {intent.perspective or 'General audience'}

TIME SENSITIVITY: {intent.time_sensitivity or 'No specific requirement'}
{persona_keywords}

## YOUR TASK

Generate 4-8 targeted research queries. Each query should:
1. Target a specific deliverable or question
2. Be optimized for semantic search (Exa/Tavily)
3. Include relevant context for better results

For each query, specify:
- The query string
- What deliverable it targets
- Best provider (exa for semantic/deep, tavily for news/real-time, google for factual)
- Priority (1-5, higher = more important)
- What we expect to find

## OUTPUT FORMAT

Return a JSON object:
```json
{{
"queries": [
{{
"query": "Healthcare AI adoption statistics 2025 hospitals implementation data",
"purpose": "key_statistics",
"provider": "exa",
"priority": 5,
"expected_results": "Statistics on hospital AI adoption rates"
}},
{{
"query": "AI healthcare trends predictions future outlook 2025 2026",
"purpose": "trends",
"provider": "tavily",
"priority": 4,
"expected_results": "Current trends and future predictions in healthcare AI"
}}
],
"enhanced_keywords": ["keyword1", "keyword2", "keyword3"],
"research_angles": [
"Angle 1: Focus on adoption challenges",
"Angle 2: Focus on ROI and outcomes"
]
}}
```

## QUERY OPTIMIZATION RULES

1. For STATISTICS: Include words like "statistics", "data", "percentage", "report", "study"
2. For CASE STUDIES: Include "case study", "success story", "implementation", "example"
3. For TRENDS: Include "trends", "future", "predictions", "emerging", year numbers
4. For EXPERT QUOTES: Include expert names if known, or "expert opinion", "interview"
5. For COMPARISONS: Include "vs", "compare", "comparison", "alternative"
6. For NEWS/REAL-TIME: Use Tavily, include recent year/month
7. For ACADEMIC/DEEP: Use Exa with neural search
"""

    return prompt
|
||||
|
||||
def build_intent_aware_analysis_prompt(
    self,
    raw_results: str,
    intent: ResearchIntent,
    research_persona: Optional[ResearchPersona] = None,
) -> str:
    """
    Build prompt for analyzing research results based on user intent.

    This is the key prompt that extracts exactly what the user needs: it
    restates the intent, embeds (a prefix of) the raw results, adds
    per-deliverable extraction instructions and specifies the JSON shape
    of the expected answer.

    Args:
        raw_results: Raw research text; only the first 15000 characters
            are embedded to keep the prompt within token limits.
        intent: The inferred research intent.
        research_persona: Accepted for interface compatibility; not used
            by this method.

    Returns:
        The complete prompt text.
    """
    # Character budget for the embedded raw results.
    max_result_chars = 15000

    try:
        purpose_explanation = self.PURPOSE_EXPLANATIONS.get(
            ResearchPurpose(intent.purpose),
            intent.purpose,
        )
    except ValueError:
        # Unknown purpose value: show it verbatim instead of failing.
        purpose_explanation = intent.purpose

    deliverables_instructions = self._build_deliverables_instructions(intent.expected_deliverables)

    perspective_instruction = ""
    if intent.perspective:
        perspective_instruction = f"\n**PERSPECTIVE**: Analyze results from the viewpoint of: {intent.perspective}"

    if intent.secondary_questions:
        secondary_questions = "\n".join(f"- {q}" for q in intent.secondary_questions)
    else:
        secondary_questions = "None specified"

    focus_areas = ", ".join(intent.focus_areas) if intent.focus_areas else "General"

    # Truncate in code. Previously a "# Truncated for token limits" code
    # comment sat inside the f-string and was sent to the model on every
    # call, whether or not anything had been cut.
    results_text = raw_results or ""
    results_excerpt = results_text[:max_result_chars]
    if len(results_text) > max_result_chars:
        results_excerpt += "\n[Results truncated for token limits]"

    prompt = f"""You are a research analyst helping a content creator find exactly what they need. Your job is to analyze raw research results and extract precisely what the user is looking for.

## USER'S RESEARCH INTENT

PRIMARY QUESTION: {intent.primary_question}

SECONDARY QUESTIONS:
{secondary_questions}

PURPOSE: {intent.purpose}
→ {purpose_explanation}

CONTENT OUTPUT: {intent.content_output}

EXPECTED DELIVERABLES: {', '.join(intent.expected_deliverables)}

FOCUS AREAS: {focus_areas}
{perspective_instruction}

## RAW RESEARCH RESULTS

{results_excerpt}

## YOUR TASK

Analyze the raw research results and extract EXACTLY what the user needs.

{deliverables_instructions}

## OUTPUT REQUIREMENTS

Provide results in this JSON structure:

```json
{{
"primary_answer": "Direct 2-3 sentence answer to the primary question",
"secondary_answers": {{
"Question 1?": "Answer to question 1",
"Question 2?": "Answer to question 2"
}},
"executive_summary": "2-3 sentence executive summary of all findings",
"key_takeaways": [
"Key takeaway 1 - most important finding",
"Key takeaway 2",
"Key takeaway 3",
"Key takeaway 4",
"Key takeaway 5"
],
"statistics": [
{{
"statistic": "72% of hospitals plan to adopt AI by 2025",
"value": "72%",
"context": "Survey of 500 US hospitals in 2024",
"source": "Healthcare AI Report 2024",
"url": "https://example.com/report",
"credibility": 0.9,
"recency": "2024"
}}
],
"expert_quotes": [
{{
"quote": "AI will revolutionize patient care within 5 years",
"speaker": "Dr. Jane Smith",
"title": "Chief Medical Officer",
"organization": "HealthTech Inc",
"source": "TechCrunch",
"url": "https://example.com/article"
}}
],
"case_studies": [
{{
"title": "Mayo Clinic AI Implementation",
"organization": "Mayo Clinic",
"challenge": "High patient wait times",
"solution": "AI-powered triage system",
"outcome": "40% reduction in wait times",
"key_metrics": ["40% faster triage", "95% patient satisfaction"],
"source": "Healthcare IT News",
"url": "https://example.com"
}}
],
"trends": [
{{
"trend": "AI-assisted diagnostics adoption",
"direction": "growing",
"evidence": ["25% YoY growth", "Major hospital chains investing"],
"impact": "Could reduce misdiagnosis by 30%",
"timeline": "Expected mainstream by 2027",
"sources": ["url1", "url2"]
}}
],
"comparisons": [
{{
"title": "Top AI Healthcare Platforms",
"criteria": ["Cost", "Features", "Support"],
"items": [
{{
"name": "Platform A",
"pros": ["Easy integration", "Good support"],
"cons": ["Higher cost"],
"features": {{"Cost": "$500/month", "Support": "24/7"}}
}}
],
"verdict": "Platform A best for large hospitals"
}}
],
"best_practices": [
"Start with a pilot program before full deployment",
"Ensure staff training is comprehensive"
],
"step_by_step": [
"Step 1: Assess current infrastructure",
"Step 2: Define use cases",
"Step 3: Select vendor"
],
"pros_cons": {{
"subject": "AI in Healthcare",
"pros": ["Improved accuracy", "Cost savings"],
"cons": ["Initial investment", "Training required"],
"balanced_verdict": "Benefits outweigh costs for most hospitals"
}},
"definitions": {{
"Clinical AI": "AI systems designed for medical diagnosis and treatment recommendations"
}},
"examples": [
"Example: Hospital X reduced readmissions by 25% using predictive AI"
],
"predictions": [
"By 2030, AI will assist in 80% of initial diagnoses"
],
"suggested_outline": [
"1. Introduction: The AI Healthcare Revolution",
"2. Current State: Where We Are Today",
"3. Key Statistics and Trends",
"4. Case Studies: Success Stories",
"5. Implementation Guide",
"6. Future Outlook"
],
"sources": [
{{
"title": "Healthcare AI Report 2024",
"url": "https://example.com",
"relevance_score": 0.95,
"relevance_reason": "Directly addresses adoption statistics",
"content_type": "research report",
"credibility_score": 0.9
}}
],
"confidence": 0.85,
"gaps_identified": [
"Specific cost data for small clinics not found",
"Limited information on regulatory challenges"
],
"follow_up_queries": [
"AI healthcare regulations FDA 2025",
"Small clinic AI implementation costs"
]
}}
```

## CRITICAL RULES

1. **ONLY include information directly from the raw results** - do not make up data
2. **ALWAYS include source URLs** for every statistic, quote, and case study
3. **If a deliverable type has no relevant data**, return an empty array for it
4. **Prioritize recency and credibility** when multiple sources conflict
5. **Answer the PRIMARY QUESTION directly** in 2-3 clear sentences
6. **Keep KEY TAKEAWAYS to 5-7 points** - the most important findings
7. **Add to gaps_identified** if expected information is missing
8. **Suggest follow_up_queries** for gaps or incomplete areas
9. **Rate confidence** based on how well results match the user's intent
10. **Include deliverables ONLY if they are in expected_deliverables** or critical to the question
"""

    return prompt
|
||||
|
||||
def _build_persona_context(
|
||||
self,
|
||||
research_persona: Optional[ResearchPersona],
|
||||
industry: Optional[str],
|
||||
target_audience: Optional[str],
|
||||
) -> str:
|
||||
"""Build persona context section for prompts."""
|
||||
|
||||
if not research_persona and not industry:
|
||||
return "No specific persona context available."
|
||||
|
||||
context_parts = []
|
||||
|
||||
if research_persona:
|
||||
context_parts.append(f"INDUSTRY: {research_persona.default_industry}")
|
||||
context_parts.append(f"TARGET AUDIENCE: {research_persona.default_target_audience}")
|
||||
if research_persona.suggested_keywords:
|
||||
context_parts.append(f"TYPICAL TOPICS: {', '.join(research_persona.suggested_keywords[:5])}")
|
||||
if research_persona.research_angles:
|
||||
context_parts.append(f"RESEARCH ANGLES: {', '.join(research_persona.research_angles[:3])}")
|
||||
else:
|
||||
if industry:
|
||||
context_parts.append(f"INDUSTRY: {industry}")
|
||||
if target_audience:
|
||||
context_parts.append(f"TARGET AUDIENCE: {target_audience}")
|
||||
|
||||
return "\n".join(context_parts)
|
||||
|
||||
def _build_competitor_context(self, competitor_data: Optional[List[Dict]]) -> str:
|
||||
"""Build competitor context section for prompts."""
|
||||
|
||||
if not competitor_data:
|
||||
return ""
|
||||
|
||||
competitor_names = []
|
||||
for comp in competitor_data[:5]: # Limit to 5
|
||||
name = comp.get("name") or comp.get("domain") or comp.get("url", "Unknown")
|
||||
competitor_names.append(name)
|
||||
|
||||
if competitor_names:
|
||||
return f"\nKNOWN COMPETITORS: {', '.join(competitor_names)}"
|
||||
|
||||
return ""
|
||||
|
||||
def _build_deliverables_instructions(self, expected_deliverables: List[str]) -> str:
    """Assemble the extraction-instruction section for the requested deliverables.

    Starts with a fixed two-line header and then appends, in request
    order, the instruction text for every requested deliverable that is a
    valid ExpectedDeliverable value. Values that are not valid members are
    skipped without error.

    Args:
        expected_deliverables: Deliverable values requested by the intent.

    Returns:
        All sections joined with newlines.
    """
    sections = [
        "### EXTRACTION INSTRUCTIONS\n",
        "For each requested deliverable, extract the following:\n",
    ]

    # Instruction text per deliverable.
    per_deliverable = {
        ExpectedDeliverable.KEY_STATISTICS: """
**STATISTICS**:
- Extract ALL relevant statistics with exact numbers
- Include source attribution (publication name, URL)
- Note the recency of the data
- Rate credibility based on source authority
- Format: statistic statement, value, context, source, URL, credibility score
""",
        ExpectedDeliverable.EXPERT_QUOTES: """
**EXPERT QUOTES**:
- Extract authoritative quotes from named experts
- Include speaker name, title, and organization
- Provide context for the quote
- Include source URL
""",
        ExpectedDeliverable.CASE_STUDIES: """
**CASE STUDIES**:
- Summarize each case study: challenge → solution → outcome
- Include key metrics and results
- Name the organization involved
- Provide source URL
""",
        ExpectedDeliverable.TRENDS: """
**TRENDS**:
- Identify current and emerging trends
- Note direction: growing, declining, emerging, or stable
- List supporting evidence
- Include timeline predictions if available
- Cite sources
""",
        ExpectedDeliverable.COMPARISONS: """
**COMPARISONS**:
- Build comparison tables where applicable
- Define clear comparison criteria
- List pros and cons for each option
- Provide a verdict/recommendation if data supports it
""",
        ExpectedDeliverable.BEST_PRACTICES: """
**BEST PRACTICES**:
- Extract recommended approaches
- Provide actionable guidelines
- Order by importance or sequence
""",
        ExpectedDeliverable.STEP_BY_STEP: """
**STEP BY STEP**:
- Extract process/how-to instructions
- Number steps clearly
- Include any prerequisites or requirements
""",
        ExpectedDeliverable.PROS_CONS: """
**PROS AND CONS**:
- List advantages (pros)
- List disadvantages (cons)
- Provide a balanced verdict
""",
        ExpectedDeliverable.DEFINITIONS: """
**DEFINITIONS**:
- Extract clear explanations of key terms and concepts
- Keep definitions concise but comprehensive
""",
        ExpectedDeliverable.EXAMPLES: """
**EXAMPLES**:
- Extract concrete examples that illustrate key points
- Include real-world applications
""",
        ExpectedDeliverable.PREDICTIONS: """
**PREDICTIONS**:
- Extract future outlook and predictions
- Note the source and their track record if known
- Include timeframes where mentioned
""",
        ExpectedDeliverable.CITATIONS: """
**CITATIONS**:
- List all authoritative sources with URLs
- Rate credibility and relevance
- Note content type (research, news, opinion, etc.)
""",
    }

    for requested in expected_deliverables:
        try:
            member = ExpectedDeliverable(requested)
        except ValueError:
            # Not a known deliverable value: nothing to add for it.
            continue
        if member in per_deliverable:
            sections.append(per_deliverable[member])

    return "\n".join(sections)
|
||||
387
backend/services/research/intent/intent_query_generator.py
Normal file
387
backend/services/research/intent/intent_query_generator.py
Normal file
@@ -0,0 +1,387 @@
|
||||
"""
|
||||
Intent Query Generator
|
||||
|
||||
Generates multiple targeted research queries based on user intent.
|
||||
Each query targets a specific deliverable or question.
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 1.0
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from models.research_intent_models import (
|
||||
ResearchIntent,
|
||||
ResearchQuery,
|
||||
ExpectedDeliverable,
|
||||
ResearchPurpose,
|
||||
)
|
||||
from models.research_persona_models import ResearchPersona
|
||||
from .intent_prompt_builder import IntentPromptBuilder
|
||||
|
||||
|
||||
class IntentQueryGenerator:
    """
    Generates targeted research queries based on user intent.

    Instead of a single generic search, generates multiple queries
    each targeting a specific deliverable or question. When the LLM call
    fails or returns something unusable, template-based fallback queries
    are returned in the same result shape.
    """

    # Priority used when the LLM omits the field or returns a non-numeric value.
    _DEFAULT_PRIORITY = 3

    # Template-based queries keyed by deliverable value:
    # (query template, provider, priority, expected results).
    # NOTE(review): some templates embed fixed years ("2025", "2030"),
    # which will go stale over time.
    _QUERY_TEMPLATES = {
        ExpectedDeliverable.KEY_STATISTICS.value: (
            "{topic} statistics data report study",
            "exa", 5, "Statistical data and research findings",
        ),
        ExpectedDeliverable.EXPERT_QUOTES.value: (
            "{topic} expert opinion interview insights",
            "exa", 4, "Expert opinions and authoritative quotes",
        ),
        ExpectedDeliverable.CASE_STUDIES.value: (
            "{topic} case study success story implementation example",
            "exa", 4, "Real-world case studies and examples",
        ),
        ExpectedDeliverable.TRENDS.value: (
            "{topic} trends 2025 future predictions emerging",
            "tavily", 4, "Current trends and future predictions",
        ),
        ExpectedDeliverable.COMPARISONS.value: (
            "{topic} comparison vs versus alternatives",
            "exa", 4, "Comparison and alternative options",
        ),
        ExpectedDeliverable.BEST_PRACTICES.value: (
            "{topic} best practices recommendations guidelines",
            "exa", 3, "Best practices and recommendations",
        ),
        ExpectedDeliverable.STEP_BY_STEP.value: (
            "{topic} how to guide tutorial steps",
            "exa", 3, "Step-by-step guides and tutorials",
        ),
        ExpectedDeliverable.PROS_CONS.value: (
            "{topic} advantages disadvantages pros cons benefits",
            "exa", 3, "Pros, cons, and trade-offs",
        ),
        ExpectedDeliverable.DEFINITIONS.value: (
            "what is {topic} definition explained",
            "exa", 3, "Clear definitions and explanations",
        ),
        ExpectedDeliverable.EXAMPLES.value: (
            "{topic} examples real world applications",
            "exa", 3, "Real-world examples and applications",
        ),
        ExpectedDeliverable.PREDICTIONS.value: (
            "{topic} future outlook predictions 2025 2030",
            "tavily", 4, "Future predictions and outlook",
        ),
        ExpectedDeliverable.CITATIONS.value: (
            "{topic} research paper study academic",
            "exa", 4, "Authoritative academic sources",
        ),
    }

    # Template used for deliverable values that have no dedicated entry.
    _GENERIC_TEMPLATE = ("{topic}", "exa", 3, "General information")

    def __init__(self):
        """Initialize the query generator."""
        self.prompt_builder = IntentPromptBuilder()
        logger.info("IntentQueryGenerator initialized")

    @staticmethod
    def _deliverable_value(deliverable):
        """Return the plain value of a deliverable given as enum member or string."""
        return getattr(deliverable, "value", deliverable)

    @staticmethod
    def _build_query_schema() -> Dict[str, Any]:
        """Return a fresh JSON schema describing the expected LLM response."""
        return {
            "type": "object",
            "properties": {
                "queries": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "query": {"type": "string"},
                            "purpose": {"type": "string"},
                            "provider": {"type": "string"},
                            "priority": {"type": "integer"},
                            "expected_results": {"type": "string"},
                        },
                        "required": ["query", "purpose", "provider", "priority", "expected_results"],
                    },
                },
                "enhanced_keywords": {"type": "array", "items": {"type": "string"}},
                "research_angles": {"type": "array", "items": {"type": "string"}},
            },
            "required": ["queries", "enhanced_keywords", "research_angles"],
        }

    async def generate_queries(
        self,
        intent: ResearchIntent,
        research_persona: Optional[ResearchPersona] = None,
    ) -> Dict[str, Any]:
        """
        Generate targeted research queries based on intent.

        Args:
            intent: The inferred research intent
            research_persona: Optional persona for context

        Returns:
            Dict with queries, enhanced_keywords, and research_angles.
            Any failure results in template-based fallback queries rather
            than an exception.
        """
        try:
            logger.info(f"Generating queries for: {intent.primary_question[:50]}...")

            # Build the query generation prompt
            prompt = self.prompt_builder.build_query_generation_prompt(
                intent=intent,
                research_persona=research_persona,
            )

            # Imported at call time, as in the original implementation.
            from services.llm_providers.main_text_generation import llm_text_gen

            # NOTE(review): llm_text_gen is called without await inside this
            # coroutine; if it performs blocking I/O it will hold up the
            # event loop - confirm and consider offloading to a thread.
            result = llm_text_gen(
                prompt=prompt,
                json_struct=self._build_query_schema(),
                user_id=None,
            )

            # Accept a JSON document returned as text.
            if isinstance(result, str):
                try:
                    result = json.loads(result)
                except ValueError:
                    logger.error("Query generation returned non-JSON text")
                    return self._create_fallback_queries(intent)

            if not isinstance(result, dict):
                logger.error(
                    f"Query generation returned unexpected type: {type(result).__name__}"
                )
                return self._create_fallback_queries(intent)

            if "error" in result:
                logger.error(f"Query generation failed: {result.get('error')}")
                return self._create_fallback_queries(intent)

            # Parse queries
            queries = self._parse_queries(result.get("queries") or [])

            # Ensure we have queries for all expected deliverables
            queries = self._ensure_deliverable_coverage(queries, intent)

            # Sort by priority
            queries.sort(key=lambda q: q.priority, reverse=True)

            logger.info(f"Generated {len(queries)} targeted queries")

            return {
                "queries": queries,
                "enhanced_keywords": result.get("enhanced_keywords") or [],
                "research_angles": result.get("research_angles") or [],
            }

        except Exception as e:
            # Top-level boundary: never let query generation break research.
            logger.error(f"Error generating queries: {e}")
            return self._create_fallback_queries(intent)

    def _parse_queries(self, raw_queries: List[Dict]) -> List[ResearchQuery]:
        """Parse raw query data into ResearchQuery objects.

        Entries that are not dicts, have no query text, or fail model
        construction are skipped with a warning. An unknown purpose falls
        back to KEY_STATISTICS; a missing or non-numeric priority falls
        back to the default and the result is clamped to 1-5.
        """
        queries = []
        for raw in raw_queries or []:
            if not isinstance(raw, dict):
                logger.warning(f"Skipping non-dict query entry: {raw!r}")
                continue

            text = raw.get("query")
            if not isinstance(text, str) or not text.strip():
                logger.warning("Skipping query entry without query text")
                continue

            # Validate purpose
            try:
                purpose = ExpectedDeliverable(raw.get("purpose", "key_statistics"))
            except ValueError:
                purpose = ExpectedDeliverable.KEY_STATISTICS

            # A bad priority should not discard an otherwise usable query.
            try:
                priority = int(raw.get("priority", self._DEFAULT_PRIORITY))
            except (TypeError, ValueError):
                priority = self._DEFAULT_PRIORITY
            priority = min(max(priority, 1), 5)  # Clamp 1-5

            try:
                queries.append(
                    ResearchQuery(
                        query=text,
                        purpose=purpose,
                        provider=raw.get("provider") or "exa",
                        priority=priority,
                        expected_results=raw.get("expected_results") or "",
                    )
                )
            except Exception as e:
                # The model's validation error type is not imported here,
                # so catch broadly and keep processing the other entries.
                logger.warning(f"Failed to parse query: {e}")

        return queries

    def _ensure_deliverable_coverage(
        self,
        queries: List[ResearchQuery],
        intent: ResearchIntent,
    ) -> List[ResearchQuery]:
        """Ensure we have queries for all expected deliverables.

        Appends one template-based query per expected deliverable that no
        existing query targets. The input list is extended in place and
        returned.
        """
        # Deliverables already covered, as plain values.
        covered = {self._deliverable_value(q.purpose) for q in queries}

        for deliverable in intent.expected_deliverables:
            key = self._deliverable_value(deliverable)
            if key in covered:
                continue
            queries.append(
                self._generate_query_for_deliverable(
                    deliverable=deliverable,
                    intent=intent,
                )
            )
            # Record it so a repeated deliverable does not yield a duplicate.
            covered.add(key)

        return queries

    def _generate_query_for_deliverable(
        self,
        deliverable: str,
        intent: ResearchIntent,
    ) -> ResearchQuery:
        """Generate a template-based query targeting a specific deliverable.

        Unknown deliverable values get a generic query on the topic and
        are tagged with KEY_STATISTICS as purpose.
        """
        # The topic is the user's original input (not the primary question).
        topic = intent.original_input

        key = self._deliverable_value(deliverable)
        template, provider, priority, expected = self._QUERY_TEMPLATES.get(
            key, self._GENERIC_TEMPLATE
        )

        try:
            purpose = ExpectedDeliverable(key)
        except ValueError:
            purpose = ExpectedDeliverable.KEY_STATISTICS

        return ResearchQuery(
            query=template.format(topic=topic),
            purpose=purpose,
            provider=provider,
            priority=priority,
            expected_results=expected,
        )

    def _create_fallback_queries(self, intent: ResearchIntent) -> Dict[str, Any]:
        """Create fallback queries when AI generation fails.

        Builds one template-based query for each of the first 5 expected
        deliverables, or a single general query when there are none.
        """
        topic = intent.original_input

        # Generate basic queries for each expected deliverable (limit to 5)
        queries = [
            self._generate_query_for_deliverable(deliverable, intent)
            for deliverable in intent.expected_deliverables[:5]
        ]

        # Add a general query if we have none
        if not queries:
            queries.append(
                ResearchQuery(
                    query=topic,
                    purpose=ExpectedDeliverable.KEY_STATISTICS,
                    provider="exa",
                    priority=5,
                    expected_results="General information and insights",
                )
            )

        return {
            "queries": queries,
            "enhanced_keywords": topic.split()[:10],
            "research_angles": [
                f"Overview of {topic}",
                f"Latest trends in {topic}",
            ],
        }
|
||||
|
||||
|
||||
class QueryOptimizer:
|
||||
"""
|
||||
Optimizes queries for different research providers.
|
||||
|
||||
Different providers have different strengths:
|
||||
- Exa: Semantic search, good for deep research
|
||||
- Tavily: Real-time search, good for news/trends
|
||||
- Google: Factual search, good for basic info
|
||||
"""
|
||||
|
||||
@staticmethod
def optimize_for_exa(query: str, intent: ResearchIntent) -> Dict[str, Any]:
    """Derive Exa search parameters for a query from the research intent.

    Args:
        query: The query string, passed through unchanged.
        intent: Research intent whose expected deliverables, purpose and
            depth drive the parameter choice.

    Returns:
        Dict with "query", "type", "category", "num_results", "text" and
        "highlights" entries.
    """
    requested = intent.expected_deliverables
    wants_trends = ExpectedDeliverable.TRENDS.value in requested

    # Category: citations take precedence over trends, then comparisons;
    # otherwise no category is set.
    if ExpectedDeliverable.CITATIONS.value in requested:
        exa_category = "research paper"
    elif wants_trends:
        exa_category = "news"
    elif intent.purpose == ResearchPurpose.COMPARE.value:
        exa_category = "company"
    else:
        exa_category = None

    # Neural search by default; "auto" for time-sensitive trend queries.
    exa_type = "auto" if wants_trends else "neural"

    # Result count scales with the requested depth.
    depth = intent.depth
    if depth == "expert":
        result_count = 20
    elif depth == "overview":
        result_count = 5
    else:
        result_count = 10

    return {
        "query": query,
        "type": exa_type,
        "category": exa_category,
        "num_results": result_count,
        "text": True,
        "highlights": True,
    }
|
||||
|
||||
@staticmethod
|
||||
def optimize_for_tavily(query: str, intent: ResearchIntent) -> Dict[str, Any]:
|
||||
"""Optimize query and parameters for Tavily."""
|
||||
|
||||
deliverables = intent.expected_deliverables
|
||||
|
||||
# Determine topic
|
||||
topic = "general"
|
||||
if ExpectedDeliverable.TRENDS.value in deliverables:
|
||||
topic = "news"
|
||||
|
||||
# Determine search depth
|
||||
search_depth = "basic"
|
||||
if intent.depth in ["detailed", "expert"]:
|
||||
search_depth = "advanced"
|
||||
|
||||
# Include answer for factual queries
|
||||
include_answer = False
|
||||
if ExpectedDeliverable.DEFINITIONS.value in deliverables:
|
||||
include_answer = "advanced"
|
||||
elif ExpectedDeliverable.KEY_STATISTICS.value in deliverables:
|
||||
include_answer = "basic"
|
||||
|
||||
# Time range for trends
|
||||
time_range = None
|
||||
if intent.time_sensitivity == "real_time":
|
||||
time_range = "day"
|
||||
elif intent.time_sensitivity == "recent":
|
||||
time_range = "week"
|
||||
elif ExpectedDeliverable.TRENDS.value in deliverables:
|
||||
time_range = "month"
|
||||
|
||||
return {
|
||||
"query": query,
|
||||
"topic": topic,
|
||||
"search_depth": search_depth,
|
||||
"include_answer": include_answer,
|
||||
"time_range": time_range,
|
||||
"max_results": 10,
|
||||
}
|
||||
378
backend/services/research/intent/research_intent_inference.py
Normal file
378
backend/services/research/intent/research_intent_inference.py
Normal file
@@ -0,0 +1,378 @@
|
||||
"""
|
||||
Research Intent Inference Service
|
||||
|
||||
Analyzes user input to understand their research intent.
|
||||
Uses AI to infer:
|
||||
- What the user wants to accomplish
|
||||
- What questions need answering
|
||||
- What deliverables they expect
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 1.0
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from models.research_intent_models import (
|
||||
ResearchIntent,
|
||||
ResearchPurpose,
|
||||
ContentOutput,
|
||||
ExpectedDeliverable,
|
||||
ResearchDepthLevel,
|
||||
InputType,
|
||||
IntentInferenceRequest,
|
||||
IntentInferenceResponse,
|
||||
ResearchQuery,
|
||||
)
|
||||
from models.research_persona_models import ResearchPersona
|
||||
from .intent_prompt_builder import IntentPromptBuilder
|
||||
|
||||
|
||||
class ResearchIntentInference:
    """
    Infers user research intent from minimal input.

    Instead of asking a formal questionnaire, this service
    uses AI to understand what the user really wants.
    """

    # Confidence used when the LLM does not supply a usable number.
    _DEFAULT_CONFIDENCE = 0.7

    # Punctuation trimmed from both ends of a token during keyword extraction.
    _TOKEN_EDGE_CHARS = ".,;:!?\"'()[]{}"

    # Words that never count as keywords. Built once instead of per call.
    _STOP_WORDS = frozenset({
        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "must", "shall", "can", "need", "dare",
        "to", "of", "in", "for", "on", "with", "at", "by", "from", "up",
        "about", "into", "through", "during", "before", "after", "above",
        "below", "between", "under", "again", "further", "then", "once",
        "here", "there", "when", "where", "why", "how", "all", "each",
        "few", "more", "most", "other", "some", "such", "no", "nor", "not",
        "only", "own", "same", "so", "than", "too", "very", "just", "and",
        "but", "if", "or", "because", "as", "until", "while", "i", "we",
        "you", "they", "what", "which", "who", "whom", "this", "that",
        "these", "those", "am", "want", "write", "blog", "post", "article",
    })

    def __init__(self):
        """Initialize the intent inference service."""
        self.prompt_builder = IntentPromptBuilder()
        logger.info("ResearchIntentInference initialized")

    async def infer_intent(
        self,
        user_input: str,
        keywords: Optional[List[str]] = None,
        research_persona: Optional[ResearchPersona] = None,
        competitor_data: Optional[List[Dict]] = None,
        industry: Optional[str] = None,
        target_audience: Optional[str] = None,
    ) -> IntentInferenceResponse:
        """
        Analyze user input and infer their research intent.

        Any failure (LLM error payload, undecodable result, or an exception
        anywhere in the pipeline) yields the low-confidence response from
        ``_create_fallback_response`` instead of raising.

        Args:
            user_input: User's keywords, question, or goal
            keywords: Extracted keywords (optional)
            research_persona: User's research persona (optional)
            competitor_data: Competitor analysis data (optional)
            industry: Industry context (optional)
            target_audience: Target audience context (optional)

        Returns:
            IntentInferenceResponse with inferred intent and suggested queries
        """
        try:
            logger.info(f"Inferring intent for: {user_input[:100]}...")

            keywords = keywords or []

            # Build the inference prompt
            prompt = self.prompt_builder.build_intent_inference_prompt(
                user_input=user_input,
                keywords=keywords,
                research_persona=research_persona,
                competitor_data=competitor_data,
                industry=industry,
                target_audience=target_audience,
            )

            # Define the expected JSON schema
            intent_schema = {
                "type": "object",
                "properties": {
                    "input_type": {"type": "string", "enum": ["keywords", "question", "goal", "mixed"]},
                    "primary_question": {"type": "string"},
                    "secondary_questions": {"type": "array", "items": {"type": "string"}},
                    "purpose": {"type": "string"},
                    "content_output": {"type": "string"},
                    "expected_deliverables": {"type": "array", "items": {"type": "string"}},
                    "depth": {"type": "string", "enum": ["overview", "detailed", "expert"]},
                    "focus_areas": {"type": "array", "items": {"type": "string"}},
                    "perspective": {"type": "string"},
                    "time_sensitivity": {"type": "string"},
                    "confidence": {"type": "number"},
                    "needs_clarification": {"type": "boolean"},
                    "clarifying_questions": {"type": "array", "items": {"type": "string"}},
                    "analysis_summary": {"type": "string"},
                },
                "required": [
                    "input_type", "primary_question", "purpose", "content_output",
                    "expected_deliverables", "depth", "confidence", "analysis_summary",
                ],
            }

            # Call LLM for intent inference. The import stays local, as in
            # the original code (presumably to avoid an import cycle).
            from services.llm_providers.main_text_generation import llm_text_gen

            # NOTE(review): this is a synchronous call inside a coroutine and
            # looks like it blocks the event loop for the whole LLM round
            # trip - confirm, and consider running it in a worker thread.
            # NOTE(review): user_id is hard-coded to None - confirm that
            # llm_text_gen accepts that for this call path.
            raw_result = llm_text_gen(
                prompt=prompt,
                json_struct=intent_schema,
                user_id=None,
            )

            # Accept a dict or a JSON string that decodes to a dict.
            result = self._coerce_llm_result(raw_result)
            if result is None:
                logger.error(
                    "Intent inference failed: LLM result is not a JSON object "
                    f"(got {type(raw_result).__name__})"
                )
                return self._create_fallback_response(user_input, keywords)

            if "error" in result:
                logger.error(f"Intent inference failed: {result.get('error')}")
                return self._create_fallback_response(user_input, keywords)

            # Parse and validate the result
            intent = self._parse_intent_result(result, user_input)

            # Generate quick options for UI
            quick_options = self._generate_quick_options(intent, result)

            # Create response. "or" (rather than a .get default) also covers
            # keys that are present but null.
            response = IntentInferenceResponse(
                success=True,
                intent=intent,
                analysis_summary=result.get("analysis_summary") or "Research intent analyzed",
                suggested_queries=[],  # Will be populated by query generator
                suggested_keywords=self._extract_keywords_from_input(user_input, keywords),
                suggested_angles=result.get("focus_areas") or [],
                quick_options=quick_options,
            )

            logger.info(f"Intent inferred: purpose={intent.purpose}, confidence={intent.confidence}")
            return response

        except Exception as e:
            # Top-level boundary: never let inference errors reach the
            # caller. logger.exception keeps the traceback in the log.
            logger.exception(f"Error inferring intent: {e}")
            return self._create_fallback_response(user_input, keywords or [])

    def _coerce_llm_result(self, result: Any) -> Optional[Dict[str, Any]]:
        """Return the LLM result as a dict, or None when it is unusable.

        A dict is returned unchanged. A string is decoded as JSON and
        accepted only if it decodes to an object. Anything else yields None.
        """
        if isinstance(result, dict):
            return result
        if isinstance(result, str):
            try:
                decoded = json.loads(result)
            except ValueError:  # json.JSONDecodeError subclasses ValueError
                return None
            return decoded if isinstance(decoded, dict) else None
        return None

    def _safe_float(self, value: Any, default: float) -> float:
        """Convert ``value`` to float, returning ``default`` if it cannot be converted."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def _parse_intent_result(self, result: Dict[str, Any], user_input: str) -> ResearchIntent:
        """Parse LLM result into ResearchIntent model.

        Missing, null or invalid fields are replaced with defaults so that a
        partially malformed LLM answer still produces an intent.

        Args:
            result: Decoded LLM output.
            user_input: The original user text; used as ``original_input``
                and as the primary question when the LLM gave none.

        Returns:
            The populated ResearchIntent.
        """
        # Map string values to enums safely
        input_type = self._safe_enum(InputType, result.get("input_type", "keywords"), InputType.KEYWORDS)
        purpose = self._safe_enum(ResearchPurpose, result.get("purpose", "learn"), ResearchPurpose.LEARN)
        content_output = self._safe_enum(ContentOutput, result.get("content_output", "general"), ContentOutput.GENERAL)
        depth = self._safe_enum(ResearchDepthLevel, result.get("depth", "detailed"), ResearchDepthLevel.DETAILED)

        # Parse expected deliverables: keep valid values only, in order,
        # without duplicates. "or []" guards against an explicit null.
        expected_deliverables: List[str] = []
        for raw in result.get("expected_deliverables") or []:
            try:
                value = ExpectedDeliverable(raw).value
            except ValueError:
                # Skip invalid deliverables
                continue
            if value not in expected_deliverables:
                expected_deliverables.append(value)

        # Ensure we have at least some deliverables
        if not expected_deliverables:
            expected_deliverables = self._infer_deliverables_from_purpose(purpose)

        return ResearchIntent(
            primary_question=result.get("primary_question") or user_input,
            secondary_questions=result.get("secondary_questions") or [],
            purpose=purpose.value,
            content_output=content_output.value,
            expected_deliverables=expected_deliverables,
            depth=depth.value,
            focus_areas=result.get("focus_areas") or [],
            perspective=result.get("perspective"),
            time_sensitivity=result.get("time_sensitivity"),
            input_type=input_type.value,
            original_input=user_input,
            confidence=self._safe_float(result.get("confidence"), self._DEFAULT_CONFIDENCE),
            needs_clarification=result.get("needs_clarification") or False,
            clarifying_questions=result.get("clarifying_questions") or [],
        )

    def _safe_enum(self, enum_class, value: str, default):
        """Safely convert string to enum, returning default if invalid."""
        try:
            return enum_class(value)
        except ValueError:
            return default

    def _infer_deliverables_from_purpose(self, purpose: ResearchPurpose) -> List[str]:
        """Infer expected deliverables based on research purpose.

        Unknown purposes map to key statistics only.
        """
        purpose_deliverables = {
            ResearchPurpose.LEARN: [
                ExpectedDeliverable.DEFINITIONS.value,
                ExpectedDeliverable.EXAMPLES.value,
                ExpectedDeliverable.KEY_STATISTICS.value,
            ],
            ResearchPurpose.CREATE_CONTENT: [
                ExpectedDeliverable.KEY_STATISTICS.value,
                ExpectedDeliverable.EXPERT_QUOTES.value,
                ExpectedDeliverable.EXAMPLES.value,
                ExpectedDeliverable.CASE_STUDIES.value,
            ],
            ResearchPurpose.MAKE_DECISION: [
                ExpectedDeliverable.PROS_CONS.value,
                ExpectedDeliverable.COMPARISONS.value,
                ExpectedDeliverable.BEST_PRACTICES.value,
            ],
            ResearchPurpose.COMPARE: [
                ExpectedDeliverable.COMPARISONS.value,
                ExpectedDeliverable.PROS_CONS.value,
                ExpectedDeliverable.KEY_STATISTICS.value,
            ],
            ResearchPurpose.SOLVE_PROBLEM: [
                ExpectedDeliverable.STEP_BY_STEP.value,
                ExpectedDeliverable.BEST_PRACTICES.value,
                ExpectedDeliverable.CASE_STUDIES.value,
            ],
            ResearchPurpose.FIND_DATA: [
                ExpectedDeliverable.KEY_STATISTICS.value,
                ExpectedDeliverable.CITATIONS.value,
            ],
            ResearchPurpose.EXPLORE_TRENDS: [
                ExpectedDeliverable.TRENDS.value,
                ExpectedDeliverable.PREDICTIONS.value,
                ExpectedDeliverable.KEY_STATISTICS.value,
            ],
            ResearchPurpose.VALIDATE: [
                ExpectedDeliverable.CITATIONS.value,
                ExpectedDeliverable.KEY_STATISTICS.value,
                ExpectedDeliverable.EXPERT_QUOTES.value,
            ],
            ResearchPurpose.GENERATE_IDEAS: [
                ExpectedDeliverable.EXAMPLES.value,
                ExpectedDeliverable.TRENDS.value,
                ExpectedDeliverable.CASE_STUDIES.value,
            ],
        }

        return purpose_deliverables.get(purpose, [ExpectedDeliverable.KEY_STATISTICS.value])

    def _generate_quick_options(self, intent: ResearchIntent, result: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate quick options for UI confirmation.

        Every option carries the same confidence: the LLM-reported value, or
        the default when that value is missing or not numeric.

        Args:
            intent: The parsed intent whose fields are offered for confirmation.
            result: Decoded LLM output; only ``confidence`` is read.

        Returns:
            Option dicts for purpose, content type (omitted when it is the
            general type), deliverables and depth, in that order.
        """
        confidence = self._safe_float(result.get("confidence"), self._DEFAULT_CONFIDENCE)
        options = []

        # Purpose option
        options.append({
            "id": "purpose",
            "label": "Research Purpose",
            "value": intent.purpose,
            "display": self._purpose_display(intent.purpose),
            "alternatives": [p.value for p in ResearchPurpose],
            "confidence": confidence,
        })

        # Content output option
        if intent.content_output != ContentOutput.GENERAL.value:
            options.append({
                "id": "content_output",
                "label": "Content Type",
                "value": intent.content_output,
                "display": intent.content_output.replace("_", " ").title(),
                "alternatives": [c.value for c in ContentOutput],
                "confidence": confidence,
            })

        # Deliverables option (display shows at most four of them)
        options.append({
            "id": "deliverables",
            "label": "What I'll Find",
            "value": intent.expected_deliverables,
            "display": [d.replace("_", " ").title() for d in intent.expected_deliverables[:4]],
            "alternatives": [d.value for d in ExpectedDeliverable],
            "confidence": confidence,
            "multi_select": True,
        })

        # Depth option
        options.append({
            "id": "depth",
            "label": "Research Depth",
            "value": intent.depth,
            "display": intent.depth.title(),
            "alternatives": [d.value for d in ResearchDepthLevel],
            "confidence": confidence,
        })

        return options

    def _purpose_display(self, purpose: str) -> str:
        """Get display-friendly purpose text.

        Purposes without a dedicated phrase are title-cased with underscores
        turned into spaces.
        """
        display_map = {
            "learn": "Understand this topic",
            "create_content": "Create content about this",
            "make_decision": "Make a decision",
            "compare": "Compare options",
            "solve_problem": "Solve a problem",
            "find_data": "Find specific data",
            "explore_trends": "Explore trends",
            "validate": "Validate information",
            "generate_ideas": "Generate ideas",
        }
        return display_map.get(purpose, purpose.replace("_", " ").title())

    def _extract_keywords_from_input(self, user_input: str, keywords: List[str]) -> List[str]:
        """Extract and enhance keywords from user input.

        Provided keywords come first and are kept verbatim. Tokens from the
        input are lower-cased, trimmed of surrounding punctuation, and added
        when they are longer than two characters, are not stop words, and do
        not repeat (case-insensitively) a keyword already collected.

        Args:
            user_input: Raw user text.
            keywords: Keywords supplied by the caller; may be empty or None.

        Returns:
            At most 15 keywords.
        """
        # Start with provided keywords
        extracted = list(keywords) if keywords else []
        seen = {k.lower() for k in extracted if isinstance(k, str)}

        # Simple extraction from input (split on common delimiters)
        tokens = user_input.lower().replace(",", " ").replace(";", " ").split()

        for token in tokens:
            # Drop sentence punctuation so "marketing?" matches "marketing".
            word = token.strip(self._TOKEN_EDGE_CHARS)
            if len(word) > 2 and word not in self._STOP_WORDS and word not in seen:
                seen.add(word)
                extracted.append(word)

        return extracted[:15]  # Limit to 15 keywords

    def _create_fallback_response(self, user_input: str, keywords: List[str]) -> IntentInferenceResponse:
        """Create a fallback response when AI inference fails.

        The response is still marked successful, but carries a generic
        "learn" intent with confidence 0.5 and asks for clarification.

        Args:
            user_input: Raw user text, embedded in the generic questions.
            keywords: Keywords to echo back as ``suggested_keywords``.

        Returns:
            An IntentInferenceResponse built without any LLM output.
        """
        # Create a basic intent from the input
        fallback_intent = ResearchIntent(
            primary_question=f"What are the key insights about: {user_input}?",
            secondary_questions=[
                f"What are the latest trends in {user_input}?",
                f"What are best practices for {user_input}?",
            ],
            purpose=ResearchPurpose.LEARN.value,
            content_output=ContentOutput.GENERAL.value,
            expected_deliverables=[
                ExpectedDeliverable.KEY_STATISTICS.value,
                ExpectedDeliverable.EXAMPLES.value,
                ExpectedDeliverable.BEST_PRACTICES.value,
            ],
            depth=ResearchDepthLevel.DETAILED.value,
            focus_areas=[],
            input_type=InputType.KEYWORDS.value,
            original_input=user_input,
            confidence=0.5,
            needs_clarification=True,
            clarifying_questions=[
                "What type of content are you creating?",
                "What specific aspects are you most interested in?",
            ],
        )

        return IntentInferenceResponse(
            success=True,  # Still return success, just with lower confidence
            intent=fallback_intent,
            analysis_summary=f"Basic research analysis for: {user_input}",
            suggested_queries=[],
            suggested_keywords=keywords,
            suggested_angles=[],
            quick_options=[],
        )
|
||||
Reference in New Issue
Block a user