388 lines
14 KiB
Python
388 lines
14 KiB
Python
"""
|
|
Intent Query Generator
|
|
|
|
Generates multiple targeted research queries based on user intent.
|
|
Each query targets a specific deliverable or question.
|
|
|
|
Author: ALwrity Team
|
|
Version: 1.0
|
|
"""
|
|
|
|
import json
|
|
from typing import Dict, Any, List, Optional
|
|
from loguru import logger
|
|
|
|
from models.research_intent_models import (
|
|
ResearchIntent,
|
|
ResearchQuery,
|
|
ExpectedDeliverable,
|
|
ResearchPurpose,
|
|
)
|
|
from models.research_persona_models import ResearchPersona
|
|
from .intent_prompt_builder import IntentPromptBuilder
|
|
|
|
|
|
class IntentQueryGenerator:
    """
    Generates targeted research queries based on user intent.

    Instead of a single generic search, generates multiple queries
    each targeting a specific deliverable or question. When the LLM call
    fails or returns unusable data, template-based fallback queries are
    produced so callers always receive the same result shape.
    """

    # Upper bound on the number of template queries built by the fallback path.
    _MAX_FALLBACK_QUERIES = 5

    # Priority assigned when the LLM omits the field or returns a non-numeric value.
    _DEFAULT_PRIORITY = 3

    def __init__(self):
        """Initialize the query generator."""
        self.prompt_builder = IntentPromptBuilder()
        logger.info("IntentQueryGenerator initialized")

    async def generate_queries(
        self,
        intent: ResearchIntent,
        research_persona: Optional[ResearchPersona] = None,
        user_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Generate targeted research queries based on intent.

        Args:
            intent: The inferred research intent
            research_persona: Optional persona for context
            user_id: Optional identifier forwarded to ``llm_text_gen``.
                Defaults to ``None``, which is the value that was
                previously always passed.

        Returns:
            Dict with ``queries`` (list of ResearchQuery, highest priority
            first), ``enhanced_keywords`` and ``research_angles``. On any
            failure the result of ``_create_fallback_queries`` is returned
            instead of raising.
        """
        try:
            # Guard the slice so a missing question cannot push us into the
            # fallback path just because of a log statement.
            question_preview = (intent.primary_question or "")[:50]
            logger.info(f"Generating queries for: {question_preview}...")

            # Build the query generation prompt
            prompt = self.prompt_builder.build_query_generation_prompt(
                intent=intent,
                research_persona=research_persona,
            )

            # Define the expected JSON schema
            query_schema = {
                "type": "object",
                "properties": {
                    "queries": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "query": {"type": "string"},
                                "purpose": {"type": "string"},
                                "provider": {"type": "string"},
                                "priority": {"type": "integer"},
                                "expected_results": {"type": "string"}
                            },
                            "required": ["query", "purpose", "provider", "priority", "expected_results"]
                        }
                    },
                    "enhanced_keywords": {"type": "array", "items": {"type": "string"}},
                    "research_angles": {"type": "array", "items": {"type": "string"}}
                },
                "required": ["queries", "enhanced_keywords", "research_angles"]
            }

            # Call LLM for query generation (imported lazily, as in the
            # original code).
            from services.llm_providers.main_text_generation import llm_text_gen

            # NOTE(review): this is a synchronous call inside a coroutine; if
            # llm_text_gen performs blocking I/O it will stall the event loop.
            # Confirm, and consider asyncio.to_thread if so.
            result = llm_text_gen(
                prompt=prompt,
                json_struct=query_schema,
                user_id=user_id,
            )

            # Defensive: tolerate a raw JSON string in case the provider hands
            # back unparsed text rather than a dict.
            if isinstance(result, str):
                try:
                    result = json.loads(result)
                except json.JSONDecodeError:
                    logger.error("Query generation failed: response was not valid JSON")
                    return self._create_fallback_queries(intent)

            if not isinstance(result, dict):
                logger.error(
                    f"Query generation failed: unexpected response type {type(result).__name__}"
                )
                return self._create_fallback_queries(intent)

            if "error" in result:
                logger.error(f"Query generation failed: {result.get('error')}")
                return self._create_fallback_queries(intent)

            # Parse queries (``or []`` also covers an explicit JSON null)
            queries = self._parse_queries(result.get("queries") or [])

            # Ensure we have queries for all expected deliverables
            queries = self._ensure_deliverable_coverage(queries, intent)

            # Sort by priority, highest first
            queries.sort(key=lambda q: q.priority, reverse=True)

            logger.info(f"Generated {len(queries)} targeted queries")

            return {
                "queries": queries,
                "enhanced_keywords": result.get("enhanced_keywords") or [],
                "research_angles": result.get("research_angles") or [],
            }

        except Exception as e:
            # logger.exception keeps the traceback, which a plain error log drops.
            logger.exception(f"Error generating queries: {e}")
            return self._create_fallback_queries(intent)

    @staticmethod
    def _deliverable_key(deliverable: Any) -> Any:
        """Return the plain value for a deliverable given as an enum member or a raw value."""
        return getattr(deliverable, "value", deliverable)

    @staticmethod
    def _coerce_deliverable(value: Any) -> ExpectedDeliverable:
        """Convert ``value`` to an ExpectedDeliverable, defaulting to KEY_STATISTICS."""
        try:
            return ExpectedDeliverable(value)
        except ValueError:
            return ExpectedDeliverable.KEY_STATISTICS

    def _parse_queries(self, raw_queries: List[Dict]) -> List[ResearchQuery]:
        """
        Parse raw query data into ResearchQuery objects.

        Entries that cannot be parsed, or whose query text is empty, are
        logged and skipped. An unknown ``purpose`` falls back to
        KEY_STATISTICS; a missing or non-numeric ``priority`` falls back to
        the default and the result is clamped to the range 1-5.
        """
        queries = []
        for q in raw_queries or []:
            try:
                query_text = str(q.get("query") or "").strip()
                if not query_text:
                    # An empty search string cannot return useful results.
                    logger.warning("Failed to parse query: empty query text")
                    continue

                purpose = self._coerce_deliverable(q.get("purpose", "key_statistics"))

                try:
                    priority = int(q.get("priority", self._DEFAULT_PRIORITY))
                except (TypeError, ValueError):
                    # Keep an otherwise valid query rather than discarding it.
                    priority = self._DEFAULT_PRIORITY

                query = ResearchQuery(
                    query=query_text,
                    purpose=purpose,
                    provider=q.get("provider") or "exa",
                    priority=min(max(priority, 1), 5),  # Clamp 1-5
                    expected_results=q.get("expected_results") or "",
                )
                queries.append(query)
            except Exception as e:
                logger.warning(f"Failed to parse query: {e}")
                continue

        return queries

    def _ensure_deliverable_coverage(
        self,
        queries: List[ResearchQuery],
        intent: ResearchIntent,
    ) -> List[ResearchQuery]:
        """
        Ensure we have queries for all expected deliverables.

        Appends one template query to ``queries`` (in place) for every entry
        of ``intent.expected_deliverables`` that no existing query targets,
        and returns the same list.
        """
        # Deliverables already covered. Purposes are normalised so the check
        # works whether the model stores enum members or plain values.
        covered = {self._deliverable_key(q.purpose) for q in queries}

        for deliverable in intent.expected_deliverables or []:
            key = self._deliverable_key(deliverable)
            if key in covered:
                continue

            queries.append(
                self._generate_query_for_deliverable(
                    deliverable=deliverable,
                    intent=intent,
                )
            )
            # Record it so a deliverable listed twice is not generated twice.
            covered.add(key)

        return queries

    def _generate_query_for_deliverable(
        self,
        deliverable: str,
        intent: ResearchIntent,
    ) -> ResearchQuery:
        """
        Generate a query targeting a specific deliverable.

        Args:
            deliverable: Deliverable value (an enum member is also accepted).
            intent: Research intent; its ``original_input`` is used as the topic.

        Returns:
            A ResearchQuery built from the template for that deliverable, or
            a generic topic-only query with purpose KEY_STATISTICS when the
            deliverable is not recognised.
        """
        from datetime import datetime

        # The topic is the user's original input
        topic = intent.original_input
        deliverable = self._deliverable_key(deliverable)

        # Use the current year so time-sensitive templates do not go stale
        # (this yields "2025" / "2025 2030" when run in 2025).
        current_year = datetime.now().year

        # Query templates by deliverable type
        templates = {
            ExpectedDeliverable.KEY_STATISTICS.value: {
                "query": f"{topic} statistics data report study",
                "provider": "exa",
                "priority": 5,
                "expected": "Statistical data and research findings",
            },
            ExpectedDeliverable.EXPERT_QUOTES.value: {
                "query": f"{topic} expert opinion interview insights",
                "provider": "exa",
                "priority": 4,
                "expected": "Expert opinions and authoritative quotes",
            },
            ExpectedDeliverable.CASE_STUDIES.value: {
                "query": f"{topic} case study success story implementation example",
                "provider": "exa",
                "priority": 4,
                "expected": "Real-world case studies and examples",
            },
            ExpectedDeliverable.TRENDS.value: {
                "query": f"{topic} trends {current_year} future predictions emerging",
                "provider": "tavily",
                "priority": 4,
                "expected": "Current trends and future predictions",
            },
            ExpectedDeliverable.COMPARISONS.value: {
                "query": f"{topic} comparison vs versus alternatives",
                "provider": "exa",
                "priority": 4,
                "expected": "Comparison and alternative options",
            },
            ExpectedDeliverable.BEST_PRACTICES.value: {
                "query": f"{topic} best practices recommendations guidelines",
                "provider": "exa",
                "priority": 3,
                "expected": "Best practices and recommendations",
            },
            ExpectedDeliverable.STEP_BY_STEP.value: {
                "query": f"{topic} how to guide tutorial steps",
                "provider": "exa",
                "priority": 3,
                "expected": "Step-by-step guides and tutorials",
            },
            ExpectedDeliverable.PROS_CONS.value: {
                "query": f"{topic} advantages disadvantages pros cons benefits",
                "provider": "exa",
                "priority": 3,
                "expected": "Pros, cons, and trade-offs",
            },
            ExpectedDeliverable.DEFINITIONS.value: {
                "query": f"what is {topic} definition explained",
                "provider": "exa",
                "priority": 3,
                "expected": "Clear definitions and explanations",
            },
            ExpectedDeliverable.EXAMPLES.value: {
                "query": f"{topic} examples real world applications",
                "provider": "exa",
                "priority": 3,
                "expected": "Real-world examples and applications",
            },
            ExpectedDeliverable.PREDICTIONS.value: {
                "query": f"{topic} future outlook predictions {current_year} {current_year + 5}",
                "provider": "tavily",
                "priority": 4,
                "expected": "Future predictions and outlook",
            },
            ExpectedDeliverable.CITATIONS.value: {
                "query": f"{topic} research paper study academic",
                "provider": "exa",
                "priority": 4,
                "expected": "Authoritative academic sources",
            },
        }

        # Unknown deliverables fall back to a plain topic search.
        template = templates.get(deliverable, {
            "query": f"{topic}",
            "provider": "exa",
            "priority": 3,
            "expected": "General information",
        })

        return ResearchQuery(
            query=template["query"],
            purpose=self._coerce_deliverable(deliverable),
            provider=template["provider"],
            priority=template["priority"],
            expected_results=template["expected"],
        )

    def _create_fallback_queries(self, intent: ResearchIntent) -> Dict[str, Any]:
        """
        Create fallback queries when AI generation fails.

        Builds template queries for up to ``_MAX_FALLBACK_QUERIES`` distinct
        expected deliverables, or a single general query when there are
        none. The returned dict has the same keys as ``generate_queries``.
        """
        # Guard against a missing input so this last-resort path cannot raise
        # on ``split``.
        topic = intent.original_input or ""

        # Generate basic queries for each distinct expected deliverable
        queries = []
        seen = set()
        for deliverable in intent.expected_deliverables or []:
            key = self._deliverable_key(deliverable)
            if key in seen:
                continue
            seen.add(key)
            queries.append(self._generate_query_for_deliverable(deliverable, intent))
            if len(queries) >= self._MAX_FALLBACK_QUERIES:
                break

        # Add a general query if we have none
        if not queries:
            queries.append(ResearchQuery(
                query=topic,
                purpose=ExpectedDeliverable.KEY_STATISTICS,
                provider="exa",
                priority=5,
                expected_results="General information and insights",
            ))

        return {
            "queries": queries,
            "enhanced_keywords": topic.split()[:10],
            "research_angles": [
                f"Overview of {topic}",
                f"Latest trends in {topic}",
            ],
        }
|
|
|
|
|
|
class QueryOptimizer:
    """
    Builds provider-specific search parameters for a research query.

    The original notes on provider strengths:
    - Exa: Semantic search, good for deep research
    - Tavily: Real-time search, good for news/trends
    - Google: Factual search, good for basic info

    Only Exa and Tavily have optimizers in this class.
    """

    @staticmethod
    def optimize_for_exa(query: str, intent: ResearchIntent) -> Dict[str, Any]:
        """
        Build the Exa parameter dict for ``query``.

        The category, search type and result count are derived from the
        intent's expected deliverables, purpose and depth. The returned
        keys are ``query``, ``type``, ``category``, ``num_results``,
        ``text`` and ``highlights``.
        """
        requested = intent.expected_deliverables

        # Category: citations take precedence over trends, which take
        # precedence over a comparison purpose; otherwise no category.
        if ExpectedDeliverable.CITATIONS.value in requested:
            category = "research paper"
        elif ExpectedDeliverable.TRENDS.value in requested:
            category = "news"
        else:
            category = (
                "company"
                if intent.purpose == ResearchPurpose.COMPARE.value
                else None
            )

        # Search type: "neural" by default, "auto" when trends are requested.
        trends_requested = ExpectedDeliverable.TRENDS.value in requested
        search_type = "auto" if trends_requested else "neural"

        # Result count scales with the requested depth.
        depth = intent.depth
        if depth == "expert":
            result_count = 20
        else:
            result_count = 5 if depth == "overview" else 10

        return dict(
            query=query,
            type=search_type,
            category=category,
            num_results=result_count,
            text=True,
            highlights=True,
        )

    @staticmethod
    def optimize_for_tavily(query: str, intent: ResearchIntent) -> Dict[str, Any]:
        """
        Build the Tavily parameter dict for ``query``.

        The topic, search depth, answer mode and time range are derived
        from the intent's expected deliverables, depth and time
        sensitivity. The returned keys are ``query``, ``topic``,
        ``search_depth``, ``include_answer``, ``time_range`` and
        ``max_results``.
        """
        requested = intent.expected_deliverables

        # Topic: "news" when trends are requested, otherwise "general".
        trends_requested = ExpectedDeliverable.TRENDS.value in requested
        topic = "news" if trends_requested else "general"

        # Search depth: advanced for the two deeper research levels.
        search_depth = "advanced" if intent.depth in ("detailed", "expert") else "basic"

        # Answer mode: definitions win over statistics; otherwise disabled.
        if ExpectedDeliverable.DEFINITIONS.value in requested:
            include_answer = "advanced"
        elif ExpectedDeliverable.KEY_STATISTICS.value in requested:
            include_answer = "basic"
        else:
            include_answer = False

        # Time range: explicit time sensitivity wins; trends alone imply a month.
        sensitivity = intent.time_sensitivity
        if sensitivity == "real_time":
            time_range = "day"
        elif sensitivity == "recent":
            time_range = "week"
        else:
            time_range = "month" if trends_requested else None

        return dict(
            query=query,
            topic=topic,
            search_depth=search_depth,
            include_answer=include_answer,
            time_range=time_range,
            max_results=10,
        )
|