Files
ALwrity/backend/services/research/intent/intent_query_generator.py
ajaysi b134e9dc7e Added video studio router and endpoints. Added research router and endpoints. Added youtube router and endpoints. Added onboarding utils router and endpoints. Added onboarding utils service. Added onboarding utils models. Added onboarding utils routes. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. 
Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. 
Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. 
Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. 
Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. 
Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils. Added onboarding utils utils.
2026-01-01 17:56:25 +05:30

388 lines
14 KiB
Python

"""
Intent Query Generator
Generates multiple targeted research queries based on user intent.
Each query targets a specific deliverable or question.
Author: ALwrity Team
Version: 1.0
"""
import json
from typing import Dict, Any, List, Optional
from loguru import logger
from models.research_intent_models import (
ResearchIntent,
ResearchQuery,
ExpectedDeliverable,
ResearchPurpose,
)
from models.research_persona_models import ResearchPersona
from .intent_prompt_builder import IntentPromptBuilder
class IntentQueryGenerator:
    """
    Generates targeted research queries based on user intent.

    Instead of a single generic search, generates multiple queries,
    each targeting a specific deliverable or question.
    """

    def __init__(self):
        """Initialize the query generator and its prompt builder."""
        self.prompt_builder = IntentPromptBuilder()
        logger.info("IntentQueryGenerator initialized")

    async def generate_queries(
        self,
        intent: ResearchIntent,
        research_persona: Optional[ResearchPersona] = None,
    ) -> Dict[str, Any]:
        """
        Generate targeted research queries based on intent.

        Any failure (LLM error payload, unusable response, unexpected
        exception) results in template-based fallback queries instead of an
        exception reaching the caller.

        Args:
            intent: The inferred research intent
            research_persona: Optional persona for context

        Returns:
            Dict with queries, enhanced_keywords, and research_angles
        """
        try:
            logger.info(f"Generating queries for: {intent.primary_question[:50]}...")

            # Build the query generation prompt
            prompt = self.prompt_builder.build_query_generation_prompt(
                intent=intent,
                research_persona=research_persona,
            )

            # Define the expected JSON schema
            query_schema = {
                "type": "object",
                "properties": {
                    "queries": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "query": {"type": "string"},
                                "purpose": {"type": "string"},
                                "provider": {"type": "string"},
                                "priority": {"type": "integer"},
                                "expected_results": {"type": "string"}
                            },
                            "required": ["query", "purpose", "provider", "priority", "expected_results"]
                        }
                    },
                    "enhanced_keywords": {"type": "array", "items": {"type": "string"}},
                    "research_angles": {"type": "array", "items": {"type": "string"}}
                },
                "required": ["queries", "enhanced_keywords", "research_angles"]
            }

            # Call LLM for query generation.
            # NOTE(review): this is a synchronous call inside a coroutine and
            # user_id is passed as None - confirm both are acceptable for
            # llm_text_gen.
            from services.llm_providers.main_text_generation import llm_text_gen
            result = llm_text_gen(
                prompt=prompt,
                json_struct=query_schema,
                user_id=None
            )

            # In case the LLM layer hands back the JSON document as text,
            # decode it; a decode error is handled by the except below.
            if isinstance(result, str):
                result = json.loads(result)

            if not isinstance(result, dict):
                logger.error(
                    f"Query generation failed: unexpected response type {type(result).__name__}"
                )
                return self._create_fallback_queries(intent)

            if "error" in result:
                logger.error(f"Query generation failed: {result.get('error')}")
                return self._create_fallback_queries(intent)

            # Parse queries
            queries = self._parse_queries(result.get("queries") or [])

            # Ensure we have queries for all expected deliverables
            queries = self._ensure_deliverable_coverage(queries, intent)

            # Sort by priority (largest priority value first)
            queries.sort(key=lambda q: q.priority, reverse=True)

            logger.info(f"Generated {len(queries)} targeted queries")
            return {
                "queries": queries,
                "enhanced_keywords": result.get("enhanced_keywords") or [],
                "research_angles": result.get("research_angles") or [],
            }
        except Exception as e:
            # Deliberate best-effort boundary: never fail the research flow
            # because query generation failed.
            logger.error(f"Error generating queries: {e}")
            return self._create_fallback_queries(intent)

    def _parse_queries(self, raw_queries: List[Dict]) -> List[ResearchQuery]:
        """
        Parse raw query data into ResearchQuery objects.

        Entries that cannot be parsed, or whose query text is blank, are
        skipped with a warning. An unknown purpose falls back to
        KEY_STATISTICS; a missing or non-numeric priority falls back to 3.
        """
        queries = []
        for q in raw_queries or []:
            try:
                # A blank query cannot be searched for - skip it
                query_text = str(q.get("query") or "").strip()
                if not query_text:
                    logger.warning("Failed to parse query: empty query text")
                    continue

                # Validate purpose
                purpose_str = q.get("purpose", "key_statistics")
                try:
                    purpose = ExpectedDeliverable(purpose_str)
                except ValueError:
                    purpose = ExpectedDeliverable.KEY_STATISTICS

                # Tolerate a null / non-numeric priority instead of dropping
                # an otherwise usable query
                try:
                    priority = int(q.get("priority", 3))
                except (TypeError, ValueError):
                    priority = 3

                queries.append(ResearchQuery(
                    query=query_text,
                    purpose=purpose,
                    provider=q.get("provider") or "exa",
                    priority=min(max(priority, 1), 5),  # Clamp 1-5
                    expected_results=q.get("expected_results") or "",
                ))
            except Exception as e:
                logger.warning(f"Failed to parse query: {e}")
                continue
        return queries

    def _ensure_deliverable_coverage(
        self,
        queries: List[ResearchQuery],
        intent: ResearchIntent,
    ) -> List[ResearchQuery]:
        """
        Ensure we have queries for all expected deliverables.

        Appends one template-based query for every expected deliverable that
        no existing query targets. The input list is extended in place and
        also returned.
        """
        # Deliverables already covered (works whether purpose is stored as an
        # enum member or as its plain string value)
        covered = {getattr(q.purpose, "value", q.purpose) for q in queries}

        # Check for missing deliverables
        for deliverable in intent.expected_deliverables or []:
            key = getattr(deliverable, "value", deliverable)
            if key in covered:
                continue
            # Generate a query for this deliverable
            queries.append(self._generate_query_for_deliverable(
                deliverable=deliverable,
                intent=intent,
            ))
            # Record it so a repeated deliverable does not add a duplicate
            covered.add(key)
        return queries

    def _generate_query_for_deliverable(
        self,
        deliverable: str,
        intent: ResearchIntent,
    ) -> ResearchQuery:
        """
        Generate a query targeting a specific deliverable.

        Args:
            deliverable: Deliverable value (an ExpectedDeliverable value string;
                an enum member is accepted too)
            intent: Research intent supplying the topic text

        Returns:
            A ResearchQuery built from the matching template, or a generic
            topic query (purpose KEY_STATISTICS) for an unknown deliverable.
        """
        from datetime import datetime

        # The user's original input is used verbatim as the topic
        topic = intent.original_input
        deliverable_key = getattr(deliverable, "value", deliverable)

        # Time-sensitive templates use the current year rather than a
        # hard-coded one, so they do not go stale
        current_year = datetime.now().year
        horizon_year = current_year + 5

        # Query templates by deliverable type
        templates = {
            ExpectedDeliverable.KEY_STATISTICS.value: {
                "query": f"{topic} statistics data report study",
                "provider": "exa",
                "priority": 5,
                "expected": "Statistical data and research findings",
            },
            ExpectedDeliverable.EXPERT_QUOTES.value: {
                "query": f"{topic} expert opinion interview insights",
                "provider": "exa",
                "priority": 4,
                "expected": "Expert opinions and authoritative quotes",
            },
            ExpectedDeliverable.CASE_STUDIES.value: {
                "query": f"{topic} case study success story implementation example",
                "provider": "exa",
                "priority": 4,
                "expected": "Real-world case studies and examples",
            },
            ExpectedDeliverable.TRENDS.value: {
                "query": f"{topic} trends {current_year} future predictions emerging",
                "provider": "tavily",
                "priority": 4,
                "expected": "Current trends and future predictions",
            },
            ExpectedDeliverable.COMPARISONS.value: {
                "query": f"{topic} comparison vs versus alternatives",
                "provider": "exa",
                "priority": 4,
                "expected": "Comparison and alternative options",
            },
            ExpectedDeliverable.BEST_PRACTICES.value: {
                "query": f"{topic} best practices recommendations guidelines",
                "provider": "exa",
                "priority": 3,
                "expected": "Best practices and recommendations",
            },
            ExpectedDeliverable.STEP_BY_STEP.value: {
                "query": f"{topic} how to guide tutorial steps",
                "provider": "exa",
                "priority": 3,
                "expected": "Step-by-step guides and tutorials",
            },
            ExpectedDeliverable.PROS_CONS.value: {
                "query": f"{topic} advantages disadvantages pros cons benefits",
                "provider": "exa",
                "priority": 3,
                "expected": "Pros, cons, and trade-offs",
            },
            ExpectedDeliverable.DEFINITIONS.value: {
                "query": f"what is {topic} definition explained",
                "provider": "exa",
                "priority": 3,
                "expected": "Clear definitions and explanations",
            },
            ExpectedDeliverable.EXAMPLES.value: {
                "query": f"{topic} examples real world applications",
                "provider": "exa",
                "priority": 3,
                "expected": "Real-world examples and applications",
            },
            ExpectedDeliverable.PREDICTIONS.value: {
                "query": f"{topic} future outlook predictions {current_year} {horizon_year}",
                "provider": "tavily",
                "priority": 4,
                "expected": "Future predictions and outlook",
            },
            ExpectedDeliverable.CITATIONS.value: {
                "query": f"{topic} research paper study academic",
                "provider": "exa",
                "priority": 4,
                "expected": "Authoritative academic sources",
            },
        }

        # Unknown deliverables get a plain topic search
        template = templates.get(deliverable_key, {
            "query": f"{topic}",
            "provider": "exa",
            "priority": 3,
            "expected": "General information",
        })

        # Same validation pattern as _parse_queries
        try:
            purpose = ExpectedDeliverable(deliverable_key)
        except ValueError:
            purpose = ExpectedDeliverable.KEY_STATISTICS

        return ResearchQuery(
            query=template["query"],
            purpose=purpose,
            provider=template["provider"],
            priority=template["priority"],
            expected_results=template["expected"],
        )

    def _create_fallback_queries(self, intent: ResearchIntent) -> Dict[str, Any]:
        """
        Create fallback queries when AI generation fails.

        Builds template queries for at most the first five expected
        deliverables, or one general topic query when there are none.

        Returns:
            Dict with the same keys as generate_queries
        """
        # Guard against a missing topic: this runs from an except handler, so
        # raising here would defeat the fallback
        topic = intent.original_input or ""

        # Generate basic queries for each expected deliverable (limit to 5)
        queries = [
            self._generate_query_for_deliverable(deliverable, intent)
            for deliverable in (intent.expected_deliverables or [])[:5]
        ]

        # Add a general query if we have none
        if not queries:
            queries.append(ResearchQuery(
                query=topic,
                purpose=ExpectedDeliverable.KEY_STATISTICS,
                provider="exa",
                priority=5,
                expected_results="General information and insights",
            ))

        return {
            "queries": queries,
            "enhanced_keywords": topic.split()[:10],
            "research_angles": [
                f"Overview of {topic}",
                f"Latest trends in {topic}",
            ],
        }
class QueryOptimizer:
    """
    Optimizes queries for different research providers.

    Different providers have different strengths:
    - Exa: Semantic search, good for deep research
    - Tavily: Real-time search, good for news/trends
    - Google: Factual search, good for basic info
    """

    @staticmethod
    def optimize_for_exa(query: str, intent: ResearchIntent) -> Dict[str, Any]:
        """
        Build the Exa search parameters for a query.

        Category, search type and result count are chosen from the intent's
        expected deliverables, purpose and depth.
        """
        wanted = intent.expected_deliverables

        # Category: citations win over trends, trends over a compare purpose
        if ExpectedDeliverable.CITATIONS.value in wanted:
            exa_category = "research paper"
        elif ExpectedDeliverable.TRENDS.value in wanted:
            exa_category = "news"
        elif intent.purpose == ResearchPurpose.COMPARE.value:
            exa_category = "company"
        else:
            exa_category = None

        # Search type: neural by default for semantic understanding,
        # auto for time-sensitive (trend) queries
        mode = "auto" if ExpectedDeliverable.TRENDS.value in wanted else "neural"

        # Result count scales with the requested depth
        depth = intent.depth
        if depth == "expert":
            result_count = 20
        elif depth == "overview":
            result_count = 5
        else:
            result_count = 10

        params = {
            "query": query,
            "type": mode,
            "category": exa_category,
            "num_results": result_count,
            "text": True,
            "highlights": True,
        }
        return params

    @staticmethod
    def optimize_for_tavily(query: str, intent: ResearchIntent) -> Dict[str, Any]:
        """
        Build the Tavily search parameters for a query.

        Topic, search depth, answer mode and time range are chosen from the
        intent's expected deliverables, depth and time sensitivity.
        """
        wanted = intent.expected_deliverables

        # Topic: news when trends are expected, general otherwise
        tavily_topic = "news" if ExpectedDeliverable.TRENDS.value in wanted else "general"

        # Deeper research levels use the advanced search depth
        depth_mode = "advanced" if intent.depth in ("detailed", "expert") else "basic"

        # Include an answer for factual queries
        if ExpectedDeliverable.DEFINITIONS.value in wanted:
            answer_mode = "advanced"
        elif ExpectedDeliverable.KEY_STATISTICS.value in wanted:
            answer_mode = "basic"
        else:
            answer_mode = False

        # Time range: explicit time sensitivity first, then trends
        if intent.time_sensitivity == "real_time":
            window = "day"
        elif intent.time_sensitivity == "recent":
            window = "week"
        elif ExpectedDeliverable.TRENDS.value in wanted:
            window = "month"
        else:
            window = None

        params = {
            "query": query,
            "topic": tavily_topic,
            "search_depth": depth_mode,
            "include_answer": answer_mode,
            "time_range": window,
            "max_results": 10,
        }
        return params