AI Analysis and Content Strategy fixes. Enhanced Strategy Routes refactoring.

This commit is contained in:
ajaysi
2026-01-10 19:32:50 +05:30
parent 0b63ae7fc1
commit 8193cdba67
298 changed files with 45678 additions and 10952 deletions

View File

@@ -0,0 +1,209 @@
"""
Result parsing logic for unified research analyzer.
Parses LLM response into structured ResearchIntent, ResearchQuery,
and configuration dictionaries.
"""
from typing import Dict, Any, List
from loguru import logger
from models.research_intent_models import (
ResearchIntent, ResearchQuery,
ResearchPurpose, ContentOutput, ExpectedDeliverable,
ResearchDepthLevel, InputType
)
from .query_deduplicator import deduplicate_queries
def _normalize_purpose(value: str) -> str:
"""Normalize purpose value to enum."""
if not value or not isinstance(value, str):
return "learn"
value_lower = value.lower()
# Check for exact match
for purpose in ResearchPurpose:
if value_lower == purpose.value or value_lower == purpose.name.lower():
return purpose.value
# Check for keywords in description
if "content" in value_lower or "write" in value_lower or "create" in value_lower or "blog" in value_lower:
return "create_content"
elif "compare" in value_lower or "comparison" in value_lower:
return "compare"
elif "decision" in value_lower or "choose" in value_lower:
return "make_decision"
elif "problem" in value_lower or "solve" in value_lower:
return "solve_problem"
elif "data" in value_lower or "statistic" in value_lower or "fact" in value_lower:
return "find_data"
elif "trend" in value_lower:
return "explore_trends"
elif "validat" in value_lower or "verify" in value_lower:
return "validate"
elif "idea" in value_lower or "brainstorm" in value_lower:
return "generate_ideas"
return "learn"
def _normalize_content_output(value: str) -> str:
"""Normalize content_output value to enum."""
if not value or not isinstance(value, str):
return "general"
value_lower = value.lower()
# Check for exact match
for output in ContentOutput:
if value_lower == output.value or value_lower == output.name.lower():
return output.value
# Check for keywords
if "blog" in value_lower or "article" in value_lower:
return "blog"
elif "podcast" in value_lower:
return "podcast"
elif "video" in value_lower:
return "video"
elif "social" in value_lower or "post" in value_lower:
return "social_post"
elif "newsletter" in value_lower:
return "newsletter"
elif "presentation" in value_lower or "slide" in value_lower:
return "presentation"
elif "report" in value_lower:
return "report"
elif "whitepaper" in value_lower or "white paper" in value_lower:
return "whitepaper"
elif "email" in value_lower:
return "email"
return "general"
def _normalize_deliverable(value: str) -> str:
"""Normalize deliverable value to enum."""
if not value or not isinstance(value, str):
return "key_statistics"
value_lower = value.lower().strip()
# Check for exact match first
for deliverable in ExpectedDeliverable:
if value_lower == deliverable.value or value_lower == deliverable.name.lower():
return deliverable.value
# Check for keywords (more aggressive matching)
if "statistic" in value_lower or "data" in value_lower or "number" in value_lower or "metric" in value_lower or "report" in value_lower:
return "key_statistics"
elif "quote" in value_lower or "expert" in value_lower:
return "expert_quotes"
elif "case" in value_lower or "study" in value_lower:
return "case_studies"
elif "compar" in value_lower or "compare" in value_lower or "landscape" in value_lower or "matrix" in value_lower:
return "comparisons"
elif "trend" in value_lower or "keyword" in value_lower or "seo" in value_lower:
return "trends"
elif "practice" in value_lower or "best" in value_lower or "guideline" in value_lower or "recommendation" in value_lower or "calendar" in value_lower:
return "best_practices"
elif "step" in value_lower or "how" in value_lower or "process" in value_lower or "guide" in value_lower or "outline" in value_lower or "heading" in value_lower:
return "step_by_step"
elif ("pro" in value_lower and "con" in value_lower) or "advantage" in value_lower or "disadvantage" in value_lower:
return "pros_cons"
elif "defin" in value_lower or "explain" in value_lower:
return "definitions"
elif "citation" in value_lower or "source" in value_lower or "reference" in value_lower:
return "citations"
elif "example" in value_lower or "sample" in value_lower:
return "examples"
elif "prediction" in value_lower or "future" in value_lower or "outlook" in value_lower:
return "predictions"
# Default fallback
return "key_statistics"
def parse_unified_result(result: Dict[str, Any], user_input: str) -> Dict[str, Any]:
"""
Parse the unified LLM result into structured response.
Args:
result: Raw LLM response dictionary
user_input: Original user input for fallback values
Returns:
Structured response with intent, queries, configs, etc.
"""
intent_data = result.get("intent", {})
# Normalize enum values
purpose_value = _normalize_purpose(intent_data.get("purpose", "learn"))
content_output_value = _normalize_content_output(intent_data.get("content_output", "general"))
# Normalize deliverables list
deliverables_raw = intent_data.get("expected_deliverables", ["key_statistics"])
if not isinstance(deliverables_raw, list):
deliverables_raw = [deliverables_raw] if deliverables_raw else ["key_statistics"]
normalized_deliverables = [_normalize_deliverable(d) for d in deliverables_raw if d]
if not normalized_deliverables:
normalized_deliverables = ["key_statistics"]
# Build ResearchIntent
try:
intent = ResearchIntent(
primary_question=intent_data.get("primary_question", user_input),
secondary_questions=intent_data.get("secondary_questions", []),
purpose=purpose_value,
content_output=content_output_value,
expected_deliverables=normalized_deliverables,
depth=intent_data.get("depth", "detailed"),
focus_areas=intent_data.get("focus_areas", []),
also_answering=intent_data.get("also_answering", []),
perspective=intent_data.get("perspective"),
time_sensitivity=intent_data.get("time_sensitivity"),
input_type=intent_data.get("input_type", "keywords"),
original_input=user_input,
confidence=float(intent_data.get("confidence", 0.7)),
confidence_reason=intent_data.get("confidence_reason"),
great_example=intent_data.get("great_example"),
needs_clarification=intent_data.get("needs_clarification", False),
clarifying_questions=intent_data.get("clarifying_questions", []),
)
except Exception as e:
logger.error(f"Failed to parse intent: {e}, intent_data: {intent_data}")
# Return fallback intent
from .unified_analyzer_utils import create_fallback_response
return create_fallback_response(user_input, [])
# Build queries
queries = []
for q in result.get("queries", []):
try:
# Normalize query purpose
query_purpose = _normalize_deliverable(q.get("purpose", "key_statistics"))
queries.append(ResearchQuery(
query=q.get("query", ""),
purpose=query_purpose,
provider=q.get("provider", "exa"),
priority=int(q.get("priority", 3)),
expected_results=q.get("expected_results", ""),
addresses_primary_question=q.get("addresses_primary_question", False),
addresses_secondary_questions=q.get("addresses_secondary_questions", []),
targets_focus_areas=q.get("targets_focus_areas", []),
covers_also_answering=q.get("covers_also_answering", []),
justification=q.get("justification"),
))
except Exception as e:
logger.warning(f"Failed to parse query: {e}, query: {q}")
# Deduplicate queries to avoid redundant API calls
queries = deduplicate_queries(queries, intent)
# Log warning if no queries after parsing
if not queries:
logger.warning("No valid queries parsed from LLM response")
return {
"success": True,
"intent": intent,
"queries": queries,
"enhanced_keywords": result.get("enhanced_keywords", []),
"research_angles": result.get("research_angles", []),
"recommended_provider": result.get("recommended_provider", "exa"),
"provider_justification": result.get("provider_justification", ""),
"exa_config": result.get("exa_config", {}),
"tavily_config": result.get("tavily_config", {}),
"trends_config": result.get("trends_config", {}), # Google Trends configuration
"analysis_summary": intent_data.get("analysis_summary", ""),
}