AI Analysis and Content Strategy fixes. Enhanced Strategy Routes refactoring.

2026-01-10 19:32:50 +05:30
parent 0b63ae7fc1
commit 8193cdba67
298 changed files with 45678 additions and 10952 deletions
--- a/backend/services/research/intent/unified_result_parser.py
+++ b/backend/services/research/intent/unified_result_parser.py
@@ -0,0 +1,209 @@
+"""
+Result parsing logic for unified research analyzer.
+
+Parses LLM response into structured ResearchIntent, ResearchQuery,
+and configuration dictionaries.
+"""
+
+from typing import Dict, Any, List
+from loguru import logger
+
+from models.research_intent_models import (
+    ResearchIntent, ResearchQuery,
+    ResearchPurpose, ContentOutput, ExpectedDeliverable,
+    ResearchDepthLevel, InputType
+)
+from .query_deduplicator import deduplicate_queries
+
+
+def _normalize_purpose(value: str) -> str:
+    """Normalize a free-form purpose string to a purpose identifier.
+
+    A case-insensitive exact match on a ``ResearchPurpose`` member's value
+    or name wins; otherwise the first keyword group found in the text
+    decides. Empty, non-string, or unrecognised input yields ``"learn"``.
+    """
+    default = "learn"
+    if not (value and isinstance(value, str)):
+        return default
+    text = value.lower()
+    for member in ResearchPurpose:
+        if text == member.value or text == member.name.lower():
+            return member.value
+    # Ordered rules: earlier groups take precedence over later ones.
+    keyword_rules = (
+        ("create_content", ("content", "write", "create", "blog")),
+        ("compare", ("compare", "comparison")),
+        ("make_decision", ("decision", "choose")),
+        ("solve_problem", ("problem", "solve")),
+        ("find_data", ("data", "statistic", "fact")),
+        ("explore_trends", ("trend",)),
+        ("validate", ("validat", "verify")),
+        ("generate_ideas", ("idea", "brainstorm")),
+    )
+    matches = (label for label, words in keyword_rules if any(w in text for w in words))
+    return next(matches, default)
+
+
+def _normalize_content_output(value: str) -> str:
+    """Normalize a free-form content-output string to an output identifier.
+
+    A case-insensitive exact match on a ``ContentOutput`` member's value or
+    name is returned first; failing that, the first keyword group that
+    appears in the text decides. Empty, non-string, or unrecognised input
+    falls back to ``"general"``.
+    """
+    default = "general"
+    if not (value and isinstance(value, str)):
+        return default
+    text = value.lower()
+    for member in ContentOutput:
+        if text == member.value or text == member.name.lower():
+            return member.value
+    # Ordered rules: earlier groups take precedence over later ones.
+    keyword_rules = (
+        ("blog", ("blog", "article")),
+        ("podcast", ("podcast",)),
+        ("video", ("video",)),
+        ("social_post", ("social", "post")),
+        ("newsletter", ("newsletter",)),
+        ("presentation", ("presentation", "slide")),
+        ("report", ("report",)),
+        ("whitepaper", ("whitepaper", "white paper")),
+        ("email", ("email",)),
+    )
+    matches = (label for label, words in keyword_rules if any(w in text for w in words))
+    return next(matches, default)
+
+
+def _normalize_deliverable(value: str) -> str:
+    """Normalize a free-form deliverable string to a deliverable identifier.
+
+    The stripped, lower-cased text is compared with each
+    ``ExpectedDeliverable`` member's value and name; failing that, the first
+    matching keyword group wins. Other input yields ``"key_statistics"``.
+    """
+    default = "key_statistics"
+    if not (value and isinstance(value, str)):
+        return default
+    text = value.lower().strip()
+    for member in ExpectedDeliverable:
+        if text == member.value or text == member.name.lower():
+            return member.value
+    # Ordered rules: earlier groups take precedence over later ones.
+    keyword_rules = (
+        ("key_statistics", ("statistic", "data", "number", "metric", "report")),
+        ("expert_quotes", ("quote", "expert")),
+        ("case_studies", ("case", "study")),
+        ("comparisons", ("compar", "compare", "landscape", "matrix")),
+        ("trends", ("trend", "keyword", "seo")),
+        ("best_practices", ("practice", "best", "guideline", "recommendation", "calendar")),
+        ("step_by_step", ("step", "how", "process", "guide", "outline", "heading")),
+        ("pros_cons", ("advantage", "disadvantage")),
+        ("definitions", ("defin", "explain")),
+        ("citations", ("citation", "source", "reference")),
+        ("examples", ("example", "sample")),
+        ("predictions", ("prediction", "future", "outlook")),
+    )
+    for label, words in keyword_rules:
+        # "pros_cons" additionally matches text containing both "pro" and "con".
+        if label == "pros_cons" and "pro" in text and "con" in text:
+            return label
+        if any(w in text for w in words):
+            return label
+    return default
+
+
+def parse_unified_result(result: Dict[str, Any], user_input: str) -> Dict[str, Any]:
+    """Parse the unified LLM result into a structured response.
+
+    Args:
+        result: Raw LLM response dictionary
+        user_input: Original user input for fallback values
+
+    Returns:
+        Structured response dict, or the fallback response when intent parsing fails.
+    """
+    intent_data = result.get("intent")
+    if not isinstance(intent_data, dict):  # null / malformed intent from the LLM
+        intent_data = {}
+
+    # Normalize enum values
+    purpose_value = _normalize_purpose(intent_data.get("purpose", "learn"))
+    content_output_value = _normalize_content_output(intent_data.get("content_output", "general"))
+
+    # Normalize deliverables list
+    deliverables_raw = intent_data.get("expected_deliverables", ["key_statistics"])
+    if not isinstance(deliverables_raw, list):
+        deliverables_raw = [deliverables_raw] if deliverables_raw else ["key_statistics"]
+    normalized_deliverables = [_normalize_deliverable(d) for d in deliverables_raw if d]
+    if not normalized_deliverables:
+        normalized_deliverables = ["key_statistics"]
+
+    # Build ResearchIntent
+    try:
+        intent = ResearchIntent(
+            primary_question=intent_data.get("primary_question", user_input),
+            secondary_questions=intent_data.get("secondary_questions", []),
+            purpose=purpose_value,
+            content_output=content_output_value,
+            expected_deliverables=normalized_deliverables,
+            depth=intent_data.get("depth", "detailed"),
+            focus_areas=intent_data.get("focus_areas", []),
+            also_answering=intent_data.get("also_answering", []),
+            perspective=intent_data.get("perspective"),
+            time_sensitivity=intent_data.get("time_sensitivity"),
+            input_type=intent_data.get("input_type", "keywords"),
+            original_input=user_input,
+            confidence=float(intent_data.get("confidence", 0.7)),
+            confidence_reason=intent_data.get("confidence_reason"),
+            great_example=intent_data.get("great_example"),
+            needs_clarification=intent_data.get("needs_clarification", False),
+            clarifying_questions=intent_data.get("clarifying_questions", []),
+        )
+    except Exception as e:
+        logger.error(f"Failed to parse intent: {e}, intent_data: {intent_data}")
+        # Return fallback intent
+        from .unified_analyzer_utils import create_fallback_response
+        return create_fallback_response(user_input, [])
+
+    # Build queries
+    queries = []
+    for q in result.get("queries") or []:  # "queries" may be present but null
+        try:
+            # Normalize query purpose
+            query_purpose = _normalize_deliverable(q.get("purpose", "key_statistics"))
+            queries.append(ResearchQuery(
+                query=q.get("query", ""),
+                purpose=query_purpose,
+                provider=q.get("provider", "exa"),
+                priority=int(q.get("priority", 3)),
+                expected_results=q.get("expected_results", ""),
+                addresses_primary_question=q.get("addresses_primary_question", False),
+                addresses_secondary_questions=q.get("addresses_secondary_questions", []),
+                targets_focus_areas=q.get("targets_focus_areas", []),
+                covers_also_answering=q.get("covers_also_answering", []),
+                justification=q.get("justification"),
+            ))
+        except Exception as e:
+            logger.warning(f"Failed to parse query: {e}, query: {q}")
+
+    # Deduplicate queries to avoid redundant API calls
+    queries = deduplicate_queries(queries, intent)
+    # Log warning if no queries after parsing
+    if not queries:
+        logger.warning("No valid queries parsed from LLM response")
+
+    return {
+        "success": True,
+        "intent": intent,
+        "queries": queries,
+        "enhanced_keywords": result.get("enhanced_keywords", []),
+        "research_angles": result.get("research_angles", []),
+        "recommended_provider": result.get("recommended_provider", "exa"),
+        "provider_justification": result.get("provider_justification", ""),
+        "exa_config": result.get("exa_config", {}),
+        "tavily_config": result.get("tavily_config", {}),
+        "trends_config": result.get("trends_config", {}),  # Google Trends configuration
+        "analysis_summary": intent_data.get("analysis_summary", ""),
+    }