AI Analysis and Content Strategy fixes. Enhanced Strategy Routes refactoring.

This commit is contained in:
ajaysi
2026-01-10 19:32:50 +05:30
parent 0b63ae7fc1
commit 8193cdba67
298 changed files with 45678 additions and 10952 deletions

View File

@@ -35,7 +35,7 @@ blog_writer/
- Delegates to specialized modules for specific functionality
### Research Module (`research/`)
- **`ResearchService`**: Orchestrates comprehensive research using Google Search grounding
- **`ResearchService`**: Orchestrates comprehensive research using Exa neural search (currently Exa-only for testing)
- **`KeywordAnalyzer`**: AI-powered keyword analysis and extraction
- **`CompetitorAnalyzer`**: Competitor intelligence and market analysis
- **`ContentAngleGenerator`**: Strategic content angle discovery

View File

@@ -2,10 +2,12 @@
Research module for AI Blog Writer.
This module handles all research-related functionality including:
- Google Search grounding integration
- Exa neural search integration (primary provider for testing)
- Keyword analysis and competitor research
- Content angle discovery
- Research caching and optimization
Note: Currently Exa-only for testing. Google Search grounding code is preserved for future use.
"""
from .research_service import ResearchService

View File

@@ -29,10 +29,15 @@ class ExaResearchProvider(BaseProvider):
# Determine category: use exa_category if set, otherwise map from source_types
category = config.exa_category if config.exa_category else self._map_source_type_to_category(config.source_types)
# Use exa_num_results if available, otherwise fallback to max_sources
num_results = config.exa_num_results if hasattr(config, 'exa_num_results') and config.exa_num_results else min(config.max_sources, 25)
# Cap at 100 as per Exa API limits
num_results = min(num_results, 100)
# Build search kwargs - use correct Exa API format
search_kwargs = {
'type': config.exa_search_type or "auto",
'num_results': min(config.max_sources, 25),
'num_results': num_results,
'text': {'max_characters': 1000},
'summary': {'query': f"Key insights about {topic}"},
'highlights': {
@@ -49,37 +54,133 @@ class ExaResearchProvider(BaseProvider):
if config.exa_exclude_domains:
search_kwargs['exclude_domains'] = config.exa_exclude_domains
# Add date filters if configured
if hasattr(config, 'exa_date_filter') and config.exa_date_filter:
search_kwargs['start_published_date'] = config.exa_date_filter
if hasattr(config, 'exa_end_published_date') and config.exa_end_published_date:
search_kwargs['end_published_date'] = config.exa_end_published_date
if hasattr(config, 'exa_start_crawl_date') and config.exa_start_crawl_date:
search_kwargs['start_crawl_date'] = config.exa_start_crawl_date
if hasattr(config, 'exa_end_crawl_date') and config.exa_end_crawl_date:
search_kwargs['end_crawl_date'] = config.exa_end_crawl_date
# Add context if configured (supports boolean or object with maxCharacters)
if hasattr(config, 'exa_context') and config.exa_context is not None:
if config.exa_context:
if hasattr(config, 'exa_context_max_characters') and config.exa_context_max_characters:
search_kwargs['context'] = {'maxCharacters': config.exa_context_max_characters}
else:
search_kwargs['context'] = True
# If False, don't add context parameter (default behavior)
# Add text filters if configured
if hasattr(config, 'exa_include_text') and config.exa_include_text:
search_kwargs['include_text'] = config.exa_include_text
if hasattr(config, 'exa_exclude_text') and config.exa_exclude_text:
search_kwargs['exclude_text'] = config.exa_exclude_text
logger.info(f"[Exa Research] Executing search: {query}")
# Execute Exa search - pass contents parameters directly, not nested
try:
# Build optional parameters dict
optional_params = {}
if category:
optional_params['category'] = category
if config.exa_include_domains:
optional_params['include_domains'] = config.exa_include_domains
if config.exa_exclude_domains:
optional_params['exclude_domains'] = config.exa_exclude_domains
if hasattr(config, 'exa_date_filter') and config.exa_date_filter:
optional_params['start_published_date'] = config.exa_date_filter
if hasattr(config, 'exa_end_published_date') and config.exa_end_published_date:
optional_params['end_published_date'] = config.exa_end_published_date
if hasattr(config, 'exa_start_crawl_date') and config.exa_start_crawl_date:
optional_params['start_crawl_date'] = config.exa_start_crawl_date
if hasattr(config, 'exa_end_crawl_date') and config.exa_end_crawl_date:
optional_params['end_crawl_date'] = config.exa_end_crawl_date
# Add context if configured (supports boolean or object with maxCharacters)
if hasattr(config, 'exa_context') and config.exa_context:
if hasattr(config, 'exa_context_max_characters') and config.exa_context_max_characters:
optional_params['context'] = {'maxCharacters': config.exa_context_max_characters}
else:
optional_params['context'] = True
# Add text filters if configured
if hasattr(config, 'exa_include_text') and config.exa_include_text:
optional_params['include_text'] = config.exa_include_text
if hasattr(config, 'exa_exclude_text') and config.exa_exclude_text:
optional_params['exclude_text'] = config.exa_exclude_text
# Add additional_queries for Deep search (only works with type="deep")
if config.exa_search_type == 'deep' and hasattr(config, 'exa_additional_queries') and config.exa_additional_queries:
optional_params['additional_queries'] = config.exa_additional_queries
# Build contents parameters (text, summary, highlights)
text_params = {}
if hasattr(config, 'exa_text_max_characters') and config.exa_text_max_characters:
text_params['max_characters'] = config.exa_text_max_characters
else:
text_params['max_characters'] = 1000 # Default
summary_params = {}
if hasattr(config, 'exa_summary_query') and config.exa_summary_query:
summary_params['query'] = config.exa_summary_query
else:
summary_params['query'] = f"Key insights about {topic}" # Default
highlights_params = {}
if hasattr(config, 'exa_highlights') and config.exa_highlights:
if hasattr(config, 'exa_highlights_num_sentences') and config.exa_highlights_num_sentences:
highlights_params['num_sentences'] = config.exa_highlights_num_sentences
else:
highlights_params['num_sentences'] = 2 # Default
if hasattr(config, 'exa_highlights_per_url') and config.exa_highlights_per_url:
highlights_params['highlights_per_url'] = config.exa_highlights_per_url
else:
highlights_params['highlights_per_url'] = 3 # Default
results = self.exa.search_and_contents(
query,
text={'max_characters': 1000},
summary={'query': f"Key insights about {topic}"},
highlights={'num_sentences': 2, 'highlights_per_url': 3},
text=text_params,
summary=summary_params,
highlights=highlights_params if highlights_params else None,
type=config.exa_search_type or "auto",
num_results=min(config.max_sources, 25),
**({k: v for k, v in {
'category': category,
'include_domains': config.exa_include_domains,
'exclude_domains': config.exa_exclude_domains
}.items() if v})
num_results=num_results,
**optional_params
)
except Exception as e:
logger.error(f"[Exa Research] API call failed: {e}")
# Try simpler call without contents if the above fails
try:
logger.info("[Exa Research] Retrying with simplified parameters")
# Build minimal optional parameters for retry
optional_params = {}
if category:
optional_params['category'] = category
if config.exa_include_domains:
optional_params['include_domains'] = config.exa_include_domains
if config.exa_exclude_domains:
optional_params['exclude_domains'] = config.exa_exclude_domains
if hasattr(config, 'exa_date_filter') and config.exa_date_filter:
optional_params['start_published_date'] = config.exa_date_filter
if hasattr(config, 'exa_end_published_date') and config.exa_end_published_date:
optional_params['end_published_date'] = config.exa_end_published_date
if hasattr(config, 'exa_start_crawl_date') and config.exa_start_crawl_date:
optional_params['start_crawl_date'] = config.exa_start_crawl_date
if hasattr(config, 'exa_end_crawl_date') and config.exa_end_crawl_date:
optional_params['end_crawl_date'] = config.exa_end_crawl_date
# Add additional_queries for Deep search (only works with type="deep")
if config.exa_search_type == 'deep' and hasattr(config, 'exa_additional_queries') and config.exa_additional_queries:
optional_params['additional_queries'] = config.exa_additional_queries
results = self.exa.search_and_contents(
query,
type=config.exa_search_type or "auto",
num_results=min(config.max_sources, 25),
**({k: v for k, v in {
'category': category,
'include_domains': config.exa_include_domains,
'exclude_domains': config.exa_exclude_domains
}.items() if v})
num_results=num_results,
**optional_params
)
except Exception as retry_error:
logger.error(f"[Exa Research] Retry also failed: {retry_error}")

View File

@@ -31,7 +31,11 @@ from .research_strategies import get_strategy_for_mode
class ResearchService:
"""Service for conducting comprehensive research using Google Search grounding."""
"""Service for conducting comprehensive research using Exa neural search.
Currently supports Exa as the primary and only provider for testing and debugging.
Google Search grounding code is preserved for future use.
"""
def __init__(self):
self.keyword_analyzer = KeywordAnalyzer()
@@ -43,9 +47,11 @@ class ResearchService:
async def research(self, request: BlogResearchRequest, user_id: str) -> BlogResearchResponse:
"""
Stage 1: Research & Strategy (AI Orchestration)
Uses ONLY Gemini's native Google Search grounding - ONE API call for everything.
Uses Exa neural search as the primary research provider.
Follows LinkedIn service pattern for efficiency and cost optimization.
Includes intelligent caching for exact keyword matches.
Note: Currently Exa-only for testing. Failures will raise errors instead of falling back.
"""
try:
from services.cache.research_cache import research_cache
@@ -88,7 +94,7 @@ class ResearchService:
# Determine research mode and get appropriate strategy
research_mode = request.research_mode or ResearchMode.BASIC
config = request.config or ResearchConfig(mode=research_mode, provider=ResearchProvider.GOOGLE)
config = request.config or ResearchConfig(mode=research_mode, provider=ResearchProvider.EXA)
strategy = get_strategy_for_mode(research_mode)
logger.info(f"Research: mode={research_mode.value}, provider={config.provider.value}")
@@ -96,7 +102,11 @@ class ResearchService:
# Build research prompt based on strategy
research_prompt = strategy.build_research_prompt(topic, industry, target_audience, config)
# Route to appropriate provider
# Currently Exa-only for testing - fail if other providers are requested
if config.provider != ResearchProvider.EXA:
raise ValueError(f"Only Exa provider is currently supported for testing. Requested provider: {config.provider.value}")
# Route to Exa provider
if config.provider == ResearchProvider.EXA:
# Exa research workflow
from .exa_provider import ExaResearchProvider
@@ -145,13 +155,9 @@ class ResearchService:
grounding_metadata = None # Exa doesn't provide grounding metadata
except RuntimeError as e:
if "EXA_API_KEY not configured" in str(e):
logger.warning("Exa not configured, falling back to Google")
config.provider = ResearchProvider.GOOGLE
# Continue to Google flow below
raw_result = None
else:
raise
# Fail fast - no fallback for testing/debugging
logger.error(f"Exa research failed: {e}")
raise RuntimeError(f"Exa research failed: {e}. Please ensure EXA_API_KEY is configured.") from e
elif config.provider == ResearchProvider.TAVILY:
# Tavily research workflow
@@ -231,41 +237,13 @@ class ResearchService:
grounding_metadata = None # Tavily doesn't provide grounding metadata
except RuntimeError as e:
if "TAVILY_API_KEY not configured" in str(e):
logger.warning("Tavily not configured, falling back to Google")
config.provider = ResearchProvider.GOOGLE
# Continue to Google flow below
raw_result = None
else:
raise
if config.provider not in [ResearchProvider.EXA, ResearchProvider.TAVILY]:
# Google research (existing flow) or fallback from Exa
from .google_provider import GoogleResearchProvider
import time
api_start_time = time.time()
google_provider = GoogleResearchProvider()
gemini_result = await google_provider.search(
research_prompt, topic, industry, target_audience, config, user_id
)
api_duration_ms = (time.time() - api_start_time) * 1000
# Log API call performance
blog_writer_logger.log_api_call(
"gemini_grounded",
"generate_grounded_content",
api_duration_ms,
token_usage=gemini_result.get("token_usage", {}),
content_length=len(gemini_result.get("content", ""))
)
# Extract sources and content
sources = self._extract_sources_from_grounding(gemini_result)
content = gemini_result.get("content", "")
search_widget = gemini_result.get("search_widget", "") or ""
search_queries = gemini_result.get("search_queries", []) or []
grounding_metadata = self._extract_grounding_metadata(gemini_result)
# Fail fast - no fallback for testing/debugging
logger.error(f"Tavily research failed: {e}")
raise RuntimeError(f"Tavily research failed: {e}. Please ensure TAVILY_API_KEY is configured.") from e
# Validate that we have content and sources before proceeding
if 'content' not in locals() or 'sources' not in locals():
raise RuntimeError(f"{config.provider.value} research did not return content or sources. Research failed.")
# Continue with common analysis (same for all providers)
keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords, user_id=user_id)
@@ -434,7 +412,7 @@ class ResearchService:
# Determine research mode and get appropriate strategy
research_mode = request.research_mode or ResearchMode.BASIC
config = request.config or ResearchConfig(mode=research_mode, provider=ResearchProvider.GOOGLE)
config = request.config or ResearchConfig(mode=research_mode, provider=ResearchProvider.EXA)
strategy = get_strategy_for_mode(research_mode)
logger.info(f"Research: mode={research_mode.value}, provider={config.provider.value}")
@@ -442,7 +420,11 @@ class ResearchService:
# Build research prompt based on strategy
research_prompt = strategy.build_research_prompt(topic, industry, target_audience, config)
# Route to appropriate provider
# Currently Exa-only for testing - fail if other providers are requested
if config.provider != ResearchProvider.EXA:
raise ValueError(f"Only Exa provider is currently supported for testing. Requested provider: {config.provider.value}")
# Route to Exa provider
if config.provider == ResearchProvider.EXA:
# Exa research workflow
from .exa_provider import ExaResearchProvider
@@ -495,13 +477,10 @@ class ResearchService:
grounding_metadata = None # Exa doesn't provide grounding metadata
except RuntimeError as e:
if "EXA_API_KEY not configured" in str(e):
logger.warning("Exa not configured, falling back to Google")
await task_manager.update_progress(task_id, "⚠️ Exa not configured, falling back to Google Search")
config.provider = ResearchProvider.GOOGLE
# Continue to Google flow below
else:
raise
# Fail fast - no fallback for testing/debugging
logger.error(f"Exa research failed: {e}")
await task_manager.update_progress(task_id, f" Exa research failed: {str(e)}")
raise RuntimeError(f"Exa research failed: {e}. Please ensure EXA_API_KEY is configured.") from e
elif config.provider == ResearchProvider.TAVILY:
# Tavily research workflow
@@ -581,43 +560,18 @@ class ResearchService:
grounding_metadata = None # Tavily doesn't provide grounding metadata
except RuntimeError as e:
if "TAVILY_API_KEY not configured" in str(e):
logger.warning("Tavily not configured, falling back to Google")
await task_manager.update_progress(task_id, "⚠️ Tavily not configured, falling back to Google Search")
config.provider = ResearchProvider.GOOGLE
# Continue to Google flow below
else:
raise
if config.provider not in [ResearchProvider.EXA, ResearchProvider.TAVILY]:
# Google research (existing flow)
from .google_provider import GoogleResearchProvider
await task_manager.update_progress(task_id, "🌐 Connecting to Google Search grounding...")
google_provider = GoogleResearchProvider()
await task_manager.update_progress(task_id, "🤖 Making AI request to Gemini with Google Search grounding...")
try:
gemini_result = await google_provider.search(
research_prompt, topic, industry, target_audience, config, user_id
)
except HTTPException as http_error:
logger.error(f"Subscription limit exceeded for Google research: {http_error.detail}")
await task_manager.update_progress(task_id, f"❌ Subscription limit exceeded: {http_error.detail.get('message', str(http_error.detail)) if isinstance(http_error.detail, dict) else str(http_error.detail)}")
raise
await task_manager.update_progress(task_id, "📊 Processing research results and extracting insights...")
# Extract sources and content
# Handle None result case
if gemini_result is None:
logger.error("gemini_result is None after search - this should not happen if HTTPException was raised")
raise ValueError("Research result is None - search operation failed unexpectedly")
sources = self._extract_sources_from_grounding(gemini_result)
content = gemini_result.get("content", "") if isinstance(gemini_result, dict) else ""
search_widget = gemini_result.get("search_widget", "") or "" if isinstance(gemini_result, dict) else ""
search_queries = gemini_result.get("search_queries", []) or [] if isinstance(gemini_result, dict) else []
grounding_metadata = self._extract_grounding_metadata(gemini_result)
# Fail fast - no fallback for testing/debugging
logger.error(f"Tavily research failed: {e}")
await task_manager.update_progress(task_id, f" Tavily research failed: {str(e)}")
raise RuntimeError(f"Tavily research failed: {e}. Please ensure TAVILY_API_KEY is configured.") from e
# Validate that we have content and sources before proceeding
if config.provider == ResearchProvider.EXA and ('content' not in locals() or 'sources' not in locals()):
await task_manager.update_progress(task_id, "❌ Exa research did not return content or sources")
raise RuntimeError("Exa research did not return content or sources. Research failed.")
elif config.provider == ResearchProvider.TAVILY and ('content' not in locals() or 'sources' not in locals()):
await task_manager.update_progress(task_id, "❌ Tavily research did not return content or sources")
raise RuntimeError("Tavily research did not return content or sources. Research failed.")
# Continue with common analysis (same for all providers)
await task_manager.update_progress(task_id, "🔍 Analyzing keywords and content angles...")