AI Analysis and Content Strategy fixes. Enhanced Strategy Routes refactoring.
This commit is contained in:
@@ -35,7 +35,7 @@ blog_writer/
|
||||
- Delegates to specialized modules for specific functionality
|
||||
|
||||
### Research Module (`research/`)
|
||||
- **`ResearchService`**: Orchestrates comprehensive research using Google Search grounding
|
||||
- **`ResearchService`**: Orchestrates comprehensive research using Exa neural search (currently Exa-only for testing)
|
||||
- **`KeywordAnalyzer`**: AI-powered keyword analysis and extraction
|
||||
- **`CompetitorAnalyzer`**: Competitor intelligence and market analysis
|
||||
- **`ContentAngleGenerator`**: Strategic content angle discovery
|
||||
|
||||
@@ -2,10 +2,12 @@
|
||||
Research module for AI Blog Writer.
|
||||
|
||||
This module handles all research-related functionality including:
|
||||
- Google Search grounding integration
|
||||
- Exa neural search integration (primary provider for testing)
|
||||
- Keyword analysis and competitor research
|
||||
- Content angle discovery
|
||||
- Research caching and optimization
|
||||
|
||||
Note: Currently Exa-only for testing. The Google Search grounding code is preserved for future use.
|
||||
"""
|
||||
|
||||
from .research_service import ResearchService
|
||||
|
||||
@@ -29,10 +29,15 @@ class ExaResearchProvider(BaseProvider):
|
||||
# Determine category: use exa_category if set, otherwise map from source_types
|
||||
category = config.exa_category if config.exa_category else self._map_source_type_to_category(config.source_types)
|
||||
|
||||
# Use exa_num_results if set; otherwise fall back to max_sources (capped at 25)
|
||||
num_results = config.exa_num_results if hasattr(config, 'exa_num_results') and config.exa_num_results else min(config.max_sources, 25)
|
||||
# Cap at 100 as per Exa API limits
|
||||
num_results = min(num_results, 100)
|
||||
|
||||
# Build search kwargs - use correct Exa API format
|
||||
search_kwargs = {
|
||||
'type': config.exa_search_type or "auto",
|
||||
'num_results': min(config.max_sources, 25),
|
||||
'num_results': num_results,
|
||||
'text': {'max_characters': 1000},
|
||||
'summary': {'query': f"Key insights about {topic}"},
|
||||
'highlights': {
|
||||
@@ -49,37 +54,133 @@ class ExaResearchProvider(BaseProvider):
|
||||
if config.exa_exclude_domains:
|
||||
search_kwargs['exclude_domains'] = config.exa_exclude_domains
|
||||
|
||||
# Add date filters if configured
|
||||
if hasattr(config, 'exa_date_filter') and config.exa_date_filter:
|
||||
search_kwargs['start_published_date'] = config.exa_date_filter
|
||||
if hasattr(config, 'exa_end_published_date') and config.exa_end_published_date:
|
||||
search_kwargs['end_published_date'] = config.exa_end_published_date
|
||||
if hasattr(config, 'exa_start_crawl_date') and config.exa_start_crawl_date:
|
||||
search_kwargs['start_crawl_date'] = config.exa_start_crawl_date
|
||||
if hasattr(config, 'exa_end_crawl_date') and config.exa_end_crawl_date:
|
||||
search_kwargs['end_crawl_date'] = config.exa_end_crawl_date
|
||||
|
||||
# Add context if configured (supports boolean or object with maxCharacters)
|
||||
if hasattr(config, 'exa_context') and config.exa_context is not None:
|
||||
if config.exa_context:
|
||||
if hasattr(config, 'exa_context_max_characters') and config.exa_context_max_characters:
|
||||
search_kwargs['context'] = {'maxCharacters': config.exa_context_max_characters}
|
||||
else:
|
||||
search_kwargs['context'] = True
|
||||
# If False, don't add context parameter (default behavior)
|
||||
|
||||
# Add text filters if configured
|
||||
if hasattr(config, 'exa_include_text') and config.exa_include_text:
|
||||
search_kwargs['include_text'] = config.exa_include_text
|
||||
if hasattr(config, 'exa_exclude_text') and config.exa_exclude_text:
|
||||
search_kwargs['exclude_text'] = config.exa_exclude_text
|
||||
|
||||
logger.info(f"[Exa Research] Executing search: {query}")
|
||||
|
||||
# Execute Exa search - pass contents parameters directly, not nested
|
||||
try:
|
||||
# Build optional parameters dict
|
||||
optional_params = {}
|
||||
if category:
|
||||
optional_params['category'] = category
|
||||
if config.exa_include_domains:
|
||||
optional_params['include_domains'] = config.exa_include_domains
|
||||
if config.exa_exclude_domains:
|
||||
optional_params['exclude_domains'] = config.exa_exclude_domains
|
||||
if hasattr(config, 'exa_date_filter') and config.exa_date_filter:
|
||||
optional_params['start_published_date'] = config.exa_date_filter
|
||||
if hasattr(config, 'exa_end_published_date') and config.exa_end_published_date:
|
||||
optional_params['end_published_date'] = config.exa_end_published_date
|
||||
if hasattr(config, 'exa_start_crawl_date') and config.exa_start_crawl_date:
|
||||
optional_params['start_crawl_date'] = config.exa_start_crawl_date
|
||||
if hasattr(config, 'exa_end_crawl_date') and config.exa_end_crawl_date:
|
||||
optional_params['end_crawl_date'] = config.exa_end_crawl_date
|
||||
# Add context if configured (supports boolean or object with maxCharacters)
|
||||
if hasattr(config, 'exa_context') and config.exa_context:
|
||||
if hasattr(config, 'exa_context_max_characters') and config.exa_context_max_characters:
|
||||
optional_params['context'] = {'maxCharacters': config.exa_context_max_characters}
|
||||
else:
|
||||
optional_params['context'] = True
|
||||
|
||||
# Add text filters if configured
|
||||
if hasattr(config, 'exa_include_text') and config.exa_include_text:
|
||||
optional_params['include_text'] = config.exa_include_text
|
||||
if hasattr(config, 'exa_exclude_text') and config.exa_exclude_text:
|
||||
optional_params['exclude_text'] = config.exa_exclude_text
|
||||
|
||||
# Add additional_queries for Deep search (only works with type="deep")
|
||||
if config.exa_search_type == 'deep' and hasattr(config, 'exa_additional_queries') and config.exa_additional_queries:
|
||||
optional_params['additional_queries'] = config.exa_additional_queries
|
||||
|
||||
# Build contents parameters (text, summary, highlights)
|
||||
text_params = {}
|
||||
if hasattr(config, 'exa_text_max_characters') and config.exa_text_max_characters:
|
||||
text_params['max_characters'] = config.exa_text_max_characters
|
||||
else:
|
||||
text_params['max_characters'] = 1000 # Default
|
||||
|
||||
summary_params = {}
|
||||
if hasattr(config, 'exa_summary_query') and config.exa_summary_query:
|
||||
summary_params['query'] = config.exa_summary_query
|
||||
else:
|
||||
summary_params['query'] = f"Key insights about {topic}" # Default
|
||||
|
||||
highlights_params = {}
|
||||
if hasattr(config, 'exa_highlights') and config.exa_highlights:
|
||||
if hasattr(config, 'exa_highlights_num_sentences') and config.exa_highlights_num_sentences:
|
||||
highlights_params['num_sentences'] = config.exa_highlights_num_sentences
|
||||
else:
|
||||
highlights_params['num_sentences'] = 2 # Default
|
||||
|
||||
if hasattr(config, 'exa_highlights_per_url') and config.exa_highlights_per_url:
|
||||
highlights_params['highlights_per_url'] = config.exa_highlights_per_url
|
||||
else:
|
||||
highlights_params['highlights_per_url'] = 3 # Default
|
||||
|
||||
results = self.exa.search_and_contents(
|
||||
query,
|
||||
text={'max_characters': 1000},
|
||||
summary={'query': f"Key insights about {topic}"},
|
||||
highlights={'num_sentences': 2, 'highlights_per_url': 3},
|
||||
text=text_params,
|
||||
summary=summary_params,
|
||||
highlights=highlights_params if highlights_params else None,
|
||||
type=config.exa_search_type or "auto",
|
||||
num_results=min(config.max_sources, 25),
|
||||
**({k: v for k, v in {
|
||||
'category': category,
|
||||
'include_domains': config.exa_include_domains,
|
||||
'exclude_domains': config.exa_exclude_domains
|
||||
}.items() if v})
|
||||
num_results=num_results,
|
||||
**optional_params
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[Exa Research] API call failed: {e}")
|
||||
# Try simpler call without contents if the above fails
|
||||
try:
|
||||
logger.info("[Exa Research] Retrying with simplified parameters")
|
||||
# Build minimal optional parameters for retry
|
||||
optional_params = {}
|
||||
if category:
|
||||
optional_params['category'] = category
|
||||
if config.exa_include_domains:
|
||||
optional_params['include_domains'] = config.exa_include_domains
|
||||
if config.exa_exclude_domains:
|
||||
optional_params['exclude_domains'] = config.exa_exclude_domains
|
||||
if hasattr(config, 'exa_date_filter') and config.exa_date_filter:
|
||||
optional_params['start_published_date'] = config.exa_date_filter
|
||||
if hasattr(config, 'exa_end_published_date') and config.exa_end_published_date:
|
||||
optional_params['end_published_date'] = config.exa_end_published_date
|
||||
if hasattr(config, 'exa_start_crawl_date') and config.exa_start_crawl_date:
|
||||
optional_params['start_crawl_date'] = config.exa_start_crawl_date
|
||||
if hasattr(config, 'exa_end_crawl_date') and config.exa_end_crawl_date:
|
||||
optional_params['end_crawl_date'] = config.exa_end_crawl_date
|
||||
|
||||
# Add additional_queries for Deep search (only works with type="deep")
|
||||
if config.exa_search_type == 'deep' and hasattr(config, 'exa_additional_queries') and config.exa_additional_queries:
|
||||
optional_params['additional_queries'] = config.exa_additional_queries
|
||||
|
||||
results = self.exa.search_and_contents(
|
||||
query,
|
||||
type=config.exa_search_type or "auto",
|
||||
num_results=min(config.max_sources, 25),
|
||||
**({k: v for k, v in {
|
||||
'category': category,
|
||||
'include_domains': config.exa_include_domains,
|
||||
'exclude_domains': config.exa_exclude_domains
|
||||
}.items() if v})
|
||||
num_results=num_results,
|
||||
**optional_params
|
||||
)
|
||||
except Exception as retry_error:
|
||||
logger.error(f"[Exa Research] Retry also failed: {retry_error}")
|
||||
|
||||
@@ -31,7 +31,11 @@ from .research_strategies import get_strategy_for_mode
|
||||
|
||||
|
||||
class ResearchService:
|
||||
"""Service for conducting comprehensive research using Google Search grounding."""
|
||||
"""Service for conducting comprehensive research using Exa neural search.
|
||||
|
||||
Exa is currently the only supported provider, to simplify testing and debugging.
|
||||
Google Search grounding code is preserved for future use.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.keyword_analyzer = KeywordAnalyzer()
|
||||
@@ -43,9 +47,11 @@ class ResearchService:
|
||||
async def research(self, request: BlogResearchRequest, user_id: str) -> BlogResearchResponse:
|
||||
"""
|
||||
Stage 1: Research & Strategy (AI Orchestration)
|
||||
Uses ONLY Gemini's native Google Search grounding - ONE API call for everything.
|
||||
Uses Exa neural search as the primary research provider.
|
||||
Follows LinkedIn service pattern for efficiency and cost optimization.
|
||||
Includes intelligent caching for exact keyword matches.
|
||||
|
||||
Note: Currently Exa-only for testing. Failures will raise errors instead of falling back.
|
||||
"""
|
||||
try:
|
||||
from services.cache.research_cache import research_cache
|
||||
@@ -88,7 +94,7 @@ class ResearchService:
|
||||
|
||||
# Determine research mode and get appropriate strategy
|
||||
research_mode = request.research_mode or ResearchMode.BASIC
|
||||
config = request.config or ResearchConfig(mode=research_mode, provider=ResearchProvider.GOOGLE)
|
||||
config = request.config or ResearchConfig(mode=research_mode, provider=ResearchProvider.EXA)
|
||||
strategy = get_strategy_for_mode(research_mode)
|
||||
|
||||
logger.info(f"Research: mode={research_mode.value}, provider={config.provider.value}")
|
||||
@@ -96,7 +102,11 @@ class ResearchService:
|
||||
# Build research prompt based on strategy
|
||||
research_prompt = strategy.build_research_prompt(topic, industry, target_audience, config)
|
||||
|
||||
# Route to appropriate provider
|
||||
# Currently Exa-only for testing - fail if other providers are requested
|
||||
if config.provider != ResearchProvider.EXA:
|
||||
raise ValueError(f"Only Exa provider is currently supported for testing. Requested provider: {config.provider.value}")
|
||||
|
||||
# Route to Exa provider
|
||||
if config.provider == ResearchProvider.EXA:
|
||||
# Exa research workflow
|
||||
from .exa_provider import ExaResearchProvider
|
||||
@@ -145,13 +155,9 @@ class ResearchService:
|
||||
grounding_metadata = None # Exa doesn't provide grounding metadata
|
||||
|
||||
except RuntimeError as e:
|
||||
if "EXA_API_KEY not configured" in str(e):
|
||||
logger.warning("Exa not configured, falling back to Google")
|
||||
config.provider = ResearchProvider.GOOGLE
|
||||
# Continue to Google flow below
|
||||
raw_result = None
|
||||
else:
|
||||
raise
|
||||
# Fail fast - no fallback for testing/debugging
|
||||
logger.error(f"Exa research failed: {e}")
|
||||
raise RuntimeError(f"Exa research failed: {e}. Please ensure EXA_API_KEY is configured.") from e
|
||||
|
||||
elif config.provider == ResearchProvider.TAVILY:
|
||||
# Tavily research workflow
|
||||
@@ -231,41 +237,13 @@ class ResearchService:
|
||||
grounding_metadata = None # Tavily doesn't provide grounding metadata
|
||||
|
||||
except RuntimeError as e:
|
||||
if "TAVILY_API_KEY not configured" in str(e):
|
||||
logger.warning("Tavily not configured, falling back to Google")
|
||||
config.provider = ResearchProvider.GOOGLE
|
||||
# Continue to Google flow below
|
||||
raw_result = None
|
||||
else:
|
||||
raise
|
||||
|
||||
if config.provider not in [ResearchProvider.EXA, ResearchProvider.TAVILY]:
|
||||
# Google research (existing flow) or fallback from Exa
|
||||
from .google_provider import GoogleResearchProvider
|
||||
import time
|
||||
|
||||
api_start_time = time.time()
|
||||
google_provider = GoogleResearchProvider()
|
||||
gemini_result = await google_provider.search(
|
||||
research_prompt, topic, industry, target_audience, config, user_id
|
||||
)
|
||||
api_duration_ms = (time.time() - api_start_time) * 1000
|
||||
|
||||
# Log API call performance
|
||||
blog_writer_logger.log_api_call(
|
||||
"gemini_grounded",
|
||||
"generate_grounded_content",
|
||||
api_duration_ms,
|
||||
token_usage=gemini_result.get("token_usage", {}),
|
||||
content_length=len(gemini_result.get("content", ""))
|
||||
)
|
||||
|
||||
# Extract sources and content
|
||||
sources = self._extract_sources_from_grounding(gemini_result)
|
||||
content = gemini_result.get("content", "")
|
||||
search_widget = gemini_result.get("search_widget", "") or ""
|
||||
search_queries = gemini_result.get("search_queries", []) or []
|
||||
grounding_metadata = self._extract_grounding_metadata(gemini_result)
|
||||
# Fail fast - no fallback for testing/debugging
|
||||
logger.error(f"Tavily research failed: {e}")
|
||||
raise RuntimeError(f"Tavily research failed: {e}. Please ensure TAVILY_API_KEY is configured.") from e
|
||||
|
||||
# Validate that we have content and sources before proceeding
|
||||
if 'content' not in locals() or 'sources' not in locals():
|
||||
raise RuntimeError(f"{config.provider.value} research did not return content or sources. Research failed.")
|
||||
|
||||
# Continue with common analysis (shared by all providers)
|
||||
keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords, user_id=user_id)
|
||||
@@ -434,7 +412,7 @@ class ResearchService:
|
||||
|
||||
# Determine research mode and get appropriate strategy
|
||||
research_mode = request.research_mode or ResearchMode.BASIC
|
||||
config = request.config or ResearchConfig(mode=research_mode, provider=ResearchProvider.GOOGLE)
|
||||
config = request.config or ResearchConfig(mode=research_mode, provider=ResearchProvider.EXA)
|
||||
strategy = get_strategy_for_mode(research_mode)
|
||||
|
||||
logger.info(f"Research: mode={research_mode.value}, provider={config.provider.value}")
|
||||
@@ -442,7 +420,11 @@ class ResearchService:
|
||||
# Build research prompt based on strategy
|
||||
research_prompt = strategy.build_research_prompt(topic, industry, target_audience, config)
|
||||
|
||||
# Route to appropriate provider
|
||||
# Currently Exa-only for testing - fail if other providers are requested
|
||||
if config.provider != ResearchProvider.EXA:
|
||||
raise ValueError(f"Only Exa provider is currently supported for testing. Requested provider: {config.provider.value}")
|
||||
|
||||
# Route to Exa provider
|
||||
if config.provider == ResearchProvider.EXA:
|
||||
# Exa research workflow
|
||||
from .exa_provider import ExaResearchProvider
|
||||
@@ -495,13 +477,10 @@ class ResearchService:
|
||||
grounding_metadata = None # Exa doesn't provide grounding metadata
|
||||
|
||||
except RuntimeError as e:
|
||||
if "EXA_API_KEY not configured" in str(e):
|
||||
logger.warning("Exa not configured, falling back to Google")
|
||||
await task_manager.update_progress(task_id, "⚠️ Exa not configured, falling back to Google Search")
|
||||
config.provider = ResearchProvider.GOOGLE
|
||||
# Continue to Google flow below
|
||||
else:
|
||||
raise
|
||||
# Fail fast - no fallback for testing/debugging
|
||||
logger.error(f"Exa research failed: {e}")
|
||||
await task_manager.update_progress(task_id, f"❌ Exa research failed: {str(e)}")
|
||||
raise RuntimeError(f"Exa research failed: {e}. Please ensure EXA_API_KEY is configured.") from e
|
||||
|
||||
elif config.provider == ResearchProvider.TAVILY:
|
||||
# Tavily research workflow
|
||||
@@ -581,43 +560,18 @@ class ResearchService:
|
||||
grounding_metadata = None # Tavily doesn't provide grounding metadata
|
||||
|
||||
except RuntimeError as e:
|
||||
if "TAVILY_API_KEY not configured" in str(e):
|
||||
logger.warning("Tavily not configured, falling back to Google")
|
||||
await task_manager.update_progress(task_id, "⚠️ Tavily not configured, falling back to Google Search")
|
||||
config.provider = ResearchProvider.GOOGLE
|
||||
# Continue to Google flow below
|
||||
else:
|
||||
raise
|
||||
|
||||
if config.provider not in [ResearchProvider.EXA, ResearchProvider.TAVILY]:
|
||||
# Google research (existing flow)
|
||||
from .google_provider import GoogleResearchProvider
|
||||
|
||||
await task_manager.update_progress(task_id, "🌐 Connecting to Google Search grounding...")
|
||||
google_provider = GoogleResearchProvider()
|
||||
|
||||
await task_manager.update_progress(task_id, "🤖 Making AI request to Gemini with Google Search grounding...")
|
||||
try:
|
||||
gemini_result = await google_provider.search(
|
||||
research_prompt, topic, industry, target_audience, config, user_id
|
||||
)
|
||||
except HTTPException as http_error:
|
||||
logger.error(f"Subscription limit exceeded for Google research: {http_error.detail}")
|
||||
await task_manager.update_progress(task_id, f"❌ Subscription limit exceeded: {http_error.detail.get('message', str(http_error.detail)) if isinstance(http_error.detail, dict) else str(http_error.detail)}")
|
||||
raise
|
||||
|
||||
await task_manager.update_progress(task_id, "📊 Processing research results and extracting insights...")
|
||||
# Extract sources and content
|
||||
# Handle None result case
|
||||
if gemini_result is None:
|
||||
logger.error("gemini_result is None after search - this should not happen if HTTPException was raised")
|
||||
raise ValueError("Research result is None - search operation failed unexpectedly")
|
||||
|
||||
sources = self._extract_sources_from_grounding(gemini_result)
|
||||
content = gemini_result.get("content", "") if isinstance(gemini_result, dict) else ""
|
||||
search_widget = gemini_result.get("search_widget", "") or "" if isinstance(gemini_result, dict) else ""
|
||||
search_queries = gemini_result.get("search_queries", []) or [] if isinstance(gemini_result, dict) else []
|
||||
grounding_metadata = self._extract_grounding_metadata(gemini_result)
|
||||
# Fail fast - no fallback for testing/debugging
|
||||
logger.error(f"Tavily research failed: {e}")
|
||||
await task_manager.update_progress(task_id, f"❌ Tavily research failed: {str(e)}")
|
||||
raise RuntimeError(f"Tavily research failed: {e}. Please ensure TAVILY_API_KEY is configured.") from e
|
||||
|
||||
# Validate that we have content and sources before proceeding
|
||||
if config.provider == ResearchProvider.EXA and ('content' not in locals() or 'sources' not in locals()):
|
||||
await task_manager.update_progress(task_id, "❌ Exa research did not return content or sources")
|
||||
raise RuntimeError("Exa research did not return content or sources. Research failed.")
|
||||
elif config.provider == ResearchProvider.TAVILY and ('content' not in locals() or 'sources' not in locals()):
|
||||
await task_manager.update_progress(task_id, "❌ Tavily research did not return content or sources")
|
||||
raise RuntimeError("Tavily research did not return content or sources. Research failed.")
|
||||
|
||||
# Continue with common analysis (shared by all providers)
|
||||
await task_manager.update_progress(task_id, "🔍 Analyzing keywords and content angles...")
|
||||
|
||||
Reference in New Issue
Block a user