Research component integration, Copilotkit implementation, SEO copilotkit implementation, Wix SEO metadata complete, Wix SEO metadata review
This commit is contained in:
@@ -8,6 +8,7 @@ import time
|
||||
import json
|
||||
from typing import Dict, Any, List
|
||||
from loguru import logger
|
||||
from fastapi import HTTPException
|
||||
|
||||
from models.blog_models import (
|
||||
MediumBlogGenerateRequest,
|
||||
@@ -25,8 +26,20 @@ class MediumBlogGenerator:
|
||||
def __init__(self):
|
||||
self.cache = persistent_content_cache
|
||||
|
||||
async def generate_medium_blog_with_progress(self, req: MediumBlogGenerateRequest, task_id: str) -> MediumBlogGenerateResult:
|
||||
"""Use Gemini structured JSON to generate a medium-length blog in one call."""
|
||||
async def generate_medium_blog_with_progress(self, req: MediumBlogGenerateRequest, task_id: str, user_id: str) -> MediumBlogGenerateResult:
|
||||
"""Use Gemini structured JSON to generate a medium-length blog in one call.
|
||||
|
||||
Args:
|
||||
req: Medium blog generation request
|
||||
task_id: Task ID for progress updates
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
|
||||
Raises:
|
||||
ValueError: If user_id is not provided
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for medium blog generation (subscription checks and usage tracking)")
|
||||
|
||||
import time
|
||||
start = time.time()
|
||||
|
||||
@@ -156,7 +169,7 @@ class MediumBlogGenerator:
|
||||
- Use language that resonates with {audience}
|
||||
- Maintain consistent voice that reflects this persona's expertise
|
||||
"""
|
||||
|
||||
|
||||
prompt = (
|
||||
f"Write blog content for the following sections. Each section should be {req.globalTargetWords or 1000} words total, distributed across all sections.\n\n"
|
||||
f"Blog Title: {req.title}\n\n"
|
||||
@@ -176,11 +189,20 @@ class MediumBlogGenerator:
|
||||
f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
|
||||
)
|
||||
|
||||
ai_resp = llm_text_gen(
|
||||
prompt=prompt,
|
||||
json_struct=schema,
|
||||
system_prompt=system,
|
||||
)
|
||||
try:
|
||||
ai_resp = llm_text_gen(
|
||||
prompt=prompt,
|
||||
json_struct=schema,
|
||||
system_prompt=system,
|
||||
user_id=user_id
|
||||
)
|
||||
except HTTPException:
|
||||
# Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve error details
|
||||
raise
|
||||
except Exception as llm_error:
|
||||
# Wrap other errors
|
||||
logger.error(f"AI generation failed: {llm_error}")
|
||||
raise Exception(f"AI generation failed: {str(llm_error)}")
|
||||
|
||||
# Check for errors in AI response
|
||||
if not ai_resp or ai_resp.get("error"):
|
||||
|
||||
@@ -105,13 +105,20 @@ class BlogWriterService:
|
||||
return await self.research_service.research_with_progress(request, task_id, user_id)
|
||||
|
||||
# Outline Methods
|
||||
async def generate_outline(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
|
||||
"""Generate AI-powered outline from research data."""
|
||||
return await self.outline_service.generate_outline(request)
|
||||
async def generate_outline(self, request: BlogOutlineRequest, user_id: str) -> BlogOutlineResponse:
|
||||
"""Generate AI-powered outline from research data.
|
||||
|
||||
Args:
|
||||
request: Outline generation request with research data
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for outline generation (subscription checks and usage tracking)")
|
||||
return await self.outline_service.generate_outline(request, user_id)
|
||||
|
||||
async def generate_outline_with_progress(self, request: BlogOutlineRequest, task_id: str) -> BlogOutlineResponse:
|
||||
async def generate_outline_with_progress(self, request: BlogOutlineRequest, task_id: str, user_id: str) -> BlogOutlineResponse:
|
||||
"""Generate outline with real-time progress updates."""
|
||||
return await self.outline_service.generate_outline_with_progress(request, task_id)
|
||||
return await self.outline_service.generate_outline_with_progress(request, task_id, user_id)
|
||||
|
||||
async def refine_outline(self, request: BlogOutlineRefineRequest) -> BlogOutlineResponse:
|
||||
"""Refine outline with HITL operations."""
|
||||
@@ -334,9 +341,17 @@ class BlogWriterService:
|
||||
# TODO: Move to content module
|
||||
return BlogPublishResponse(success=True, platform=request.platform, url="https://example.com/post")
|
||||
|
||||
async def generate_medium_blog_with_progress(self, req: MediumBlogGenerateRequest, task_id: str) -> MediumBlogGenerateResult:
|
||||
"""Use Gemini structured JSON to generate a medium-length blog in one call."""
|
||||
return await self.medium_blog_generator.generate_medium_blog_with_progress(req, task_id)
|
||||
async def generate_medium_blog_with_progress(self, req: MediumBlogGenerateRequest, task_id: str, user_id: str) -> MediumBlogGenerateResult:
|
||||
"""Use Gemini structured JSON to generate a medium-length blog in one call.
|
||||
|
||||
Args:
|
||||
req: Medium blog generation request
|
||||
task_id: Task ID for progress updates
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for medium blog generation (subscription checks and usage tracking)")
|
||||
return await self.medium_blog_generator.generate_medium_blog_with_progress(req, task_id, user_id)
|
||||
|
||||
async def analyze_flow_basic(self, request: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Analyze flow metrics for entire blog using single AI call (cost-effective)."""
|
||||
|
||||
@@ -42,10 +42,20 @@ class OutlineGenerator:
|
||||
self.response_processor = ResponseProcessor()
|
||||
self.parallel_processor = ParallelProcessor(self.source_mapper, self.grounding_engine)
|
||||
|
||||
async def generate(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
|
||||
async def generate(self, request: BlogOutlineRequest, user_id: str) -> BlogOutlineResponse:
|
||||
"""
|
||||
Generate AI-powered outline using research results
|
||||
Generate AI-powered outline using research results.
|
||||
|
||||
Args:
|
||||
request: Outline generation request with research data
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
|
||||
Raises:
|
||||
ValueError: If user_id is not provided
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for outline generation (subscription checks and usage tracking)")
|
||||
|
||||
# Extract research insights
|
||||
research = request.research
|
||||
primary_keywords = research.keyword_analysis.get('primary', [])
|
||||
@@ -68,15 +78,15 @@ class OutlineGenerator:
|
||||
# Define schema with proper property ordering (critical for Gemini API)
|
||||
outline_schema = self.prompt_builder.get_outline_schema()
|
||||
|
||||
# Generate outline using structured JSON response with retry logic
|
||||
outline_data = await self.response_processor.generate_with_retry(outline_prompt, outline_schema)
|
||||
# Generate outline using structured JSON response with retry logic (user_id required)
|
||||
outline_data = await self.response_processor.generate_with_retry(outline_prompt, outline_schema, user_id)
|
||||
|
||||
# Convert to BlogOutlineSection objects
|
||||
outline_sections = self.response_processor.convert_to_sections(outline_data, sources)
|
||||
|
||||
# Run parallel processing for speed optimization
|
||||
# Run parallel processing for speed optimization (user_id required)
|
||||
mapped_sections, grounding_insights = await self.parallel_processor.run_parallel_processing_async(
|
||||
outline_sections, research
|
||||
outline_sections, research, user_id
|
||||
)
|
||||
|
||||
# Enhance sections with grounding insights
|
||||
@@ -85,9 +95,9 @@ class OutlineGenerator:
|
||||
mapped_sections, research.grounding_metadata, grounding_insights
|
||||
)
|
||||
|
||||
# Optimize outline for better flow, SEO, and engagement
|
||||
# Optimize outline for better flow, SEO, and engagement (user_id required)
|
||||
logger.info("Optimizing outline for better flow and engagement...")
|
||||
optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization")
|
||||
optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization", user_id)
|
||||
|
||||
# Rebalance word counts for optimal distribution
|
||||
target_words = request.word_count or 1500
|
||||
@@ -118,10 +128,21 @@ class OutlineGenerator:
|
||||
research_coverage=research_coverage
|
||||
)
|
||||
|
||||
async def generate_with_progress(self, request: BlogOutlineRequest, task_id: str) -> BlogOutlineResponse:
|
||||
async def generate_with_progress(self, request: BlogOutlineRequest, task_id: str, user_id: str) -> BlogOutlineResponse:
|
||||
"""
|
||||
Outline generation method with progress updates for real-time feedback.
|
||||
|
||||
Args:
|
||||
request: Outline generation request with research data
|
||||
task_id: Task ID for progress updates
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
|
||||
Raises:
|
||||
ValueError: If user_id is not provided
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for outline generation (subscription checks and usage tracking)")
|
||||
|
||||
from api.blog_writer.task_manager import task_manager
|
||||
|
||||
# Extract research insights
|
||||
@@ -150,17 +171,17 @@ class OutlineGenerator:
|
||||
|
||||
await task_manager.update_progress(task_id, "🔄 Making AI request to generate structured outline...")
|
||||
|
||||
# Generate outline using structured JSON response with retry logic
|
||||
outline_data = await self.response_processor.generate_with_retry(outline_prompt, outline_schema, task_id)
|
||||
# Generate outline using structured JSON response with retry logic (user_id required for subscription checks)
|
||||
outline_data = await self.response_processor.generate_with_retry(outline_prompt, outline_schema, user_id, task_id)
|
||||
|
||||
await task_manager.update_progress(task_id, "📝 Processing outline structure and validating sections...")
|
||||
|
||||
# Convert to BlogOutlineSection objects
|
||||
outline_sections = self.response_processor.convert_to_sections(outline_data, sources)
|
||||
|
||||
# Run parallel processing for speed optimization
|
||||
# Run parallel processing for speed optimization (user_id required for subscription checks)
|
||||
mapped_sections, grounding_insights = await self.parallel_processor.run_parallel_processing(
|
||||
outline_sections, research, task_id
|
||||
outline_sections, research, user_id, task_id
|
||||
)
|
||||
|
||||
# Enhance sections with grounding insights (depends on both previous tasks)
|
||||
@@ -169,9 +190,9 @@ class OutlineGenerator:
|
||||
mapped_sections, research.grounding_metadata, grounding_insights
|
||||
)
|
||||
|
||||
# Optimize outline for better flow, SEO, and engagement
|
||||
# Optimize outline for better flow, SEO, and engagement (user_id required for subscription checks)
|
||||
await task_manager.update_progress(task_id, "🎯 Optimizing outline for better flow and engagement...")
|
||||
optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization")
|
||||
optimized_sections = await self.outline_optimizer.optimize(grounding_enhanced_sections, "comprehensive optimization", user_id)
|
||||
|
||||
# Rebalance word counts for optimal distribution
|
||||
await task_manager.update_progress(task_id, "⚖️ Rebalancing word count distribution...")
|
||||
|
||||
@@ -13,8 +13,23 @@ from models.blog_models import BlogOutlineSection
|
||||
class OutlineOptimizer:
|
||||
"""Optimizes outlines for better flow, SEO, and engagement."""
|
||||
|
||||
async def optimize(self, outline: List[BlogOutlineSection], focus: str = "general optimization") -> List[BlogOutlineSection]:
|
||||
"""Optimize entire outline for better flow, SEO, and engagement."""
|
||||
async def optimize(self, outline: List[BlogOutlineSection], focus: str, user_id: str) -> List[BlogOutlineSection]:
|
||||
"""Optimize entire outline for better flow, SEO, and engagement.
|
||||
|
||||
Args:
|
||||
outline: List of outline sections to optimize
|
||||
focus: Optimization focus (e.g., "general optimization")
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
|
||||
Returns:
|
||||
List of optimized outline sections
|
||||
|
||||
Raises:
|
||||
ValueError: If user_id is not provided
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for outline optimization (subscription checks and usage tracking)")
|
||||
|
||||
outline_text = "\n".join([f"{i+1}. {s.heading}" for i, s in enumerate(outline)])
|
||||
|
||||
optimization_prompt = f"""Optimize this blog outline for better flow, engagement, and SEO:
|
||||
@@ -67,7 +82,8 @@ Return JSON format:
|
||||
optimized_data = llm_text_gen(
|
||||
prompt=optimization_prompt,
|
||||
json_struct=optimization_schema,
|
||||
system_prompt=None
|
||||
system_prompt=None,
|
||||
user_id=user_id
|
||||
)
|
||||
|
||||
# Handle the new schema format with "outline" wrapper
|
||||
|
||||
@@ -29,11 +29,21 @@ class OutlineService:
|
||||
self.outline_optimizer = OutlineOptimizer()
|
||||
self.section_enhancer = SectionEnhancer()
|
||||
|
||||
async def generate_outline(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
|
||||
async def generate_outline(self, request: BlogOutlineRequest, user_id: str) -> BlogOutlineResponse:
|
||||
"""
|
||||
Stage 2: Content Planning with AI-generated outline using research results
|
||||
Uses Gemini with research data to create comprehensive, SEO-optimized outline
|
||||
Stage 2: Content Planning with AI-generated outline using research results.
|
||||
Uses Gemini with research data to create comprehensive, SEO-optimized outline.
|
||||
|
||||
Args:
|
||||
request: Outline generation request with research data
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
|
||||
Raises:
|
||||
ValueError: If user_id is not provided
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for outline generation (subscription checks and usage tracking)")
|
||||
|
||||
# Extract cache parameters - use original user keywords for consistent caching
|
||||
keywords = request.research.original_keywords or request.research.keyword_analysis.get('primary', [])
|
||||
industry = getattr(request.persona, 'industry', 'general') if request.persona else 'general'
|
||||
@@ -56,9 +66,9 @@ class OutlineService:
|
||||
logger.info(f"Using cached outline for keywords: {keywords}")
|
||||
return BlogOutlineResponse(**cached_result)
|
||||
|
||||
# Generate new outline if not cached
|
||||
# Generate new outline if not cached (user_id required)
|
||||
logger.info(f"Generating new outline for keywords: {keywords}")
|
||||
result = await self.outline_generator.generate(request)
|
||||
result = await self.outline_generator.generate(request, user_id)
|
||||
|
||||
# Cache the result
|
||||
persistent_outline_cache.cache_outline(
|
||||
@@ -73,7 +83,7 @@ class OutlineService:
|
||||
|
||||
return result
|
||||
|
||||
async def generate_outline_with_progress(self, request: BlogOutlineRequest, task_id: str) -> BlogOutlineResponse:
|
||||
async def generate_outline_with_progress(self, request: BlogOutlineRequest, task_id: str, user_id: str) -> BlogOutlineResponse:
|
||||
"""
|
||||
Outline generation method with progress updates for real-time feedback.
|
||||
"""
|
||||
@@ -104,7 +114,7 @@ class OutlineService:
|
||||
|
||||
# Generate new outline if not cached
|
||||
logger.info(f"Generating new outline for keywords: {keywords} (with progress updates)")
|
||||
result = await self.outline_generator.generate_with_progress(request, task_id)
|
||||
result = await self.outline_generator.generate_with_progress(request, task_id, user_id)
|
||||
|
||||
# Cache the result
|
||||
persistent_outline_cache.cache_outline(
|
||||
|
||||
@@ -17,18 +17,25 @@ class ParallelProcessor:
|
||||
self.source_mapper = source_mapper
|
||||
self.grounding_engine = grounding_engine
|
||||
|
||||
async def run_parallel_processing(self, outline_sections, research, task_id: str = None) -> Tuple[Any, Any]:
|
||||
async def run_parallel_processing(self, outline_sections, research, user_id: str, task_id: str = None) -> Tuple[Any, Any]:
|
||||
"""
|
||||
Run source mapping and grounding insights extraction in parallel.
|
||||
|
||||
Args:
|
||||
outline_sections: List of outline sections to process
|
||||
research: Research data object
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
task_id: Optional task ID for progress updates
|
||||
|
||||
Returns:
|
||||
Tuple of (mapped_sections, grounding_insights)
|
||||
|
||||
Raises:
|
||||
ValueError: If user_id is not provided
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for parallel processing (subscription checks and usage tracking)")
|
||||
|
||||
if task_id:
|
||||
from api.blog_writer.task_manager import task_manager
|
||||
await task_manager.update_progress(task_id, "⚡ Running parallel processing for maximum speed...")
|
||||
@@ -37,7 +44,7 @@ class ParallelProcessor:
|
||||
|
||||
# Run these tasks in parallel to save time
|
||||
source_mapping_task = asyncio.create_task(
|
||||
self._run_source_mapping(outline_sections, research, task_id)
|
||||
self._run_source_mapping(outline_sections, research, task_id, user_id)
|
||||
)
|
||||
|
||||
grounding_insights_task = asyncio.create_task(
|
||||
@@ -52,22 +59,29 @@ class ParallelProcessor:
|
||||
|
||||
return mapped_sections, grounding_insights
|
||||
|
||||
async def run_parallel_processing_async(self, outline_sections, research) -> Tuple[Any, Any]:
|
||||
async def run_parallel_processing_async(self, outline_sections, research, user_id: str) -> Tuple[Any, Any]:
|
||||
"""
|
||||
Run parallel processing without progress updates (for non-progress methods).
|
||||
|
||||
Args:
|
||||
outline_sections: List of outline sections to process
|
||||
research: Research data object
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
|
||||
Returns:
|
||||
Tuple of (mapped_sections, grounding_insights)
|
||||
|
||||
Raises:
|
||||
ValueError: If user_id is not provided
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for parallel processing (subscription checks and usage tracking)")
|
||||
|
||||
logger.info("Running parallel processing for maximum speed...")
|
||||
|
||||
# Run these tasks in parallel to save time
|
||||
source_mapping_task = asyncio.create_task(
|
||||
self._run_source_mapping_async(outline_sections, research)
|
||||
self._run_source_mapping_async(outline_sections, research, user_id)
|
||||
)
|
||||
|
||||
grounding_insights_task = asyncio.create_task(
|
||||
@@ -82,12 +96,12 @@ class ParallelProcessor:
|
||||
|
||||
return mapped_sections, grounding_insights
|
||||
|
||||
async def _run_source_mapping(self, outline_sections, research, task_id):
|
||||
async def _run_source_mapping(self, outline_sections, research, task_id, user_id: str):
|
||||
"""Run source mapping in parallel."""
|
||||
if task_id:
|
||||
from api.blog_writer.task_manager import task_manager
|
||||
await task_manager.update_progress(task_id, "🔗 Applying intelligent source-to-section mapping...")
|
||||
return self.source_mapper.map_sources_to_sections(outline_sections, research)
|
||||
return self.source_mapper.map_sources_to_sections(outline_sections, research, user_id)
|
||||
|
||||
async def _run_grounding_insights_extraction(self, research, task_id):
|
||||
"""Run grounding insights extraction in parallel."""
|
||||
@@ -96,10 +110,10 @@ class ParallelProcessor:
|
||||
await task_manager.update_progress(task_id, "🧠 Extracting grounding metadata insights...")
|
||||
return self.grounding_engine.extract_contextual_insights(research.grounding_metadata)
|
||||
|
||||
async def _run_source_mapping_async(self, outline_sections, research):
|
||||
async def _run_source_mapping_async(self, outline_sections, research, user_id: str):
|
||||
"""Run source mapping in parallel (async version without progress updates)."""
|
||||
logger.info("Applying intelligent source-to-section mapping...")
|
||||
return self.source_mapper.map_sources_to_sections(outline_sections, research)
|
||||
return self.source_mapper.map_sources_to_sections(outline_sections, research, user_id)
|
||||
|
||||
async def _run_grounding_insights_extraction_async(self, research):
|
||||
"""Run grounding insights extraction in parallel (async version without progress updates)."""
|
||||
|
||||
@@ -18,8 +18,21 @@ class ResponseProcessor:
|
||||
"""Initialize the response processor."""
|
||||
pass
|
||||
|
||||
async def generate_with_retry(self, prompt: str, schema: Dict[str, Any], task_id: str = None) -> Dict[str, Any]:
|
||||
"""Generate outline with retry logic for API failures."""
|
||||
async def generate_with_retry(self, prompt: str, schema: Dict[str, Any], user_id: str, task_id: str = None) -> Dict[str, Any]:
|
||||
"""Generate outline with retry logic for API failures.
|
||||
|
||||
Args:
|
||||
prompt: The prompt for outline generation
|
||||
schema: JSON schema for structured response
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
task_id: Optional task ID for progress updates
|
||||
|
||||
Raises:
|
||||
ValueError: If user_id is not provided
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for outline generation (subscription checks and usage tracking)")
|
||||
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from api.blog_writer.task_manager import task_manager
|
||||
|
||||
@@ -34,7 +47,8 @@ class ResponseProcessor:
|
||||
outline_data = llm_text_gen(
|
||||
prompt=prompt,
|
||||
json_struct=schema,
|
||||
system_prompt=None
|
||||
system_prompt=None,
|
||||
user_id=user_id
|
||||
)
|
||||
|
||||
# Log response for debugging
|
||||
|
||||
@@ -12,8 +12,23 @@ from models.blog_models import BlogOutlineSection
|
||||
class SectionEnhancer:
|
||||
"""Enhances individual outline sections using AI."""
|
||||
|
||||
async def enhance(self, section: BlogOutlineSection, focus: str = "general improvement") -> BlogOutlineSection:
|
||||
"""Enhance a section using AI with research context."""
|
||||
async def enhance(self, section: BlogOutlineSection, focus: str, user_id: str) -> BlogOutlineSection:
|
||||
"""Enhance a section using AI with research context.
|
||||
|
||||
Args:
|
||||
section: Outline section to enhance
|
||||
focus: Enhancement focus (e.g., "general improvement")
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
|
||||
Returns:
|
||||
Enhanced outline section
|
||||
|
||||
Raises:
|
||||
ValueError: If user_id is not provided
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for section enhancement (subscription checks and usage tracking)")
|
||||
|
||||
enhancement_prompt = f"""
|
||||
Enhance the following blog section to make it more engaging, comprehensive, and valuable:
|
||||
|
||||
@@ -61,7 +76,8 @@ class SectionEnhancer:
|
||||
enhanced_data = llm_text_gen(
|
||||
prompt=enhancement_prompt,
|
||||
json_struct=enhancement_schema,
|
||||
system_prompt=None
|
||||
system_prompt=None,
|
||||
user_id=user_id
|
||||
)
|
||||
|
||||
if isinstance(enhanced_data, dict) and 'error' not in enhanced_data:
|
||||
|
||||
@@ -52,7 +52,8 @@ class SourceToSectionMapper:
|
||||
def map_sources_to_sections(
|
||||
self,
|
||||
sections: List[BlogOutlineSection],
|
||||
research_data: BlogResearchResponse
|
||||
research_data: BlogResearchResponse,
|
||||
user_id: str
|
||||
) -> List[BlogOutlineSection]:
|
||||
"""
|
||||
Map research sources to outline sections using intelligent algorithms.
|
||||
@@ -60,10 +61,17 @@ class SourceToSectionMapper:
|
||||
Args:
|
||||
sections: List of outline sections to map sources to
|
||||
research_data: Research data containing sources and metadata
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
|
||||
Returns:
|
||||
List of outline sections with intelligently mapped sources
|
||||
|
||||
Raises:
|
||||
ValueError: If user_id is not provided
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for source mapping (subscription checks and usage tracking)")
|
||||
|
||||
if not sections or not research_data.sources:
|
||||
logger.warning("No sections or sources to map")
|
||||
return sections
|
||||
@@ -73,8 +81,8 @@ class SourceToSectionMapper:
|
||||
# Step 1: Algorithmic mapping
|
||||
mapping_results = self._algorithmic_source_mapping(sections, research_data)
|
||||
|
||||
# Step 2: AI validation and improvement (single prompt)
|
||||
validated_mapping = self._ai_validate_mapping(mapping_results, research_data)
|
||||
# Step 2: AI validation and improvement (single prompt, user_id required for subscription checks)
|
||||
validated_mapping = self._ai_validate_mapping(mapping_results, research_data, user_id)
|
||||
|
||||
# Step 3: Apply validated mapping to sections
|
||||
mapped_sections = self._apply_mapping_to_sections(sections, validated_mapping)
|
||||
@@ -261,7 +269,8 @@ class SourceToSectionMapper:
|
||||
def _ai_validate_mapping(
|
||||
self,
|
||||
mapping_results: Dict[str, List[Tuple[ResearchSource, float]]],
|
||||
research_data: BlogResearchResponse
|
||||
research_data: BlogResearchResponse,
|
||||
user_id: str
|
||||
) -> Dict[str, List[Tuple[ResearchSource, float]]]:
|
||||
"""
|
||||
Use AI to validate and improve the algorithmic mapping results.
|
||||
@@ -269,18 +278,25 @@ class SourceToSectionMapper:
|
||||
Args:
|
||||
mapping_results: Algorithmic mapping results
|
||||
research_data: Research data for context
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
|
||||
Returns:
|
||||
AI-validated and improved mapping results
|
||||
|
||||
Raises:
|
||||
ValueError: If user_id is not provided
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for AI validation (subscription checks and usage tracking)")
|
||||
|
||||
try:
|
||||
logger.info("Starting AI validation of source-to-section mapping...")
|
||||
|
||||
# Build AI validation prompt
|
||||
validation_prompt = self._build_validation_prompt(mapping_results, research_data)
|
||||
|
||||
# Get AI validation response
|
||||
validation_response = self._get_ai_validation_response(validation_prompt)
|
||||
# Get AI validation response (user_id required for subscription checks)
|
||||
validation_response = self._get_ai_validation_response(validation_prompt, user_id)
|
||||
|
||||
# Parse and apply AI validation results
|
||||
validated_mapping = self._parse_validation_response(validation_response, mapping_results, research_data)
|
||||
@@ -548,23 +564,31 @@ Analyze the mapping and provide your recommendations.
|
||||
|
||||
return prompt
|
||||
|
||||
def _get_ai_validation_response(self, prompt: str) -> str:
|
||||
def _get_ai_validation_response(self, prompt: str, user_id: str) -> str:
|
||||
"""
|
||||
Get AI validation response using LLM provider.
|
||||
|
||||
Args:
|
||||
prompt: Validation prompt
|
||||
user_id: User ID (required for subscription checks and usage tracking)
|
||||
|
||||
Returns:
|
||||
AI validation response
|
||||
|
||||
Raises:
|
||||
ValueError: If user_id is not provided
|
||||
"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for AI validation response (subscription checks and usage tracking)")
|
||||
|
||||
try:
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
response = llm_text_gen(
|
||||
prompt=prompt,
|
||||
json_struct=None,
|
||||
system_prompt=None
|
||||
system_prompt=None,
|
||||
user_id=user_id
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
@@ -13,11 +13,17 @@ from .keyword_analyzer import KeywordAnalyzer
|
||||
from .competitor_analyzer import CompetitorAnalyzer
|
||||
from .content_angle_generator import ContentAngleGenerator
|
||||
from .data_filter import ResearchDataFilter
|
||||
from .base_provider import ResearchProvider as BaseResearchProvider
|
||||
from .google_provider import GoogleResearchProvider
|
||||
from .exa_provider import ExaResearchProvider
|
||||
|
||||
__all__ = [
|
||||
'ResearchService',
|
||||
'KeywordAnalyzer',
|
||||
'CompetitorAnalyzer',
|
||||
'ContentAngleGenerator',
|
||||
'ResearchDataFilter'
|
||||
'ResearchDataFilter',
|
||||
'BaseResearchProvider',
|
||||
'GoogleResearchProvider',
|
||||
'ExaResearchProvider',
|
||||
]
|
||||
|
||||
37
backend/services/blog_writer/research/base_provider.py
Normal file
37
backend/services/blog_writer/research/base_provider.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""
|
||||
Base Research Provider Interface
|
||||
|
||||
Abstract base class for research provider implementations.
|
||||
Ensures consistency across different research providers (Google, Exa, etc.)
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any
|
||||
|
||||
|
||||
class ResearchProvider(ABC):
    """Interface that every research provider implementation must satisfy.

    The class is abstract: a subclass has to override ``search``,
    ``get_provider_enum`` and ``estimate_tokens`` before it can be
    instantiated.
    """

    @abstractmethod
    async def search(
        self,
        prompt: str,
        topic: str,
        industry: str,
        target_audience: str,
        config: Any,  # ResearchConfig
        user_id: str
    ) -> Dict[str, Any]:
        """Run the research query and hand back the provider's raw results.

        Args:
            prompt: Research prompt text.
            topic: Topic being researched.
            industry: Industry the research is scoped to.
            target_audience: Audience the research is aimed at.
            config: Research configuration object (a ``ResearchConfig``).
            user_id: ID of the requesting user.

        Returns:
            A dictionary holding the raw research results.
        """
        ...

    @abstractmethod
    def get_provider_enum(self):
        """Return the APIProvider enum member used for subscription tracking."""
        ...

    @abstractmethod
    def estimate_tokens(self) -> int:
        """Return the estimated token usage used for pre-flight validation."""
        ...
|
||||
|
||||
188
backend/services/blog_writer/research/exa_provider.py
Normal file
188
backend/services/blog_writer/research/exa_provider.py
Normal file
@@ -0,0 +1,188 @@
|
||||
"""
|
||||
Exa Research Provider
|
||||
|
||||
Neural search implementation using Exa API for high-quality, citation-rich research.
|
||||
"""
|
||||
|
||||
from exa_py import Exa
|
||||
import os
|
||||
from loguru import logger
|
||||
from models.subscription_models import APIProvider
|
||||
from .base_provider import ResearchProvider as BaseProvider
|
||||
|
||||
|
||||
class ExaResearchProvider(BaseProvider):
|
||||
"""Exa neural search provider."""
|
||||
|
||||
def __init__(self):
    """Build the Exa client from the ``EXA_API_KEY`` environment variable.

    Raises:
        RuntimeError: If ``EXA_API_KEY`` is unset or empty.
    """
    configured_key = os.environ.get("EXA_API_KEY")
    self.api_key = configured_key

    # Fail fast: without a key the Exa client cannot be created.
    if not configured_key:
        raise RuntimeError("EXA_API_KEY not configured")

    client = Exa(configured_key)
    self.exa = client
    logger.info("✅ Exa Research Provider initialized")
|
||||
|
||||
async def search(self, prompt, topic, industry, target_audience, config, user_id):
    """Execute an Exa search and return the results in a standardized dict.

    Args:
        prompt: Not referenced in this method; accepted to match the base
            provider's ``search`` signature.
        topic: Interpolated into the query string and the summary query.
        industry: Interpolated into the query string.
        target_audience: Interpolated into the query string.
        config: Object exposing ``source_types`` (used to choose the Exa
            category) and ``max_sources`` (capped at 25 results).
        user_id: Not referenced in this method; accepted to match the base
            provider's ``search`` signature.

    Returns:
        Dict with the keys ``sources``, ``content``, ``search_type``,
        ``provider`` (always ``'exa'``), ``search_queries`` (single-item
        list holding the query) and ``cost`` (``{'total': <float>}``).
    """
    # Build Exa query
    query = f"{topic} {industry} {target_audience}"

    # Map source types to Exa categories
    category = self._map_source_type_to_category(config.source_types)

    logger.info(f"[Exa Research] Executing search: {query}")

    # NOTE(review): this call is not awaited, so if the client is the
    # synchronous exa_py ``Exa`` it blocks the event loop for the duration
    # of the request — confirm that is acceptable for callers.
    # NOTE(review): confirm the installed exa_py version accepts a
    # ``contents`` dict; some versions take text/summary/highlights as
    # direct keyword arguments instead.
    results = self.exa.search_and_contents(
        query,
        type="auto",
        category=category,
        num_results=min(config.max_sources, 25),
        contents={
            'text': {'max_characters': 1000},
            'summary': {'query': f"Key insights about {topic}"},
            'highlights': {
                'num_sentences': 2,
                'highlights_per_url': 3
            }
        }
    )

    # Transform to standardized format
    sources = self._transform_sources(results.results)
    content = self._aggregate_content(results.results)

    # The SDK response objects use snake_case attribute names; the camelCase
    # REST names are kept as a fallback so either shape is understood.
    search_type = (
        getattr(results, 'resolved_search_type', None)
        or getattr(results, 'resolvedSearchType', None)
        or 'neural'
    )

    # Get cost if available
    cost = 0.005  # Default Exa cost for 1-25 results
    cost_info = getattr(results, 'cost_dollars', None)
    if cost_info is None:
        cost_info = getattr(results, 'costDollars', None)
    reported_total = getattr(cost_info, 'total', None)
    if reported_total is not None:
        cost = reported_total

    logger.info(f"[Exa Research] Search completed: {len(sources)} sources, type: {search_type}")

    return {
        'sources': sources,
        'content': content,
        'search_type': search_type,
        'provider': 'exa',
        'search_queries': [query],
        'cost': {'total': cost}
    }
|
||||
|
||||
def get_provider_enum(self):
    """Identify this provider as ``APIProvider.EXA`` for subscription tracking."""
    provider = APIProvider.EXA
    return provider
|
||||
|
||||
def estimate_tokens(self) -> int:
    """Return the estimated token usage for an Exa call, which is always zero."""
    # Exa is per-search, not token-based, so there is nothing to estimate.
    token_estimate = 0
    return token_estimate
|
||||
|
||||
def _map_source_type_to_category(self, source_types):
    """Pick the Exa ``category`` for the first recognised source type.

    Args:
        source_types: Iterable of enum-like objects exposing ``.value``,
            or a falsy value when no source preference is set.

    Returns:
        The Exa category string for the first entry whose ``.value`` is a
        known key, or ``None`` when nothing matches or the input is empty.
    """
    exa_categories = {
        'research paper': 'research paper',
        'news': 'news',
        'web': 'personal site',
        'industry': 'company',
        'expert': 'linkedin profile',
    }

    if not source_types:
        return None

    # Exa accepts a single category, so the first match wins.
    return next(
        (
            exa_categories[entry.value]
            for entry in source_types
            if entry.value in exa_categories
        ),
        None,
    )
|
||||
|
||||
def _transform_sources(self, results):
    """Transform Exa results into a list of source dictionaries.

    Missing *and* ``None``-valued result attributes are normalised to the
    same defaults (``''`` for text fields, ``[]`` for highlights), so the
    output never carries ``None`` where a string or list is expected.

    Args:
        results: Iterable of Exa result objects; attributes are read
            defensively, so partial objects are accepted.

    Returns:
        list[dict]: One dict per result with keys ``title``, ``url``,
        ``excerpt``, ``credibility_score``, ``published_at``, ``index``,
        ``source_type``, ``content``, ``highlights`` and ``summary``.
    """
    sources = []
    for idx, result in enumerate(results):
        url = getattr(result, 'url', None) or ''

        # The publish date may be exposed as ``published_date`` or
        # ``publishedDate`` depending on the SDK version; accept either.
        published_at = getattr(result, 'published_date', None)
        if published_at is None:
            published_at = getattr(result, 'publishedDate', None)

        sources.append({
            'title': getattr(result, 'title', None) or '',
            'url': url,
            'excerpt': self._get_excerpt(result),
            'credibility_score': 0.85,  # Exa results are high quality
            'published_at': published_at,
            'index': idx,
            'source_type': self._determine_source_type(url),
            'content': getattr(result, 'text', None) or '',
            'highlights': getattr(result, 'highlights', None) or [],
            'summary': getattr(result, 'summary', None) or '',
        })

    return sources
|
||||
|
||||
def _get_excerpt(self, result):
    """Return a short excerpt for an Exa result.

    Prefers the first 500 characters of ``result.text``; falls back to the
    full ``result.summary``; returns ``''`` when neither is available.
    """
    body_text = getattr(result, 'text', None)
    if body_text:
        return body_text[:500]

    summary = getattr(result, 'summary', None)
    if summary:
        return summary

    return ''
|
||||
|
||||
def _determine_source_type(self, url):
    """Classify a URL as ``'academic'``, ``'news'``, ``'expert'`` or ``'web'``.

    Classification is a case-insensitive substring match against the whole
    URL; rules are checked in order and the first hit wins. Empty or missing
    URLs are classified as ``'web'``.
    """
    if not url:
        return 'web'

    lowered = url.lower()
    # (label, substrings) pairs, in priority order.
    rules = (
        ('academic', ('arxiv.org', 'research')),
        ('news', ('cnn.com', 'bbc.com', 'reuters.com', 'theguardian.com')),
        ('expert', ('linkedin.com',)),
    )
    for label, needles in rules:
        if any(needle in lowered for needle in needles):
            return label
    return 'web'
|
||||
|
||||
def _aggregate_content(self, results):
    """Join per-result snippets into one text blob for LLM analysis.

    Each result contributes ``"Source N: <snippet>"`` where the snippet is
    its summary or, failing that, the first 1000 characters of its text.
    Results with neither are omitted but still consume their number, so N
    always reflects the result's 1-based position in ``results``.
    """
    def _snippet(item):
        summary = getattr(item, 'summary', None)
        if summary:
            return summary
        text = getattr(item, 'text', None)
        if text:
            return text[:1000]
        return None

    numbered = (
        (position, _snippet(item))
        for position, item in enumerate(results, start=1)
    )
    return "\n\n".join(
        f"Source {position}: {snippet}"
        for position, snippet in numbered
        if snippet
    )
|
||||
|
||||
def track_exa_usage(self, user_id: str, cost: float):
    """Record one Exa call and its cost in the user's current usage summary.

    Increments ``exa_calls``/``total_calls`` and adds ``cost`` to
    ``exa_cost``/``total_cost`` on the ``usage_summaries`` row for the
    user's current billing period. Tracking is best-effort: failures are
    logged and rolled back, never raised to the caller.

    Args:
        user_id: ID of the user the Exa call is billed to.
        cost: Cost of the call in dollars.
    """
    from services.database import get_db
    from services.subscription import PricingService
    from sqlalchemy import text

    db = next(get_db())
    try:
        pricing_service = PricingService(db)
        current_period = pricing_service.get_current_billing_period(user_id)

        # Update exa_calls and exa_cost via SQL UPDATE
        update_query = text("""
            UPDATE usage_summaries
            SET exa_calls = COALESCE(exa_calls, 0) + 1,
                exa_cost = COALESCE(exa_cost, 0) + :cost,
                total_calls = total_calls + 1,
                total_cost = total_cost + :cost
            WHERE user_id = :user_id AND billing_period = :period
        """)
        result = db.execute(update_query, {
            'cost': cost,
            'user_id': user_id,
            'period': current_period,
        })
        updated_rows = result.rowcount
        db.commit()

        if updated_rows == 0:
            # An UPDATE that matches no row succeeds silently; say so rather
            # than claiming the usage was tracked.
            logger.warning(
                f"[Exa] No usage summary row for user={user_id}, "
                f"period={current_period}; usage not recorded"
            )
        else:
            logger.info(f"[Exa] Tracked usage: user={user_id}, cost=${cost}")
    except Exception as e:
        # Deliberately broad: usage tracking must not break the research flow.
        logger.error(f"[Exa] Failed to track usage: {e}")
        db.rollback()
    finally:
        db.close()
|
||||
|
||||
40
backend/services/blog_writer/research/google_provider.py
Normal file
40
backend/services/blog_writer/research/google_provider.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""
|
||||
Google Research Provider
|
||||
|
||||
Wrapper for Gemini native Google Search grounding to match base provider interface.
|
||||
"""
|
||||
|
||||
from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider
|
||||
from models.subscription_models import APIProvider
|
||||
from .base_provider import ResearchProvider as BaseProvider
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class GoogleResearchProvider(BaseProvider):
    """Research provider backed by Gemini's native Google Search grounding."""

    def __init__(self):
        """Create the underlying grounded Gemini provider."""
        self.gemini = GeminiGroundedProvider()

    async def search(self, prompt, topic, industry, target_audience, config, user_id):
        """Run a grounded Gemini request for ``prompt`` and return its result.

        Only ``prompt`` and ``user_id`` are forwarded to Gemini; ``topic`` is
        used for logging, and ``industry``, ``target_audience`` and ``config``
        are accepted to match the base provider interface.
        """
        logger.info(f"[Google Research] Executing search for topic: {topic}")

        grounded_kwargs = dict(
            prompt=prompt,
            content_type="research",
            max_tokens=2000,
            user_id=user_id,
            validate_subsequent_operations=True,
        )
        return await self.gemini.generate_grounded_content(**grounded_kwargs)

    def get_provider_enum(self):
        """Identify this provider as ``APIProvider.GEMINI`` for subscription tracking."""
        provider = APIProvider.GEMINI
        return provider

    def estimate_tokens(self) -> int:
        """Return a conservative fixed token estimate for one grounded call."""
        conservative_estimate = 1200
        return conservative_estimate
|
||||
|
||||
@@ -16,6 +16,9 @@ from models.blog_models import (
|
||||
GroundingChunk,
|
||||
GroundingSupport,
|
||||
Citation,
|
||||
ResearchConfig,
|
||||
ResearchMode,
|
||||
ResearchProvider,
|
||||
)
|
||||
from services.blog_writer.logger_config import blog_writer_logger, log_function_call
|
||||
from fastapi import HTTPException
|
||||
@@ -24,6 +27,7 @@ from .keyword_analyzer import KeywordAnalyzer
|
||||
from .competitor_analyzer import CompetitorAnalyzer
|
||||
from .content_angle_generator import ContentAngleGenerator
|
||||
from .data_filter import ResearchDataFilter
|
||||
from .research_strategies import get_strategy_for_mode
|
||||
|
||||
|
||||
class ResearchService:
|
||||
@@ -44,7 +48,6 @@ class ResearchService:
|
||||
Includes intelligent caching for exact keyword matches.
|
||||
"""
|
||||
try:
|
||||
from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider
|
||||
from services.cache.research_cache import research_cache
|
||||
|
||||
topic = request.topic or ", ".join(request.keywords)
|
||||
@@ -79,62 +82,104 @@ class ResearchService:
|
||||
|
||||
# Cache miss - proceed with API call
|
||||
logger.info(f"Cache miss - making API call for keywords: {request.keywords}")
|
||||
blog_writer_logger.log_operation_start("gemini_api_call", api_name="gemini_grounded", operation="research")
|
||||
gemini = GeminiGroundedProvider()
|
||||
blog_writer_logger.log_operation_start("research_api_call", api_name="research", operation="research")
|
||||
|
||||
# Single comprehensive research prompt - Gemini handles Google Search automatically
|
||||
research_prompt = f"""
|
||||
Research the topic "{topic}" in the {industry} industry for {target_audience} audience. Provide a comprehensive analysis including:
|
||||
|
||||
1. Current trends and insights (2024-2025)
|
||||
2. Key statistics and data points with sources
|
||||
3. Industry expert opinions and quotes
|
||||
4. Recent developments and news
|
||||
5. Market analysis and forecasts
|
||||
6. Best practices and case studies
|
||||
7. Keyword analysis: primary, secondary, and long-tail opportunities
|
||||
8. Competitor analysis: top players and content gaps
|
||||
9. Content angle suggestions: 5 compelling angles for blog posts
|
||||
|
||||
Focus on factual, up-to-date information from credible sources.
|
||||
Include specific data points, percentages, and recent developments.
|
||||
Structure your response with clear sections for each analysis area.
|
||||
"""
|
||||
# Determine research mode and get appropriate strategy
|
||||
research_mode = request.research_mode or ResearchMode.BASIC
|
||||
config = request.config or ResearchConfig(mode=research_mode, provider=ResearchProvider.GOOGLE)
|
||||
strategy = get_strategy_for_mode(research_mode)
|
||||
|
||||
# Single Gemini call with native Google Search grounding - no fallbacks
|
||||
# Validation is handled inside generate_grounded_content when validate_subsequent_operations=True
|
||||
import time
|
||||
api_start_time = time.time()
|
||||
gemini_result = await gemini.generate_grounded_content(
|
||||
prompt=research_prompt,
|
||||
content_type="research",
|
||||
max_tokens=2000,
|
||||
user_id=user_id,
|
||||
validate_subsequent_operations=True # Validates Google Grounding + 3 LLM calls
|
||||
)
|
||||
api_duration_ms = (time.time() - api_start_time) * 1000
|
||||
logger.info(f"Research: mode={research_mode.value}, provider={config.provider.value}")
|
||||
|
||||
# Log API call performance
|
||||
blog_writer_logger.log_api_call(
|
||||
"gemini_grounded",
|
||||
"generate_grounded_content",
|
||||
api_duration_ms,
|
||||
token_usage=gemini_result.get("token_usage", {}),
|
||||
content_length=len(gemini_result.get("content", ""))
|
||||
)
|
||||
# Build research prompt based on strategy
|
||||
research_prompt = strategy.build_research_prompt(topic, industry, target_audience, config)
|
||||
|
||||
# Extract sources from grounding metadata
|
||||
sources = self._extract_sources_from_grounding(gemini_result)
|
||||
# Route to appropriate provider
|
||||
if config.provider == ResearchProvider.EXA:
|
||||
# Exa research workflow
|
||||
from .exa_provider import ExaResearchProvider
|
||||
from services.subscription.preflight_validator import validate_exa_research_operations
|
||||
from services.database import get_db
|
||||
from services.subscription import PricingService
|
||||
import os
|
||||
import time
|
||||
|
||||
# Pre-flight validation
|
||||
db_val = next(get_db())
|
||||
try:
|
||||
pricing_service = PricingService(db_val)
|
||||
gpt_provider = os.getenv("GPT_PROVIDER", "google")
|
||||
validate_exa_research_operations(pricing_service, user_id, gpt_provider)
|
||||
finally:
|
||||
db_val.close()
|
||||
|
||||
# Execute Exa search
|
||||
api_start_time = time.time()
|
||||
try:
|
||||
exa_provider = ExaResearchProvider()
|
||||
raw_result = await exa_provider.search(
|
||||
research_prompt, topic, industry, target_audience, config, user_id
|
||||
)
|
||||
api_duration_ms = (time.time() - api_start_time) * 1000
|
||||
|
||||
# Track usage
|
||||
cost = raw_result.get('cost', {}).get('total', 0.005) if isinstance(raw_result.get('cost'), dict) else 0.005
|
||||
exa_provider.track_exa_usage(user_id, cost)
|
||||
|
||||
# Log API call performance
|
||||
blog_writer_logger.log_api_call(
|
||||
"exa_search",
|
||||
"search_and_contents",
|
||||
api_duration_ms,
|
||||
token_usage={},
|
||||
content_length=len(raw_result.get('content', ''))
|
||||
)
|
||||
|
||||
# Extract content for downstream analysis
|
||||
content = raw_result.get('content', '')
|
||||
sources = raw_result.get('sources', [])
|
||||
search_widget = "" # Exa doesn't provide search widgets
|
||||
search_queries = raw_result.get('search_queries', [])
|
||||
grounding_metadata = None # Exa doesn't provide grounding metadata
|
||||
|
||||
except RuntimeError as e:
|
||||
if "EXA_API_KEY not configured" in str(e):
|
||||
logger.warning("Exa not configured, falling back to Google")
|
||||
config.provider = ResearchProvider.GOOGLE
|
||||
# Continue to Google flow below
|
||||
raw_result = None
|
||||
else:
|
||||
raise
|
||||
|
||||
if config.provider != ResearchProvider.EXA:
|
||||
# Google research (existing flow) or fallback from Exa
|
||||
from .google_provider import GoogleResearchProvider
|
||||
import time
|
||||
|
||||
api_start_time = time.time()
|
||||
google_provider = GoogleResearchProvider()
|
||||
gemini_result = await google_provider.search(
|
||||
research_prompt, topic, industry, target_audience, config, user_id
|
||||
)
|
||||
api_duration_ms = (time.time() - api_start_time) * 1000
|
||||
|
||||
# Log API call performance
|
||||
blog_writer_logger.log_api_call(
|
||||
"gemini_grounded",
|
||||
"generate_grounded_content",
|
||||
api_duration_ms,
|
||||
token_usage=gemini_result.get("token_usage", {}),
|
||||
content_length=len(gemini_result.get("content", ""))
|
||||
)
|
||||
|
||||
# Extract sources and content
|
||||
sources = self._extract_sources_from_grounding(gemini_result)
|
||||
content = gemini_result.get("content", "")
|
||||
search_widget = gemini_result.get("search_widget", "") or ""
|
||||
search_queries = gemini_result.get("search_queries", []) or []
|
||||
grounding_metadata = self._extract_grounding_metadata(gemini_result)
|
||||
|
||||
# Extract grounding metadata for detailed UI display
|
||||
grounding_metadata = self._extract_grounding_metadata(gemini_result)
|
||||
|
||||
# Extract search widget and queries for UI display
|
||||
search_widget = gemini_result.get("search_widget", "") or ""
|
||||
search_queries = gemini_result.get("search_queries", []) or []
|
||||
|
||||
# Parse the comprehensive response for different analysis components
|
||||
content = gemini_result.get("content", "")
|
||||
# Continue with common analysis (same for both providers)
|
||||
keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords, user_id=user_id)
|
||||
competitor_analysis = self.competitor_analyzer.analyze(content, user_id=user_id)
|
||||
suggested_angles = self.content_angle_generator.generate(content, topic, industry, user_id=user_id)
|
||||
@@ -261,7 +306,6 @@ class ResearchService:
|
||||
Research method with progress updates for real-time feedback.
|
||||
"""
|
||||
try:
|
||||
from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider
|
||||
from services.cache.research_cache import research_cache
|
||||
from services.cache.persistent_research_cache import persistent_research_cache
|
||||
from api.blog_writer.task_manager import task_manager
|
||||
@@ -293,66 +337,100 @@ class ResearchService:
|
||||
logger.info(f"Returning cached research result for keywords: {request.keywords}")
|
||||
return BlogResearchResponse(**cached_result)
|
||||
|
||||
# User ID validation (validation logic is now in Google Grounding provider)
|
||||
# User ID validation
|
||||
if not user_id:
|
||||
await task_manager.update_progress(task_id, "❌ Error: User ID is required for research operation")
|
||||
raise ValueError("user_id is required for research operation. Please provide Clerk user ID.")
|
||||
|
||||
# Cache miss - proceed with API call
|
||||
await task_manager.update_progress(task_id, "🌐 Cache miss - connecting to Google Search grounding...")
|
||||
logger.info(f"Cache miss - making API call for keywords: {request.keywords}")
|
||||
gemini = GeminiGroundedProvider()
|
||||
|
||||
# Single comprehensive research prompt - Gemini handles Google Search automatically
|
||||
research_prompt = f"""
|
||||
Research the topic "{topic}" in the {industry} industry for {target_audience} audience. Provide a comprehensive analysis including:
|
||||
|
||||
1. Current trends and insights (2024-2025)
|
||||
2. Key statistics and data points with sources
|
||||
3. Industry expert opinions and quotes
|
||||
4. Recent developments and news
|
||||
5. Market analysis and forecasts
|
||||
6. Best practices and case studies
|
||||
7. Keyword analysis: primary, secondary, and long-tail opportunities
|
||||
8. Competitor analysis: top players and content gaps
|
||||
9. Content angle suggestions: 5 compelling angles for blog posts
|
||||
|
||||
Focus on factual, up-to-date information from credible sources.
|
||||
Include specific data points, percentages, and recent developments.
|
||||
Structure your response with clear sections for each analysis area.
|
||||
"""
|
||||
# Determine research mode and get appropriate strategy
|
||||
research_mode = request.research_mode or ResearchMode.BASIC
|
||||
config = request.config or ResearchConfig(mode=research_mode, provider=ResearchProvider.GOOGLE)
|
||||
strategy = get_strategy_for_mode(research_mode)
|
||||
|
||||
await task_manager.update_progress(task_id, "🤖 Making AI request to Gemini with Google Search grounding...")
|
||||
# Single Gemini call with native Google Search grounding - no fallbacks
|
||||
# Validation is handled inside generate_grounded_content when validate_subsequent_operations=True
|
||||
try:
|
||||
gemini_result = await gemini.generate_grounded_content(
|
||||
prompt=research_prompt,
|
||||
content_type="research",
|
||||
max_tokens=2000,
|
||||
user_id=user_id,
|
||||
validate_subsequent_operations=True # Validates Google Grounding + 3 LLM calls
|
||||
)
|
||||
except HTTPException as http_error:
|
||||
# Re-raise HTTPException so it can be properly handled by task manager
|
||||
logger.error(f"Subscription limit exceeded for research: {http_error.detail}")
|
||||
await task_manager.update_progress(task_id, f"❌ Subscription limit exceeded: {http_error.detail.get('message', str(http_error.detail)) if isinstance(http_error.detail, dict) else str(http_error.detail)}")
|
||||
raise # Re-raise HTTPException to preserve status code and error details
|
||||
logger.info(f"Research: mode={research_mode.value}, provider={config.provider.value}")
|
||||
|
||||
await task_manager.update_progress(task_id, "📊 Processing research results and extracting insights...")
|
||||
# Extract sources from grounding metadata
|
||||
sources = self._extract_sources_from_grounding(gemini_result)
|
||||
# Build research prompt based on strategy
|
||||
research_prompt = strategy.build_research_prompt(topic, industry, target_audience, config)
|
||||
|
||||
# Extract grounding metadata for detailed UI display
|
||||
grounding_metadata = self._extract_grounding_metadata(gemini_result)
|
||||
|
||||
# Extract search widget and queries for UI display
|
||||
search_widget = gemini_result.get("search_widget", "") or ""
|
||||
search_queries = gemini_result.get("search_queries", []) or []
|
||||
# Route to appropriate provider
|
||||
if config.provider == ResearchProvider.EXA:
|
||||
# Exa research workflow
|
||||
from .exa_provider import ExaResearchProvider
|
||||
from services.subscription.preflight_validator import validate_exa_research_operations
|
||||
from services.database import get_db
|
||||
from services.subscription import PricingService
|
||||
import os
|
||||
|
||||
await task_manager.update_progress(task_id, "🌐 Connecting to Exa neural search...")
|
||||
|
||||
# Pre-flight validation
|
||||
db_val = next(get_db())
|
||||
try:
|
||||
pricing_service = PricingService(db_val)
|
||||
gpt_provider = os.getenv("GPT_PROVIDER", "google")
|
||||
validate_exa_research_operations(pricing_service, user_id, gpt_provider)
|
||||
except HTTPException as http_error:
|
||||
logger.error(f"Subscription limit exceeded for Exa research: {http_error.detail}")
|
||||
await task_manager.update_progress(task_id, f"❌ Subscription limit exceeded: {http_error.detail.get('message', str(http_error.detail)) if isinstance(http_error.detail, dict) else str(http_error.detail)}")
|
||||
raise
|
||||
finally:
|
||||
db_val.close()
|
||||
|
||||
# Execute Exa search
|
||||
await task_manager.update_progress(task_id, "🤖 Executing Exa neural search...")
|
||||
try:
|
||||
exa_provider = ExaResearchProvider()
|
||||
raw_result = await exa_provider.search(
|
||||
research_prompt, topic, industry, target_audience, config, user_id
|
||||
)
|
||||
|
||||
# Track usage
|
||||
cost = raw_result.get('cost', {}).get('total', 0.005) if isinstance(raw_result.get('cost'), dict) else 0.005
|
||||
exa_provider.track_exa_usage(user_id, cost)
|
||||
|
||||
# Extract content for downstream analysis
|
||||
content = raw_result.get('content', '')
|
||||
sources = raw_result.get('sources', [])
|
||||
search_widget = "" # Exa doesn't provide search widgets
|
||||
search_queries = raw_result.get('search_queries', [])
|
||||
grounding_metadata = None # Exa doesn't provide grounding metadata
|
||||
|
||||
except RuntimeError as e:
|
||||
if "EXA_API_KEY not configured" in str(e):
|
||||
logger.warning("Exa not configured, falling back to Google")
|
||||
await task_manager.update_progress(task_id, "⚠️ Exa not configured, falling back to Google Search")
|
||||
config.provider = ResearchProvider.GOOGLE
|
||||
# Continue to Google flow below
|
||||
else:
|
||||
raise
|
||||
|
||||
if config.provider != ResearchProvider.EXA:
|
||||
# Google research (existing flow)
|
||||
from .google_provider import GoogleResearchProvider
|
||||
|
||||
await task_manager.update_progress(task_id, "🌐 Connecting to Google Search grounding...")
|
||||
google_provider = GoogleResearchProvider()
|
||||
|
||||
await task_manager.update_progress(task_id, "🤖 Making AI request to Gemini with Google Search grounding...")
|
||||
try:
|
||||
gemini_result = await google_provider.search(
|
||||
research_prompt, topic, industry, target_audience, config, user_id
|
||||
)
|
||||
except HTTPException as http_error:
|
||||
logger.error(f"Subscription limit exceeded for Google research: {http_error.detail}")
|
||||
await task_manager.update_progress(task_id, f"❌ Subscription limit exceeded: {http_error.detail.get('message', str(http_error.detail)) if isinstance(http_error.detail, dict) else str(http_error.detail)}")
|
||||
raise
|
||||
|
||||
await task_manager.update_progress(task_id, "📊 Processing research results and extracting insights...")
|
||||
# Extract sources and content
|
||||
sources = self._extract_sources_from_grounding(gemini_result)
|
||||
content = gemini_result.get("content", "")
|
||||
search_widget = gemini_result.get("search_widget", "") or ""
|
||||
search_queries = gemini_result.get("search_queries", []) or []
|
||||
grounding_metadata = self._extract_grounding_metadata(gemini_result)
|
||||
|
||||
# Continue with common analysis (same for both providers)
|
||||
await task_manager.update_progress(task_id, "🔍 Analyzing keywords and content angles...")
|
||||
# Parse the comprehensive response for different analysis components
|
||||
content = gemini_result.get("content", "")
|
||||
keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords, user_id=user_id)
|
||||
competitor_analysis = self.competitor_analyzer.analyze(content, user_id=user_id)
|
||||
suggested_angles = self.content_angle_generator.generate(content, topic, industry, user_id=user_id)
|
||||
|
||||
234
backend/services/blog_writer/research/research_strategies.py
Normal file
234
backend/services/blog_writer/research/research_strategies.py
Normal file
@@ -0,0 +1,234 @@
|
||||
"""
|
||||
Research Strategy Pattern Implementation
|
||||
|
||||
Different strategies for executing research based on depth and focus.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any
|
||||
from loguru import logger
|
||||
|
||||
from models.blog_models import BlogResearchRequest, ResearchMode, ResearchConfig
|
||||
from .keyword_analyzer import KeywordAnalyzer
|
||||
from .competitor_analyzer import CompetitorAnalyzer
|
||||
from .content_angle_generator import ContentAngleGenerator
|
||||
|
||||
|
||||
class ResearchStrategy(ABC):
    """Abstract base for research strategies.

    Subclasses supply the prompt for a given research mode; each instance
    carries its own keyword, competitor and content-angle analyzers.
    """

    def __init__(self):
        self.keyword_analyzer = KeywordAnalyzer()
        self.competitor_analyzer = CompetitorAnalyzer()
        self.content_angle_generator = ContentAngleGenerator()

    @abstractmethod
    def build_research_prompt(
        self,
        topic: str,
        industry: str,
        target_audience: str,
        config: ResearchConfig,
    ) -> str:
        """Return the research prompt text for this strategy."""

    @abstractmethod
    def get_mode(self) -> ResearchMode:
        """Return the ``ResearchMode`` this strategy implements."""
|
||||
|
||||
|
||||
class BasicResearchStrategy(ResearchStrategy):
    """Lightweight strategy: trends, statistics, keywords and content angles."""

    def get_mode(self) -> ResearchMode:
        """Return ``ResearchMode.BASIC``."""
        return ResearchMode.BASIC

    def build_research_prompt(
        self,
        topic: str,
        industry: str,
        target_audience: str,
        config: ResearchConfig,
    ) -> str:
        """Return the fixed-format basic research prompt.

        ``config`` is accepted for interface compatibility and is not read.
        """
        return f"""You are a professional blog content strategist researching for a {industry} blog targeting {target_audience}.

Research Topic: "{topic}"

Provide analysis in this EXACT format:

## CURRENT TRENDS (2024-2025)
- [Trend 1 with specific data and source URL]
- [Trend 2 with specific data and source URL]
- [Trend 3 with specific data and source URL]

## KEY STATISTICS
- [Statistic 1: specific number/percentage with source URL]
- [Statistic 2: specific number/percentage with source URL]
- [Statistic 3: specific number/percentage with source URL]
- [Statistic 4: specific number/percentage with source URL]
- [Statistic 5: specific number/percentage with source URL]

## PRIMARY KEYWORDS
1. "{topic}" (main keyword)
2. [Variation 1]
3. [Variation 2]

## SECONDARY KEYWORDS
[5 related keywords for blog content]

## CONTENT ANGLES (Top 5)
1. [Angle 1: specific unique approach]
2. [Angle 2: specific unique approach]
3. [Angle 3: specific unique approach]
4. [Angle 4: specific unique approach]
5. [Angle 5: specific unique approach]

REQUIREMENTS:
- Cite EVERY claim with authoritative source URLs
- Use 2024-2025 data when available
- Include specific numbers, dates, examples
- Focus on actionable blog insights for {target_audience}""".strip()
|
||||
|
||||
|
||||
class ComprehensiveResearchStrategy(ResearchStrategy):
    """Full-depth strategy covering every analysis section."""

    def get_mode(self) -> ResearchMode:
        """Return ``ResearchMode.COMPREHENSIVE``."""
        return ResearchMode.COMPREHENSIVE

    def build_research_prompt(
        self,
        topic: str,
        industry: str,
        target_audience: str,
        config: ResearchConfig,
    ) -> str:
        """Return the comprehensive research prompt.

        When set, ``config.date_range`` and ``config.source_types`` add a
        "Date Focus" and a "Priority Sources" line under the topic.
        """
        date_filter = ""
        if config.date_range:
            date_filter = f"\nDate Focus: {config.date_range.value.replace('_', ' ')}"

        source_filter = ""
        if config.source_types:
            source_names = ", ".join(source.value for source in config.source_types)
            source_filter = f"\nPriority Sources: {source_names}"

        return f"""You are a senior blog content strategist conducting comprehensive research for a {industry} blog targeting {target_audience}.

Research Topic: "{topic}"{date_filter}{source_filter}

Provide COMPLETE analysis in this EXACT format:

## TRENDS AND INSIGHTS (2024-2025)
[5-7 trends with specific data, numbers, and source URLs]

## KEY STATISTICS
[7-10 statistics with exact numbers, percentages, dates, and source URLs]

## EXPERT OPINIONS
[4-5 expert quotes with full attribution and source URLs]

## RECENT DEVELOPMENTS
[5-7 recent news/developments with dates and source URLs]

## MARKET ANALYSIS
[3-5 market insights with data points and source URLs]

## BEST PRACTICES & CASE STUDIES
[3-5 examples with specific outcomes/metrics and source URLs]

## KEYWORD ANALYSIS
Primary Keywords: [3 main variations]
Secondary Keywords: [7-10 related keywords]
Long-Tail Opportunities: [5-7 specific search phrases]

## COMPETITOR ANALYSIS
Top Competitors: [5 competitors with brief descriptions]
Content Gaps: [5 topics competitors are missing]
Competitive Advantages: [5 unique angles we can own]

## CONTENT ANGLES (Exactly 5)
1. [Unique angle with reasoning and target benefit]
2. [Unique angle with reasoning and target benefit]
3. [Unique angle with reasoning and target benefit]
4. [Unique angle with reasoning and target benefit]
5. [Unique angle with reasoning and target benefit]

VERIFICATION REQUIREMENTS:
- Minimum 2 authoritative sources per major claim
- Prioritize: Industry publications > Research papers > News > Blogs
- 2024-2025 data strongly preferred
- All numbers must include context (timeframe, sample size, methodology)
- Every recommendation must be actionable for {target_audience}""".strip()
|
||||
|
||||
|
||||
class TargetedResearchStrategy(ResearchStrategy):
    """Strategy whose prompt sections are selected by the config flags."""

    def get_mode(self) -> ResearchMode:
        """Return ``ResearchMode.TARGETED``."""
        return ResearchMode.TARGETED

    def build_research_prompt(
        self,
        topic: str,
        industry: str,
        target_audience: str,
        config: ResearchConfig,
    ) -> str:
        """Return a prompt containing only the sections enabled in ``config``.

        Trends, statistics, expert opinions and competitor analysis are
        optional (``config.include_*`` flags); keyword analysis and content
        angles are always included.
        """
        # (enabled, section text) pairs in the order they appear in the prompt.
        candidate_sections = (
            (
                config.include_trends,
                "## CURRENT TRENDS\n[3-5 trends with data and source URLs]",
            ),
            (
                config.include_statistics,
                "## KEY STATISTICS\n[5-7 statistics with numbers and source URLs]",
            ),
            (
                config.include_expert_quotes,
                "## EXPERT OPINIONS\n[3-4 expert quotes with attribution and source URLs]",
            ),
            (
                config.include_competitors,
                "## COMPETITOR ANALYSIS\nTop Competitors: [3-5]\nContent Gaps: [3-5]",
            ),
            # Always include keywords and angles
            (
                True,
                "## KEYWORD ANALYSIS\nPrimary: [2-3 variations]\nSecondary: [5-7 keywords]\nLong-Tail: [3-5 phrases]",
            ),
            (
                True,
                "## CONTENT ANGLES (3-5)\n[Unique blog angles with reasoning]",
            ),
        )
        sections_str = "\n\n".join(
            section for enabled, section in candidate_sections if enabled
        )

        return f"""You are a blog content strategist conducting targeted research for a {industry} blog targeting {target_audience}.

Research Topic: "{topic}"

Provide focused analysis in this EXACT format:

{sections_str}

REQUIREMENTS:
- Cite all claims with authoritative source URLs
- Include specific numbers, dates, examples
- Focus on actionable insights for {target_audience}
- Use 2024-2025 data when available""".strip()
|
||||
|
||||
|
||||
def get_strategy_for_mode(mode: ResearchMode) -> ResearchStrategy:
    """Instantiate the strategy registered for ``mode``.

    Modes without a registered strategy fall back to ``BasicResearchStrategy``.
    """
    registry = {
        ResearchMode.BASIC: BasicResearchStrategy,
        ResearchMode.COMPREHENSIVE: ComprehensiveResearchStrategy,
        ResearchMode.TARGETED: TargetedResearchStrategy,
    }

    try:
        chosen = registry[mode]
    except KeyError:
        chosen = BasicResearchStrategy
    return chosen()
|
||||
|
||||
Reference in New Issue
Block a user