AI Blog Writer - Implement modular architecture with research, outline, and core services

This commit is contained in:
ajaysi
2025-09-12 16:53:16 +05:30
parent c0a366269d
commit 2ae0c4a8b9
29 changed files with 3210 additions and 907 deletions

View File

@@ -1,5 +1,9 @@
from fastapi import APIRouter, HTTPException
from typing import Any, Dict
import asyncio
import uuid
from datetime import datetime
from loguru import logger
from models.blog_models import (
BlogResearchRequest,
@@ -27,22 +31,234 @@ router = APIRouter(prefix="/api/blog", tags=["AI Blog Writer"])
service = BlogWriterService()
# Simple in-memory task storage (in production, use Redis or database)
task_storage: Dict[str, Dict[str, Any]] = {}
def cleanup_old_tasks():
"""Remove tasks older than 1 hour to prevent memory leaks."""
current_time = datetime.now()
tasks_to_remove = []
for task_id, task_data in task_storage.items():
if (current_time - task_data["created_at"]).total_seconds() > 3600: # 1 hour
tasks_to_remove.append(task_id)
for task_id in tasks_to_remove:
del task_storage[task_id]
@router.get("/health")
async def health() -> Dict[str, Any]:
return {"status": "ok", "service": "ai_blog_writer"}
@router.get("/cache/stats")
async def get_cache_stats() -> Dict[str, Any]:
"""Get research cache statistics."""
try:
from services.cache.research_cache import research_cache
return research_cache.get_cache_stats()
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.delete("/cache/clear")
async def clear_cache() -> Dict[str, Any]:
"""Clear the research cache."""
try:
from services.cache.research_cache import research_cache
research_cache.clear_cache()
return {"status": "success", "message": "Research cache cleared"}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.post("/research/start")
async def start_research(request: BlogResearchRequest) -> Dict[str, Any]:
"""Start a research operation and return a task ID for polling."""
try:
task_id = str(uuid.uuid4())
# Initialize task status
task_storage[task_id] = {
"status": "pending",
"created_at": datetime.now(),
"result": None,
"error": None
}
# Start the research operation in the background
asyncio.create_task(run_research_task(task_id, request))
return {"task_id": task_id, "status": "started"}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.get("/research/status/{task_id}")
async def get_research_status(task_id: str) -> Dict[str, Any]:
"""Get the status of a research operation."""
# Cleanup old tasks periodically
cleanup_old_tasks()
if task_id not in task_storage:
raise HTTPException(status_code=404, detail="Task not found")
task = task_storage[task_id]
response = {
"task_id": task_id,
"status": task["status"],
"created_at": task["created_at"].isoformat(),
"progress_messages": task.get("progress_messages", [])
}
if task["status"] == "completed":
response["result"] = task["result"]
elif task["status"] == "failed":
response["error"] = task["error"]
return response
async def run_research_task(task_id: str, request: BlogResearchRequest):
"""Background task to run research and update status with progress messages."""
try:
# Update status to running
task_storage[task_id]["status"] = "running"
task_storage[task_id]["progress_messages"] = []
# Send initial progress message
await _update_progress(task_id, "🔍 Starting research operation...")
# Check cache first
await _update_progress(task_id, "📋 Checking cache for existing research...")
# Run the actual research with progress updates
result = await service.research_with_progress(request, task_id)
# Check if research failed gracefully
if not result.success:
await _update_progress(task_id, f"❌ Research failed: {result.error_message or 'Unknown error'}")
task_storage[task_id]["status"] = "failed"
task_storage[task_id]["error"] = result.error_message or "Research failed"
else:
await _update_progress(task_id, f"✅ Research completed successfully! Found {len(result.sources)} sources and {len(result.search_queries or [])} search queries.")
# Update status to completed
task_storage[task_id]["status"] = "completed"
task_storage[task_id]["result"] = result.dict()
except Exception as e:
await _update_progress(task_id, f"❌ Research failed with error: {str(e)}")
# Update status to failed
task_storage[task_id]["status"] = "failed"
task_storage[task_id]["error"] = str(e)
async def _update_progress(task_id: str, message: str):
"""Update progress message for a task."""
if task_id in task_storage:
if "progress_messages" not in task_storage[task_id]:
task_storage[task_id]["progress_messages"] = []
progress_entry = {
"timestamp": datetime.now().isoformat(),
"message": message
}
task_storage[task_id]["progress_messages"].append(progress_entry)
# Keep only last 10 progress messages to prevent memory bloat
if len(task_storage[task_id]["progress_messages"]) > 10:
task_storage[task_id]["progress_messages"] = task_storage[task_id]["progress_messages"][-10:]
logger.info(f"Progress update for task {task_id}: {message}")
@router.post("/research", response_model=BlogResearchResponse)
async def research(request: BlogResearchRequest) -> BlogResearchResponse:
"""Legacy endpoint - kept for backward compatibility."""
try:
return await service.research(request)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.post("/outline/start")
async def start_outline_generation(request: BlogOutlineRequest) -> Dict[str, Any]:
"""Start an outline generation operation and return a task ID for polling."""
try:
task_id = str(uuid.uuid4())
# Initialize task status
task_storage[task_id] = {
"status": "pending",
"created_at": datetime.now(),
"result": None,
"error": None,
"progress_messages": []
}
# Start the outline generation operation in the background
asyncio.create_task(run_outline_generation_task(task_id, request))
return {"task_id": task_id, "status": "started"}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.get("/outline/status/{task_id}")
async def get_outline_status(task_id: str) -> Dict[str, Any]:
"""Get the status of an outline generation operation."""
# Cleanup old tasks periodically
cleanup_old_tasks()
if task_id not in task_storage:
raise HTTPException(status_code=404, detail="Task not found")
task = task_storage[task_id]
response = {
"task_id": task_id,
"status": task["status"],
"created_at": task["created_at"].isoformat(),
"progress_messages": task.get("progress_messages", [])
}
if task["status"] == "completed":
response["result"] = task["result"]
elif task["status"] == "failed":
response["error"] = task["error"]
return response
async def run_outline_generation_task(task_id: str, request: BlogOutlineRequest):
"""Background task to run outline generation and update status with progress messages."""
try:
# Update status to running
task_storage[task_id]["status"] = "running"
task_storage[task_id]["progress_messages"] = []
# Send initial progress message
await _update_progress(task_id, "🧩 Starting outline generation...")
# Run the actual outline generation with progress updates
result = await service.generate_outline_with_progress(request, task_id)
# Update status to completed
await _update_progress(task_id, f"✅ Outline generated successfully! Created {len(result.outline)} sections with {len(result.title_options)} title options.")
task_storage[task_id]["status"] = "completed"
task_storage[task_id]["result"] = result.dict()
except Exception as e:
await _update_progress(task_id, f"❌ Outline generation failed: {str(e)}")
# Update status to failed
task_storage[task_id]["status"] = "failed"
task_storage[task_id]["error"] = str(e)
@router.post("/outline/generate", response_model=BlogOutlineResponse)
async def generate_outline(request: BlogOutlineRequest) -> BlogOutlineResponse:
"""Legacy endpoint - kept for backward compatibility."""
try:
return await service.generate_outline(request)
except Exception as e:
@@ -57,6 +273,42 @@ async def refine_outline(request: BlogOutlineRefineRequest) -> BlogOutlineRespon
raise HTTPException(status_code=500, detail=str(e))
@router.post("/outline/enhance-section")
async def enhance_section(section_data: Dict[str, Any], focus: str = "general improvement"):
"""Enhance a specific section with AI improvements."""
try:
from models.blog_models import BlogOutlineSection
section = BlogOutlineSection(**section_data)
enhanced_section = await service.enhance_section_with_ai(section, focus)
return enhanced_section.dict()
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.post("/outline/optimize")
async def optimize_outline(outline_data: Dict[str, Any], focus: str = "general optimization"):
"""Optimize entire outline for better flow, SEO, and engagement."""
try:
from models.blog_models import BlogOutlineSection
outline = [BlogOutlineSection(**section) for section in outline_data.get('outline', [])]
optimized_outline = await service.optimize_outline_with_ai(outline, focus)
return {"outline": [section.dict() for section in optimized_outline]}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.post("/outline/rebalance")
async def rebalance_outline(outline_data: Dict[str, Any], target_words: int = 1500):
"""Rebalance word count distribution across outline sections."""
try:
from models.blog_models import BlogOutlineSection
outline = [BlogOutlineSection(**section) for section in outline_data.get('outline', [])]
rebalanced_outline = service.rebalance_word_counts(outline, target_words)
return {"outline": [section.dict() for section in rebalanced_outline]}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@router.post("/section/generate", response_model=BlogSectionResponse)
async def generate_section(request: BlogSectionRequest) -> BlogSectionResponse:
try:

View File

@@ -35,6 +35,7 @@ class BlogResearchResponse(BaseModel):
suggested_angles: List[str] = []
search_widget: Optional[str] = None # HTML content for search widget
search_queries: List[str] = [] # Search queries generated by Gemini
error_message: Optional[str] = None # Error message for graceful failures
class BlogOutlineSection(BaseModel):
@@ -51,6 +52,7 @@ class BlogOutlineRequest(BaseModel):
research: BlogResearchResponse
persona: Optional[PersonaInfo] = None
word_count: Optional[int] = 1500
custom_instructions: Optional[str] = None
class BlogOutlineResponse(BaseModel):

View File

@@ -0,0 +1,151 @@
# AI Blog Writer Service Architecture
This directory contains the refactored AI Blog Writer service with a clean, modular architecture.
## 📁 Directory Structure
```
blog_writer/
├── README.md # This file
├── blog_service.py # Main entry point (imports from core)
├── core/ # Core service orchestrator
│ ├── __init__.py
│ └── blog_writer_service.py # Main service coordinator
├── research/ # Research functionality
│ ├── __init__.py
│ ├── research_service.py # Main research orchestrator
│ ├── keyword_analyzer.py # AI-powered keyword analysis
│ ├── competitor_analyzer.py # Competitor intelligence
│ └── content_angle_generator.py # Content angle discovery
├── outline/ # Outline generation
│ ├── __init__.py
│ ├── outline_service.py # Main outline orchestrator
│ ├── outline_generator.py # AI-powered outline generation
│ ├── outline_optimizer.py # Outline optimization
│ └── section_enhancer.py # Section enhancement
├── content/ # Content generation (TODO)
└── optimization/ # SEO & optimization (TODO)
```
## 🏗️ Architecture Overview
### Core Module (`core/`)
- **`BlogWriterService`**: Main orchestrator that coordinates all blog writing functionality
- Provides a unified interface for research, outline generation, and content creation
- Delegates to specialized modules for specific functionality
### Research Module (`research/`)
- **`ResearchService`**: Orchestrates comprehensive research using Google Search grounding
- **`KeywordAnalyzer`**: AI-powered keyword analysis and extraction
- **`CompetitorAnalyzer`**: Competitor intelligence and market analysis
- **`ContentAngleGenerator`**: Strategic content angle discovery
### Outline Module (`outline/`)
- **`OutlineService`**: Manages outline generation, refinement, and optimization
- **`OutlineGenerator`**: AI-powered outline generation from research data
- **`OutlineOptimizer`**: Optimizes outlines for flow, SEO, and engagement
- **`SectionEnhancer`**: Enhances individual sections using AI
## 🔄 Service Flow
1. **Research Phase**: `ResearchService``KeywordAnalyzer` + `CompetitorAnalyzer` + `ContentAngleGenerator`
2. **Outline Phase**: `OutlineService``OutlineGenerator``OutlineOptimizer`
3. **Content Phase**: (TODO) Content generation and optimization
4. **Publishing Phase**: (TODO) Platform integration and publishing
## 🚀 Usage
```python
from services.blog_writer.blog_service import BlogWriterService
# Initialize the service
service = BlogWriterService()
# Research a topic
research_result = await service.research(research_request)
# Generate outline from research
outline_result = await service.generate_outline(outline_request)
# Enhance sections
enhanced_section = await service.enhance_section_with_ai(section, "SEO optimization")
```
## 🎯 Key Benefits
### 1. **Modularity**
- Each module has a single responsibility
- Easy to test, maintain, and extend
- Clear separation of concerns
### 2. **Reusability**
- Components can be used independently
- Easy to swap implementations
- Shared utilities and helpers
### 3. **Scalability**
- New features can be added as separate modules
- Existing modules can be enhanced without affecting others
- Clear interfaces between modules
### 4. **Maintainability**
- Smaller, focused files are easier to understand
- Changes are isolated to specific modules
- Clear dependency relationships
## 🔧 Development Guidelines
### Adding New Features
1. Identify the appropriate module (research, outline, content, optimization)
2. Create new classes following the existing patterns
3. Update the module's `__init__.py` to export new classes
4. Add methods to the appropriate service orchestrator
5. Update the main `BlogWriterService` if needed
### Testing
- Each module should have its own test suite
- Mock external dependencies (AI providers, APIs)
- Test both success and failure scenarios
- Maintain high test coverage
### Error Handling
- Use graceful degradation with fallbacks
- Log errors appropriately
- Return meaningful error messages to users
- Don't let one module's failure break the entire flow
## 📈 Future Enhancements
### Content Module (`content/`)
- Section content generation
- Content optimization and refinement
- Multi-format output (HTML, Markdown, etc.)
### Optimization Module (`optimization/`)
- SEO analysis and recommendations
- Readability optimization
- Performance metrics and analytics
### Integration Module (`integration/`)
- Platform-specific adapters (WordPress, Wix, etc.)
- Publishing workflows
- Content management system integration
## 🔍 Code Quality
- **Type Hints**: All methods use proper type annotations
- **Documentation**: Comprehensive docstrings for all public methods
- **Error Handling**: Graceful failure with meaningful error messages
- **Logging**: Structured logging with appropriate levels
- **Testing**: Unit tests for all major functionality
- **Performance**: Efficient caching and API usage
## 📝 Migration Notes
The original `blog_service.py` has been refactored into this modular structure:
- **Research functionality** → `research/` module
- **Outline generation** → `outline/` module
- **Service orchestration** → `core/` module
- **Main entry point** → `blog_service.py` (now just imports from core)
All existing API endpoints continue to work without changes due to the maintained interface in `BlogWriterService`.

View File

@@ -1,649 +1,11 @@
from typing import Any, Dict, List
from loguru import logger
from services.llm_providers.gemini_provider import gemini_structured_json_response
from models.blog_models import (
BlogResearchRequest,
BlogResearchResponse,
BlogOutlineRequest,
BlogOutlineResponse,
BlogOutlineRefineRequest,
BlogSectionRequest,
BlogSectionResponse,
BlogOptimizeRequest,
BlogOptimizeResponse,
BlogSEOAnalyzeRequest,
BlogSEOAnalyzeResponse,
BlogSEOMetadataRequest,
BlogSEOMetadataResponse,
BlogPublishRequest,
BlogPublishResponse,
ResearchSource,
BlogOutlineSection,
)
class BlogWriterService:
"""Service layer for AI Blog Writer (stub implementations for scaffolding)."""
async def research(self, request: BlogResearchRequest) -> BlogResearchResponse:
"""
Stage 1: Research & Strategy (AI Orchestration)
Uses ONLY Gemini's native Google Search grounding - ONE API call for everything.
Follows LinkedIn service pattern for efficiency and cost optimization.
"""
from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider
gemini = GeminiGroundedProvider()
topic = request.topic or ", ".join(request.keywords)
industry = request.industry or (request.persona.industry if request.persona and request.persona.industry else "General")
target_audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'
# Single comprehensive research prompt - Gemini handles Google Search automatically
research_prompt = f"""
Research the topic "{topic}" in the {industry} industry for {target_audience} audience. Provide a comprehensive analysis including:
1. Current trends and insights (2024-2025)
2. Key statistics and data points with sources
3. Industry expert opinions and quotes
4. Recent developments and news
5. Market analysis and forecasts
6. Best practices and case studies
7. Keyword analysis: primary, secondary, and long-tail opportunities
8. Competitor analysis: top players and content gaps
9. Content angle suggestions: 5 compelling angles for blog posts
Focus on factual, up-to-date information from credible sources.
Include specific data points, percentages, and recent developments.
Structure your response with clear sections for each analysis area.
"""
# Single Gemini call with native Google Search grounding - no fallbacks
gemini_result = await gemini.generate_grounded_content(
prompt=research_prompt,
content_type="research",
max_tokens=2000
)
# Extract sources from grounding metadata
sources = self._extract_sources_from_grounding(gemini_result)
# Extract search widget and queries for UI display
search_widget = gemini_result.get("search_widget", "") or ""
search_queries = gemini_result.get("search_queries", []) or []
# Parse the comprehensive response for different analysis components
content = gemini_result.get("content", "")
keyword_analysis = self._parse_keyword_analysis(content, request.keywords)
competitor_analysis = self._parse_competitor_analysis(content)
suggested_angles = self._parse_content_angles(content, topic, industry)
logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
return BlogResearchResponse(
success=True,
sources=sources,
keyword_analysis=keyword_analysis,
competitor_analysis=competitor_analysis,
suggested_angles=suggested_angles,
# Add search widget and queries for UI display
search_widget=search_widget if 'search_widget' in locals() else "",
search_queries=search_queries if 'search_queries' in locals() else [],
)
def _extract_sources_from_grounding(self, gemini_result: Dict[str, Any]) -> List[ResearchSource]:
"""Extract sources from Gemini grounding metadata."""
sources = []
# The Gemini grounded provider already extracts sources and puts them in the 'sources' field
raw_sources = gemini_result.get("sources", [])
for src in raw_sources:
source = ResearchSource(
title=src.get("title", "Untitled"),
url=src.get("url", ""),
excerpt=src.get("content", "")[:500] if src.get("content") else f"Source from {src.get('title', 'web')}",
credibility_score=float(src.get("credibility_score", 0.8)),
published_at=str(src.get("publication_date", "2024-01-01"))
)
sources.append(source)
return sources
def _parse_keyword_analysis(self, content: str, original_keywords: List[str]) -> Dict[str, Any]:
"""Parse keyword analysis from the research content."""
# Extract keywords from content sections
lines = content.split('\n')
keyword_section = []
in_keyword_section = False
for line in lines:
if 'keyword' in line.lower() and ('analysis' in line.lower() or 'primary' in line.lower()):
in_keyword_section = True
continue
if in_keyword_section and line.strip():
if line.startswith(('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')):
break
keyword_section.append(line.strip())
return {
"primary": original_keywords[:1] if original_keywords else [],
"secondary": original_keywords[1:] if len(original_keywords) > 1 else [],
"long_tail": [f"{kw} guide" for kw in original_keywords[:2]] if original_keywords else [],
"search_intent": "informational",
"difficulty": 6,
"content_gaps": [f"{kw} best practices" for kw in original_keywords[:2]] if original_keywords else [],
"analysis_content": "\n".join(keyword_section) if keyword_section else content[:200]
}
def _parse_competitor_analysis(self, content: str) -> Dict[str, Any]:
"""Parse competitor analysis from the research content."""
lines = content.split('\n')
competitor_section = []
in_competitor_section = False
for line in lines:
if 'competitor' in line.lower() and ('analysis' in line.lower() or 'top' in line.lower()):
in_competitor_section = True
continue
if in_competitor_section and line.strip():
if line.startswith(('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')):
break
competitor_section.append(line.strip())
return {
"top_competitors": [],
"content_gaps": [],
"opportunities": [],
"analysis_notes": "\n".join(competitor_section) if competitor_section else "Competitor analysis from research"
}
def _parse_content_angles(self, content: str, topic: str, industry: str) -> List[str]:
"""Parse content angles from the research content."""
lines = content.split('\n')
angles_section = []
in_angles_section = False
for line in lines:
if 'angle' in line.lower() and ('suggest' in line.lower() or 'content' in line.lower()):
in_angles_section = True
continue
if in_angles_section and line.strip():
if line.startswith(('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')):
break
if line.strip() and not line.startswith(('', '-', '*')):
angles_section.append(line.strip())
# If no angles found in content, use fallback
if not angles_section:
angles_section = [
f"How {topic} is Transforming {industry}",
f"Latest {topic} Trends: What You Need to Know",
f"{topic} Best Practices for {industry}",
f"Case Study: {topic} Success Stories",
f"The Future of {topic} in {industry}"
]
return angles_section[:5] # Return top 5 angles
async def generate_outline(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
"""
Stage 2: Content Planning with AI-generated outline using research results
Uses Gemini with research data to create comprehensive, SEO-optimized outline
"""
# Extract research insights
research = request.research
primary_keywords = research.keyword_analysis.get('primary', [])
secondary_keywords = research.keyword_analysis.get('secondary', [])
content_angles = research.suggested_angles
sources = research.sources
search_intent = research.keyword_analysis.get('search_intent', 'informational')
# Build sophisticated outline generation prompt with advanced content strategy
outline_prompt = f"""
You are a world-class content strategist and SEO expert with 15+ years of experience creating viral, high-converting blog content. Your outlines have generated millions of views and driven significant business results.
CONTENT STRATEGY BRIEF:
Topic: {', '.join(primary_keywords)}
Search Intent: {search_intent}
Target Word Count: {request.word_count or 1500} words
Industry Context: {getattr(request.persona, 'industry', 'General') if request.persona else 'General'}
Audience: {getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'}
RESEARCH INTELLIGENCE:
Primary Keywords: {', '.join(primary_keywords)}
Secondary Keywords: {', '.join(secondary_keywords)}
Long-tail Opportunities: {', '.join(research.keyword_analysis.get('long_tail', [])[:5])}
Content Angles Discovered:
{chr(10).join([f"{angle}" for angle in content_angles[:6]])}
Research Sources Available: {len(sources)} authoritative sources with current data
STRATEGIC OUTLINE REQUIREMENTS:
1. CONTENT ARCHITECTURE:
- Create 5-7 sections that follow a logical progression
- Each section must have a clear purpose and value proposition
- Build a narrative arc that keeps readers engaged throughout
- Include strategic content gaps that competitors miss
2. SEO OPTIMIZATION:
- Naturally integrate primary keywords in H2 headings (not forced)
- Use secondary keywords in subheadings and key points
- Include long-tail keywords in natural language
- Optimize for featured snippets and voice search
- Create semantic keyword clusters
3. READER ENGAGEMENT:
- Start with a compelling hook that addresses pain points
- Use storytelling elements and real-world examples
- Include actionable insights readers can implement immediately
- Create sections that encourage social sharing
- End with a strong call-to-action
4. CONTENT DEPTH:
- Each section: 2-4 specific, actionable subheadings
- Each section: 4-6 key points with research-backed insights
- Include data points, statistics, and case studies where relevant
- Address common objections and questions
- Provide unique angles not covered by competitors
5. WORD COUNT DISTRIBUTION:
- Introduction: 10-15% of total words
- Main sections: 70-80% of total words (distributed strategically)
- Conclusion: 10-15% of total words
- Total target: {request.word_count or 1500} words
6. COMPETITIVE ADVANTAGE:
- Include fresh perspectives from recent research
- Address emerging trends and future implications
- Provide deeper insights than surface-level content
- Include practical tools, frameworks, or templates
- Reference authoritative sources and data
TITLE STRATEGY:
Create 3 distinct title options that:
- Include primary keywords naturally
- Promise clear value to readers
- Create curiosity and urgency
- Are optimized for click-through rates
- Work well for social media sharing
CRITICAL: Respond ONLY with valid JSON. No additional text or explanations.
JSON FORMAT:
{{
"title_options": [
"Compelling title with primary keyword and benefit",
"Question-based title that creates curiosity",
"How-to title with specific outcome promise"
],
"outline": [
{{
"heading": "Strategic section title with primary keyword",
"subheadings": [
"Specific, actionable subheading 1",
"Data-driven subheading 2",
"Case study or example subheading 3"
],
"key_points": [
"Research-backed insight with specific data",
"Actionable step readers can take immediately",
"Common mistake to avoid with explanation",
"Advanced tip that provides competitive advantage",
"Real-world example or case study"
],
"target_words": 300,
"keywords": ["primary keyword", "secondary keyword", "long-tail phrase"]
}}
]
}}
"""
logger.info("Generating AI-powered outline using research results")
# Define the schema for structured JSON response
outline_schema = {
"type": "object",
"properties": {
"title_options": {
"type": "array",
"items": {"type": "string"},
"description": "3 SEO-optimized title options"
},
"outline": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {"type": "string"},
"heading": {"type": "string"},
"subheadings": {
"type": "array",
"items": {"type": "string"}
},
"key_points": {
"type": "array",
"items": {"type": "string"}
},
"word_count": {"type": "integer"},
"keywords": {
"type": "array",
"items": {"type": "string"},
"description": "Keywords to focus on in this section"
}
},
"required": ["id", "heading", "subheadings", "key_points", "word_count", "keywords"]
}
}
},
"required": ["title_options", "outline"]
}
# Generate outline using structured JSON response (no grounding needed)
outline_data = gemini_structured_json_response(
prompt=outline_prompt,
schema=outline_schema,
temperature=0.3,
max_tokens=3000
)
# Check for errors in the response
if isinstance(outline_data, dict) and 'error' in outline_data:
logger.error(f"Gemini structured response error: {outline_data['error']}")
raise ValueError(f"AI outline generation failed: {outline_data['error']}")
# Validate required fields
if not isinstance(outline_data, dict) or 'outline' not in outline_data or not isinstance(outline_data['outline'], list):
logger.error(f"Invalid outline structure: {outline_data}")
raise ValueError("Invalid outline structure in Gemini response")
# Convert to BlogOutlineSection objects
outline_sections = []
for i, section_data in enumerate(outline_data.get('outline', [])):
if not isinstance(section_data, dict) or 'heading' not in section_data:
logger.warning(f"Skipping invalid section data at index {i}")
continue
section = BlogOutlineSection(
id=f"s{i+1}",
heading=section_data.get('heading', f'Section {i+1}'),
subheadings=section_data.get('subheadings', []),
key_points=section_data.get('key_points', []),
references=sources[:2] if i < 2 else [], # Assign sources to first 2 sections
target_words=section_data.get('target_words', 300),
keywords=section_data.get('keywords', [])
)
outline_sections.append(section)
title_options = outline_data.get('title_options', [])
if not title_options:
raise ValueError("No title options provided in Gemini response")
logger.info(f"Generated outline with {len(outline_sections)} sections and {len(title_options)} title options")
return BlogOutlineResponse(
success=True,
title_options=title_options,
outline=outline_sections
)
async def refine_outline(self, request: BlogOutlineRefineRequest) -> BlogOutlineResponse:
"""
Refine outline with HITL (Human-in-the-Loop) operations
Supports add, remove, move, merge, rename operations
"""
outline = request.outline.copy()
operation = request.operation.lower()
section_id = request.section_id
payload = request.payload or {}
try:
if operation == 'add':
# Add new section
new_section = BlogOutlineSection(
id=f"s{len(outline) + 1}",
heading=payload.get('heading', 'New Section'),
subheadings=payload.get('subheadings', []),
key_points=payload.get('key_points', []),
references=[],
target_words=payload.get('target_words', 300)
)
outline.append(new_section)
logger.info(f"Added new section: {new_section.heading}")
elif operation == 'remove' and section_id:
# Remove section
outline = [s for s in outline if s.id != section_id]
logger.info(f"Removed section: {section_id}")
elif operation == 'rename' and section_id:
# Rename section
for section in outline:
if section.id == section_id:
section.heading = payload.get('heading', section.heading)
break
logger.info(f"Renamed section {section_id} to: {payload.get('heading')}")
elif operation == 'move' and section_id:
# Move section (reorder)
direction = payload.get('direction', 'down') # 'up' or 'down'
current_index = next((i for i, s in enumerate(outline) if s.id == section_id), -1)
if current_index != -1:
if direction == 'up' and current_index > 0:
outline[current_index], outline[current_index - 1] = outline[current_index - 1], outline[current_index]
elif direction == 'down' and current_index < len(outline) - 1:
outline[current_index], outline[current_index + 1] = outline[current_index + 1], outline[current_index]
logger.info(f"Moved section {section_id} {direction}")
elif operation == 'merge' and section_id:
# Merge with next section
current_index = next((i for i, s in enumerate(outline) if s.id == section_id), -1)
if current_index != -1 and current_index < len(outline) - 1:
current_section = outline[current_index]
next_section = outline[current_index + 1]
# Merge sections
current_section.heading = f"{current_section.heading} & {next_section.heading}"
current_section.subheadings.extend(next_section.subheadings)
current_section.key_points.extend(next_section.key_points)
current_section.references.extend(next_section.references)
current_section.target_words = (current_section.target_words or 0) + (next_section.target_words or 0)
# Remove the next section
outline.pop(current_index + 1)
logger.info(f"Merged section {section_id} with next section")
elif operation == 'update' and section_id:
# Update section details
for section in outline:
if section.id == section_id:
if 'heading' in payload:
section.heading = payload['heading']
if 'subheadings' in payload:
section.subheadings = payload['subheadings']
if 'key_points' in payload:
section.key_points = payload['key_points']
if 'target_words' in payload:
section.target_words = payload['target_words']
break
logger.info(f"Updated section {section_id}")
# Reassign IDs to maintain order
for i, section in enumerate(outline):
section.id = f"s{i+1}"
return BlogOutlineResponse(
success=True,
title_options=["Refined Outline"],
outline=outline
)
except Exception as e:
logger.error(f"Outline refinement failed: {e}")
return BlogOutlineResponse(
success=False,
title_options=["Error"],
outline=request.outline
)
async def generate_section(self, request: BlogSectionRequest) -> BlogSectionResponse:
# TODO: Generate section markdown incorporating references and persona/tone
md = f"## {request.section.heading}\n\nThis section content will be generated here.\n"
return BlogSectionResponse(success=True, markdown=md, citations=request.section.references)
async def optimize_section(self, request: BlogOptimizeRequest) -> BlogOptimizeResponse:
# TODO: Run readability/EEAT optimization and return diff
return BlogOptimizeResponse(success=True, optimized=request.content, diff_preview=None)
async def hallucination_check(self, payload: Dict[str, Any]) -> Dict[str, Any]:
"""Run hallucination detection on provided text using existing detector service."""
text = str(payload.get("text", "") or "").strip()
if not text:
return {"success": False, "error": "No text provided"}
# Prefer direct service use over HTTP proxy
try:
from services.hallucination_detector import HallucinationDetector
detector = HallucinationDetector()
result = await detector.detect_hallucinations(text)
# Serialize dataclass-like result to dict
claims = []
for c in result.claims:
claims.append({
"text": c.text,
"confidence": c.confidence,
"assessment": c.assessment,
"supporting_sources": c.supporting_sources,
"refuting_sources": c.refuting_sources,
"reasoning": c.reasoning,
})
return {
"success": True,
"overall_confidence": result.overall_confidence,
"total_claims": result.total_claims,
"supported_claims": result.supported_claims,
"refuted_claims": result.refuted_claims,
"insufficient_claims": result.insufficient_claims,
"timestamp": result.timestamp,
"claims": claims,
}
except Exception as e:
return {"success": False, "error": str(e)}
async def seo_analyze(self, request: BlogSEOAnalyzeRequest) -> BlogSEOAnalyzeResponse:
"""Wrap existing SEO tools to produce unified analysis for blog content."""
from services.seo_tools.on_page_seo_service import OnPageSEOService
from services.seo_tools.image_alt_service import ImageAltService
from services.seo_tools.content_strategy_service import ContentStrategyService
content = request.content or ""
target_keywords = request.keywords or []
# On-page analysis (treat content as a virtual URL/document for now)
on_page = OnPageSEOService()
on_page_result = await on_page.analyze_on_page_seo(url="about:blank", target_keywords=target_keywords)
# Image alt coverage (placeholder: no images in raw content yet)
try:
image_alt_service = ImageAltService()
image_alt_status = {"total_images": 0, "missing_alt": 0}
except Exception:
image_alt_status = {"total_images": 0, "missing_alt": 0}
# Strategy hints (keywords/topics)
try:
strategy = ContentStrategyService()
strategy_hints = await strategy.analyze_content_topics(content=content)
except Exception:
strategy_hints = {"topics": [], "gaps": []}
# Lightweight markdown parsing for headings/links/keywords
import re
content_text = content or ""
words = re.findall(r"[A-Za-z0-9']+", content_text)
total_words = max(len(words), 1)
heading_lines = content_text.splitlines()
h1 = sum(1 for ln in heading_lines if ln.startswith('# '))
h2 = sum(1 for ln in heading_lines if ln.startswith('## '))
h3 = sum(1 for ln in heading_lines if ln.startswith('### '))
md_links = re.findall(r"\[([^\]]+)\]\(([^)]+)\)", content_text)
external_links = [u for (_t, u) in md_links if u.startswith('http')]
# Keyword density
density_map: Dict[str, Any] = {"target_keywords": target_keywords}
for kw in target_keywords:
try:
occurrences = len(re.findall(re.escape(kw), content_text, flags=re.IGNORECASE))
except re.error:
occurrences = 0
density_map[kw] = {
"occurrences": occurrences,
"density": round(occurrences / total_words, 4)
}
# Build unified response
recommendations: List[str] = []
if isinstance(on_page_result.get("recommendations"), list):
recommendations.extend(on_page_result["recommendations"])
if strategy_hints.get("gaps"):
recommendations.append("Cover missing topics: " + ", ".join(strategy_hints["gaps"]))
if not external_links:
recommendations.append("Add at least one credible external link to authoritative sources.")
if h2 < 2:
recommendations.append("Increase number of H2 sections for better structure.")
# Internal link suggestions: generate anchors for H2s and propose cross-links
def to_anchor(h: str) -> str:
import re
a = re.sub(r"[^a-z0-9\s-]", "", h.lower())
a = re.sub(r"\s+", "-", a).strip('-')
return a
h2_headings = [ln[3:].strip() for ln in heading_lines if ln.startswith('## ')]
anchors = [to_anchor(h) for h in h2_headings]
internal_link_suggestions = []
for i in range(len(anchors)-1):
internal_link_suggestions.append({
"from": h2_headings[i],
"to": h2_headings[i+1],
"anchor": f"#{anchors[i+1]}",
"suggestion": f"Add internal link from '{h2_headings[i]}' to '{h2_headings[i+1]}'"
})
return BlogSEOAnalyzeResponse(
success=True,
seo_score=float(on_page_result.get("overall_score", 75)),
density=density_map,
structure={
**on_page_result.get("heading_structure", {}),
"markdown_headings": {"h1": h1, "h2": h2, "h3": h3},
"links": {"total": len(md_links), "external": len(external_links)}
},
readability=on_page_result.get("content_analysis", {}),
link_suggestions=([{"suggestion": "Add external citation links for key claims."}] if not external_links else []) + internal_link_suggestions,
image_alt_status=image_alt_status,
recommendations=recommendations,
)
async def seo_metadata(self, request: BlogSEOMetadataRequest) -> BlogSEOMetadataResponse:
# TODO: Generate SEO metadata using existing services
return BlogSEOMetadataResponse(
success=True,
title_options=[request.title or "Generated SEO Title"],
meta_descriptions=["Compelling meta description..."],
open_graph={"title": request.title or "OG Title", "image": ""},
twitter_card={"card": "summary_large_image"},
schema={"@type": "Article"},
)
async def publish(self, request: BlogPublishRequest) -> BlogPublishResponse:
# TODO: Call Wix/WordPress adapters to publish
return BlogPublishResponse(success=True, platform=request.platform, url="https://example.com/post")
"""
AI Blog Writer Service - Main entry point for blog writing functionality.
This module provides a clean interface to the modular blog writer services.
The actual implementation has been refactored into specialized modules:
- research/ - Research and keyword analysis
- outline/ - Outline generation and optimization
- core/ - Main service orchestrator
"""
from .core import BlogWriterService

View File

@@ -0,0 +1,11 @@
"""
Core module for AI Blog Writer.
This module contains the main service orchestrator and shared utilities.
"""
from .blog_writer_service import BlogWriterService
__all__ = [
'BlogWriterService'
]

View File

@@ -0,0 +1,233 @@
"""
Blog Writer Service - Main orchestrator for AI Blog Writer.
Coordinates research, outline generation, content creation, and optimization.
"""
from typing import Dict, Any, List
from loguru import logger
from models.blog_models import (
BlogResearchRequest,
BlogResearchResponse,
BlogOutlineRequest,
BlogOutlineResponse,
BlogOutlineRefineRequest,
BlogSectionRequest,
BlogSectionResponse,
BlogOptimizeRequest,
BlogOptimizeResponse,
BlogSEOAnalyzeRequest,
BlogSEOAnalyzeResponse,
BlogSEOMetadataRequest,
BlogSEOMetadataResponse,
BlogPublishRequest,
BlogPublishResponse,
BlogOutlineSection,
)
from ..research import ResearchService
from ..outline import OutlineService
class BlogWriterService:
"""Main service orchestrator for AI Blog Writer functionality."""
def __init__(self):
self.research_service = ResearchService()
self.outline_service = OutlineService()
# Research Methods
async def research(self, request: BlogResearchRequest) -> BlogResearchResponse:
"""Conduct comprehensive research using Google Search grounding."""
return await self.research_service.research(request)
async def research_with_progress(self, request: BlogResearchRequest, task_id: str) -> BlogResearchResponse:
"""Conduct research with real-time progress updates."""
return await self.research_service.research_with_progress(request, task_id)
# Outline Methods
async def generate_outline(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
"""Generate AI-powered outline from research data."""
return await self.outline_service.generate_outline(request)
async def generate_outline_with_progress(self, request: BlogOutlineRequest, task_id: str) -> BlogOutlineResponse:
"""Generate outline with real-time progress updates."""
return await self.outline_service.generate_outline_with_progress(request, task_id)
async def refine_outline(self, request: BlogOutlineRefineRequest) -> BlogOutlineResponse:
"""Refine outline with HITL operations."""
return await self.outline_service.refine_outline(request)
async def enhance_section_with_ai(self, section: BlogOutlineSection, focus: str = "general improvement") -> BlogOutlineSection:
"""Enhance a section using AI."""
return await self.outline_service.enhance_section_with_ai(section, focus)
async def optimize_outline_with_ai(self, outline: List[BlogOutlineSection], focus: str = "general optimization") -> List[BlogOutlineSection]:
"""Optimize entire outline for better flow and SEO."""
return await self.outline_service.optimize_outline_with_ai(outline, focus)
def rebalance_word_counts(self, outline: List[BlogOutlineSection], target_words: int) -> List[BlogOutlineSection]:
"""Rebalance word count distribution across sections."""
return self.outline_service.rebalance_word_counts(outline, target_words)
# Content Generation Methods (TODO: Extract to content module)
async def generate_section(self, request: BlogSectionRequest) -> BlogSectionResponse:
"""Generate section content from outline."""
# TODO: Move to content module
md = f"## {request.section.heading}\n\nThis section content will be generated here.\n"
return BlogSectionResponse(success=True, markdown=md, citations=request.section.references)
async def optimize_section(self, request: BlogOptimizeRequest) -> BlogOptimizeResponse:
"""Optimize section content for readability and SEO."""
# TODO: Move to optimization module
return BlogOptimizeResponse(success=True, optimized=request.content, diff_preview=None)
# SEO and Analysis Methods (TODO: Extract to optimization module)
async def hallucination_check(self, payload: Dict[str, Any]) -> Dict[str, Any]:
"""Run hallucination detection on provided text."""
text = str(payload.get("text", "") or "").strip()
if not text:
return {"success": False, "error": "No text provided"}
# Prefer direct service use over HTTP proxy
try:
from services.hallucination_detector import HallucinationDetector
detector = HallucinationDetector()
result = await detector.detect_hallucinations(text)
# Serialize dataclass-like result to dict
claims = []
for c in result.claims:
claims.append({
"text": c.text,
"confidence": c.confidence,
"assessment": c.assessment,
"supporting_sources": c.supporting_sources,
"refuting_sources": c.refuting_sources,
"reasoning": c.reasoning,
})
return {
"success": True,
"overall_confidence": result.overall_confidence,
"total_claims": result.total_claims,
"supported_claims": result.supported_claims,
"refuted_claims": result.refuted_claims,
"insufficient_claims": result.insufficient_claims,
"timestamp": result.timestamp,
"claims": claims,
}
except Exception as e:
return {"success": False, "error": str(e)}
async def seo_analyze(self, request: BlogSEOAnalyzeRequest) -> BlogSEOAnalyzeResponse:
"""Analyze content for SEO optimization."""
from services.seo_tools.on_page_seo_service import OnPageSEOService
from services.seo_tools.image_alt_service import ImageAltService
from services.seo_tools.content_strategy_service import ContentStrategyService
content = request.content or ""
target_keywords = request.keywords or []
# On-page analysis (treat content as a virtual URL/document for now)
on_page = OnPageSEOService()
on_page_result = await on_page.analyze_on_page_seo(url="about:blank", target_keywords=target_keywords)
# Image alt coverage (placeholder: no images in raw content yet)
try:
image_alt_service = ImageAltService()
image_alt_status = {"total_images": 0, "missing_alt": 0}
except Exception:
image_alt_status = {"total_images": 0, "missing_alt": 0}
# Strategy hints (keywords/topics)
try:
strategy = ContentStrategyService()
strategy_hints = await strategy.analyze_content_topics(content=content)
except Exception:
strategy_hints = {"topics": [], "gaps": []}
# Lightweight markdown parsing for headings/links/keywords
import re
content_text = content or ""
words = re.findall(r"[A-Za-z0-9']+", content_text)
total_words = max(len(words), 1)
heading_lines = content_text.splitlines()
h1 = sum(1 for ln in heading_lines if ln.startswith('# '))
h2 = sum(1 for ln in heading_lines if ln.startswith('## '))
h3 = sum(1 for ln in heading_lines if ln.startswith('### '))
md_links = re.findall(r"\[([^\]]+)\]\(([^)]+)\)", content_text)
external_links = [u for (_t, u) in md_links if u.startswith('http')]
# Keyword density
density_map: Dict[str, Any] = {"target_keywords": target_keywords}
for kw in target_keywords:
try:
occurrences = len(re.findall(re.escape(kw), content_text, flags=re.IGNORECASE))
except re.error:
occurrences = 0
density_map[kw] = {
"occurrences": occurrences,
"density": round(occurrences / total_words, 4)
}
# Build unified response
recommendations: List[str] = []
if isinstance(on_page_result.get("recommendations"), list):
recommendations.extend(on_page_result["recommendations"])
if strategy_hints.get("gaps"):
recommendations.append("Cover missing topics: " + ", ".join(strategy_hints["gaps"]))
if not external_links:
recommendations.append("Add at least one credible external link to authoritative sources.")
if h2 < 2:
recommendations.append("Increase number of H2 sections for better structure.")
# Internal link suggestions: generate anchors for H2s and propose cross-links
def to_anchor(h: str) -> str:
import re
a = re.sub(r"[^a-z0-9\s-]", "", h.lower())
a = re.sub(r"\s+", "-", a).strip('-')
return a
h2_headings = [ln[3:].strip() for ln in heading_lines if ln.startswith('## ')]
anchors = [to_anchor(h) for h in h2_headings]
internal_link_suggestions = []
for i in range(len(anchors)-1):
internal_link_suggestions.append({
"from": h2_headings[i],
"to": h2_headings[i+1],
"anchor": f"#{anchors[i+1]}",
"suggestion": f"Add internal link from '{h2_headings[i]}' to '{h2_headings[i+1]}'"
})
return BlogSEOAnalyzeResponse(
success=True,
seo_score=float(on_page_result.get("overall_score", 75)),
density=density_map,
structure={
**on_page_result.get("heading_structure", {}),
"markdown_headings": {"h1": h1, "h2": h2, "h3": h3},
"links": {"total": len(md_links), "external": len(external_links)}
},
readability=on_page_result.get("content_analysis", {}),
link_suggestions=([{"suggestion": "Add external citation links for key claims."}] if not external_links else []) + internal_link_suggestions,
image_alt_status=image_alt_status,
recommendations=recommendations,
)
async def seo_metadata(self, request: BlogSEOMetadataRequest) -> BlogSEOMetadataResponse:
"""Generate SEO metadata for content."""
# TODO: Move to optimization module
return BlogSEOMetadataResponse(
success=True,
title_options=[request.title or "Generated SEO Title"],
meta_descriptions=["Compelling meta description..."],
open_graph={"title": request.title or "OG Title", "image": ""},
twitter_card={"card": "summary_large_image"},
schema={"@type": "Article"},
)
async def publish(self, request: BlogPublishRequest) -> BlogPublishResponse:
"""Publish content to specified platform."""
# TODO: Move to content module
return BlogPublishResponse(success=True, platform=request.platform, url="https://example.com/post")

View File

@@ -0,0 +1,21 @@
"""
Outline module for AI Blog Writer.
This module handles all outline-related functionality including:
- AI-powered outline generation
- Outline refinement and optimization
- Section enhancement and rebalancing
- Strategic content planning
"""
from .outline_service import OutlineService
from .outline_generator import OutlineGenerator
from .outline_optimizer import OutlineOptimizer
from .section_enhancer import SectionEnhancer
__all__ = [
'OutlineService',
'OutlineGenerator',
'OutlineOptimizer',
'SectionEnhancer'
]

View File

@@ -0,0 +1,351 @@
"""
Outline Generator - AI-powered outline generation from research data.
Generates comprehensive, SEO-optimized outlines using research intelligence.
"""
from typing import Dict, Any, List
import asyncio
from loguru import logger
from models.blog_models import (
BlogOutlineRequest,
BlogOutlineResponse,
BlogOutlineSection,
)
class OutlineGenerator:
"""Generates AI-powered outlines from research data."""
async def generate(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
"""
Generate AI-powered outline using research results
"""
# Extract research insights
research = request.research
primary_keywords = research.keyword_analysis.get('primary', [])
secondary_keywords = research.keyword_analysis.get('secondary', [])
content_angles = research.suggested_angles
sources = research.sources
search_intent = research.keyword_analysis.get('search_intent', 'informational')
# Check for custom instructions
custom_instructions = getattr(request, 'custom_instructions', None)
# Build comprehensive outline generation prompt with rich research data
outline_prompt = self._build_outline_prompt(
primary_keywords, secondary_keywords, content_angles, sources,
search_intent, request, custom_instructions
)
logger.info("Generating AI-powered outline using research results")
# Define schema with proper property ordering (critical for Gemini API)
outline_schema = self._get_outline_schema()
# Generate outline using structured JSON response with retry logic
outline_data = await self._generate_with_retry(outline_prompt, outline_schema)
# Convert to BlogOutlineSection objects
outline_sections = self._convert_to_sections(outline_data, sources)
# Extract title options
title_options = outline_data.get('title_options', [])
if not title_options:
title_options = self._generate_fallback_titles(primary_keywords)
logger.info(f"Generated outline with {len(outline_sections)} sections and {len(title_options)} title options")
return BlogOutlineResponse(
success=True,
title_options=title_options,
outline=outline_sections
)
async def generate_with_progress(self, request: BlogOutlineRequest, task_id: str) -> BlogOutlineResponse:
"""
Outline generation method with progress updates for real-time feedback.
"""
from api.blog_writer.router import _update_progress
# Extract research insights
research = request.research
primary_keywords = research.keyword_analysis.get('primary', [])
secondary_keywords = research.keyword_analysis.get('secondary', [])
content_angles = research.suggested_angles
sources = research.sources
search_intent = research.keyword_analysis.get('search_intent', 'informational')
# Check for custom instructions
custom_instructions = getattr(request, 'custom_instructions', None)
await _update_progress(task_id, "📊 Analyzing research data and building content strategy...")
# Build comprehensive outline generation prompt with rich research data
outline_prompt = self._build_outline_prompt(
primary_keywords, secondary_keywords, content_angles, sources,
search_intent, request, custom_instructions
)
await _update_progress(task_id, "🤖 Generating AI-powered outline with research insights...")
# Define schema with proper property ordering (critical for Gemini API)
outline_schema = self._get_outline_schema()
await _update_progress(task_id, "🔄 Making AI request to generate structured outline...")
# Generate outline using structured JSON response with retry logic
outline_data = await self._generate_with_retry(outline_prompt, outline_schema, task_id)
await _update_progress(task_id, "📝 Processing outline structure and validating sections...")
# Convert to BlogOutlineSection objects
outline_sections = self._convert_to_sections(outline_data, sources)
# Extract title options
title_options = outline_data.get('title_options', [])
if not title_options:
title_options = self._generate_fallback_titles(primary_keywords)
await _update_progress(task_id, "✅ Outline generation completed successfully!")
return BlogOutlineResponse(
success=True,
title_options=title_options,
outline=outline_sections
)
def _build_outline_prompt(self, primary_keywords: List[str], secondary_keywords: List[str],
content_angles: List[str], sources: List, search_intent: str,
request: BlogOutlineRequest, custom_instructions: str = None) -> str:
"""Build the comprehensive outline generation prompt."""
return f"""
You are a world-class content strategist and SEO expert with 15+ years of experience creating viral, high-converting blog content. Your outlines have generated millions of views and driven significant business results.
CONTENT STRATEGY BRIEF:
Topic: {', '.join(primary_keywords)}
Search Intent: {search_intent}
Target Word Count: {request.word_count or 1500} words
Industry Context: {getattr(request.persona, 'industry', 'General') if request.persona else 'General'}
Audience: {getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'}
{f"CUSTOM USER INSTRUCTIONS: {custom_instructions}" if custom_instructions else ""}
RESEARCH INTELLIGENCE:
Primary Keywords: {', '.join(primary_keywords)}
Secondary Keywords: {', '.join(secondary_keywords)}
Long-tail Opportunities: {', '.join(request.research.keyword_analysis.get('long_tail', [])[:5])}
Semantic Keywords: {', '.join(request.research.keyword_analysis.get('semantic_keywords', [])[:5])}
Trending Terms: {', '.join(request.research.keyword_analysis.get('trending_terms', [])[:3])}
Keyword Difficulty: {request.research.keyword_analysis.get('difficulty', 6)}/10
Content Gaps: {', '.join(request.research.keyword_analysis.get('content_gaps', [])[:3])}
Content Angles Discovered:
{chr(10).join([f"{angle}" for angle in content_angles[:6]])}
Competitive Intelligence:
Top Competitors: {', '.join(request.research.competitor_analysis.get('top_competitors', [])[:3])}
Market Opportunities: {', '.join(request.research.competitor_analysis.get('opportunities', [])[:3])}
Competitive Advantages: {', '.join(request.research.competitor_analysis.get('competitive_advantages', [])[:3])}
Market Positioning: {request.research.competitor_analysis.get('market_positioning', 'Standard positioning')}
Research Sources Available: {len(sources)} authoritative sources with current data
Key Statistics Available: Multiple data points, percentages, and expert quotes from credible sources
STRATEGIC OUTLINE REQUIREMENTS:
{f"CUSTOM REQUIREMENTS: {custom_instructions}" if custom_instructions else ""}
1. CONTENT ARCHITECTURE:
- Create a logical, engaging narrative arc that guides readers from problem to solution
- Structure content to build authority and trust progressively
- Include data-driven insights and expert opinions from research
- Ensure each section adds unique value and builds upon previous sections
2. SEO OPTIMIZATION:
- Naturally integrate primary keywords in headings and content
- Use secondary keywords strategically throughout sections
- Include long-tail keywords in subheadings and key points
- Optimize for featured snippets and voice search
3. READER ENGAGEMENT:
- Start with compelling hooks and pain points
- Use storytelling elements and real-world examples
- Include actionable insights and practical takeaways
- End with clear next steps and calls-to-action
4. CONTENT DEPTH:
- Provide comprehensive coverage of the topic
- Include multiple perspectives and expert insights
- Address common questions and objections
- Offer unique angles not covered by competitors
5. WORD COUNT DISTRIBUTION:
- Introduction: 12% of total word count
- Main content sections: 76% of total word count
- Conclusion: 12% of total word count
- Ensure balanced section lengths for optimal readability
6. COMPETITIVE ADVANTAGE:
- Leverage content gaps identified in research
- Include unique data points and statistics
- Provide fresh perspectives on trending topics
- Address underserved audience segments
TITLE STRATEGY:
Create 5 compelling title options that:
- Include primary keywords naturally
- Promise clear value and outcomes
- Appeal to the target audience's pain points
- Stand out from competitor content
- Optimize for click-through rates
Generate a comprehensive outline with the following structure:
{{
"title_options": [
"Title 1 with primary keyword",
"Title 2 with emotional hook",
"Title 3 with benefit-focused approach",
"Title 4 with question format",
"Title 5 with urgency/trending angle"
],
"outline": [
{{
"heading": "Section heading with primary keyword",
"subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
"key_points": ["Key point 1", "Key point 2", "Key point 3"],
"word_count": 300,
"keywords": ["primary keyword", "secondary keyword"]
}}
]
}}
"""
def _get_outline_schema(self) -> Dict[str, Any]:
"""Get the structured JSON schema for outline generation."""
return {
"type": "object",
"properties": {
"title_options": {
"type": "array",
"items": {"type": "string"}
},
"outline": {
"type": "array",
"items": {
"type": "object",
"properties": {
"heading": {"type": "string"},
"subheadings": {
"type": "array",
"items": {"type": "string"}
},
"key_points": {
"type": "array",
"items": {"type": "string"}
},
"word_count": {"type": "integer"},
"keywords": {
"type": "array",
"items": {"type": "string"}
}
},
"required": ["heading", "subheadings", "key_points", "word_count", "keywords"]
}
}
},
"required": ["title_options", "outline"],
"propertyOrdering": ["title_options", "outline"]
}
async def _generate_with_retry(self, prompt: str, schema: Dict[str, Any], task_id: str = None) -> Dict[str, Any]:
"""Generate outline with retry logic for API failures."""
from services.llm_providers.gemini_provider import gemini_structured_json_response
from api.blog_writer.router import _update_progress
max_retries = 2 # Conservative retry for expensive API calls
retry_delay = 5 # 5 second delay between retries
for attempt in range(max_retries + 1):
try:
if task_id:
await _update_progress(task_id, f"🤖 Calling Gemini API for outline generation (attempt {attempt + 1}/{max_retries + 1})...")
outline_data = gemini_structured_json_response(
prompt=prompt,
schema=schema,
temperature=0.3,
max_tokens=4000 # Increased to avoid MAX_TOKENS truncation
)
# Log response for debugging
logger.info(f"Gemini response received: {type(outline_data)}")
# Check for errors in the response
if isinstance(outline_data, dict) and 'error' in outline_data:
error_msg = str(outline_data['error'])
if "503" in error_msg and "overloaded" in error_msg and attempt < max_retries:
if task_id:
await _update_progress(task_id, f"⚠️ AI service overloaded, retrying in {retry_delay} seconds...")
logger.warning(f"Gemini API overloaded, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
await asyncio.sleep(retry_delay)
continue
else:
logger.error(f"Gemini structured response error: {outline_data['error']}")
raise ValueError(f"AI outline generation failed: {outline_data['error']}")
# Validate required fields
if not isinstance(outline_data, dict) or 'outline' not in outline_data or not isinstance(outline_data['outline'], list):
if attempt < max_retries:
if task_id:
await _update_progress(task_id, f"⚠️ Invalid response structure, retrying in {retry_delay} seconds...")
logger.warning(f"Invalid response structure, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
await asyncio.sleep(retry_delay)
continue
else:
raise ValueError("Invalid outline structure in Gemini response")
# If we get here, the response is valid
return outline_data
except Exception as e:
error_str = str(e)
if ("503" in error_str or "overloaded" in error_str) and attempt < max_retries:
if task_id:
await _update_progress(task_id, f"⚠️ AI service error, retrying in {retry_delay} seconds...")
logger.warning(f"Gemini API error, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1}): {error_str}")
await asyncio.sleep(retry_delay)
continue
else:
logger.error(f"Outline generation failed after {attempt + 1} attempts: {error_str}")
raise ValueError(f"AI outline generation failed: {error_str}")
def _convert_to_sections(self, outline_data: Dict[str, Any], sources: List) -> List[BlogOutlineSection]:
"""Convert outline data to BlogOutlineSection objects."""
outline_sections = []
for i, section_data in enumerate(outline_data.get('outline', [])):
if not isinstance(section_data, dict) or 'heading' not in section_data:
continue
section = BlogOutlineSection(
id=f"s{i+1}",
heading=section_data.get('heading', f'Section {i+1}'),
subheadings=section_data.get('subheadings', []),
key_points=section_data.get('key_points', []),
references=sources[:3], # Use first 3 sources as references
target_words=section_data.get('word_count', 200),
keywords=section_data.get('keywords', [])
)
outline_sections.append(section)
return outline_sections
def _generate_fallback_titles(self, primary_keywords: List[str]) -> List[str]:
"""Generate fallback titles when AI generation fails."""
primary_keyword = primary_keywords[0] if primary_keywords else "Topic"
return [
f"The Complete Guide to {primary_keyword}",
f"{primary_keyword}: Everything You Need to Know",
f"How to Master {primary_keyword} in 2024"
]

View File

@@ -0,0 +1,114 @@
"""
Outline Optimizer - AI-powered outline optimization and rebalancing.
Optimizes outlines for better flow, SEO, and engagement.
"""
from typing import List
from loguru import logger
from models.blog_models import BlogOutlineSection
class OutlineOptimizer:
"""Optimizes outlines for better flow, SEO, and engagement."""
async def optimize(self, outline: List[BlogOutlineSection], focus: str = "general optimization") -> List[BlogOutlineSection]:
"""Optimize entire outline for better flow, SEO, and engagement."""
outline_text = "\n".join([f"{i+1}. {s.heading}" for i, s in enumerate(outline)])
optimization_prompt = f"""
Optimize this blog outline for better flow, engagement, and SEO:
Current Outline:
{outline_text}
Optimization Focus: {focus}
Optimization Goals:
- Improve narrative flow and logical progression
- Enhance SEO with better keyword distribution
- Increase engagement with compelling headings
- Ensure comprehensive coverage of the topic
- Optimize for featured snippets and voice search
Respond with JSON array of optimized sections:
[
{{
"heading": "Optimized heading",
"subheadings": ["subheading 1", "subheading 2"],
"key_points": ["point 1", "point 2"],
"target_words": 300,
"keywords": ["keyword1", "keyword2"]
}}
]
"""
try:
from services.llm_providers.gemini_provider import gemini_structured_json_response
optimization_schema = {
"type": "array",
"items": {
"type": "object",
"properties": {
"heading": {"type": "string"},
"subheadings": {"type": "array", "items": {"type": "string"}},
"key_points": {"type": "array", "items": {"type": "string"}},
"target_words": {"type": "integer"},
"keywords": {"type": "array", "items": {"type": "string"}}
},
"required": ["heading", "subheadings", "key_points", "target_words", "keywords"]
}
}
optimized_data = gemini_structured_json_response(
prompt=optimization_prompt,
schema=optimization_schema,
temperature=0.3,
max_tokens=2000
)
if isinstance(optimized_data, list):
optimized_sections = []
for i, section_data in enumerate(optimized_data):
section = BlogOutlineSection(
id=f"s{i+1}",
heading=section_data.get('heading', f'Section {i+1}'),
subheadings=section_data.get('subheadings', []),
key_points=section_data.get('key_points', []),
references=outline[i].references if i < len(outline) else [],
target_words=section_data.get('target_words', 300),
keywords=section_data.get('keywords', [])
)
optimized_sections.append(section)
return optimized_sections
except Exception as e:
logger.warning(f"AI outline optimization failed: {e}")
return outline
def rebalance_word_counts(self, outline: List[BlogOutlineSection], target_words: int) -> List[BlogOutlineSection]:
"""Rebalance word count distribution across sections."""
total_sections = len(outline)
if total_sections == 0:
return outline
# Calculate target distribution
intro_words = int(target_words * 0.12) # 12% for intro
conclusion_words = int(target_words * 0.12) # 12% for conclusion
main_content_words = target_words - intro_words - conclusion_words
# Distribute main content words across sections
words_per_section = main_content_words // total_sections
remainder = main_content_words % total_sections
for i, section in enumerate(outline):
if i == 0: # First section (intro)
section.target_words = intro_words
elif i == total_sections - 1: # Last section (conclusion)
section.target_words = conclusion_words
else: # Main content sections
section.target_words = words_per_section + (1 if i < remainder else 0)
return outline

View File

@@ -0,0 +1,154 @@
"""
Outline Service - Core outline generation and management functionality.
Handles AI-powered outline generation, refinement, and optimization.
"""
from typing import Dict, Any, List
import asyncio
from loguru import logger
from models.blog_models import (
BlogOutlineRequest,
BlogOutlineResponse,
BlogOutlineRefineRequest,
BlogOutlineSection,
)
from .outline_generator import OutlineGenerator
from .outline_optimizer import OutlineOptimizer
from .section_enhancer import SectionEnhancer
class OutlineService:
"""Service for generating and managing blog outlines using AI."""
def __init__(self):
self.outline_generator = OutlineGenerator()
self.outline_optimizer = OutlineOptimizer()
self.section_enhancer = SectionEnhancer()
async def generate_outline(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
"""
Stage 2: Content Planning with AI-generated outline using research results
Uses Gemini with research data to create comprehensive, SEO-optimized outline
"""
return await self.outline_generator.generate(request)
async def generate_outline_with_progress(self, request: BlogOutlineRequest, task_id: str) -> BlogOutlineResponse:
"""
Outline generation method with progress updates for real-time feedback.
"""
return await self.outline_generator.generate_with_progress(request, task_id)
async def refine_outline(self, request: BlogOutlineRefineRequest) -> BlogOutlineResponse:
"""
Refine outline with HITL (Human-in-the-Loop) operations
Supports add, remove, move, merge, rename operations
"""
outline = request.outline.copy()
operation = request.operation.lower()
section_id = request.section_id
payload = request.payload or {}
try:
if operation == 'add':
# Add new section
new_section = BlogOutlineSection(
id=f"s{len(outline) + 1}",
heading=payload.get('heading', 'New Section'),
subheadings=payload.get('subheadings', []),
key_points=payload.get('key_points', []),
references=[],
target_words=payload.get('target_words', 300)
)
outline.append(new_section)
logger.info(f"Added new section: {new_section.heading}")
elif operation == 'remove' and section_id:
# Remove section
outline = [s for s in outline if s.id != section_id]
logger.info(f"Removed section: {section_id}")
elif operation == 'rename' and section_id:
# Rename section
for section in outline:
if section.id == section_id:
section.heading = payload.get('heading', section.heading)
break
logger.info(f"Renamed section {section_id} to: {payload.get('heading')}")
elif operation == 'move' and section_id:
# Move section (reorder)
direction = payload.get('direction', 'down') # 'up' or 'down'
current_index = next((i for i, s in enumerate(outline) if s.id == section_id), -1)
if current_index != -1:
if direction == 'up' and current_index > 0:
outline[current_index], outline[current_index - 1] = outline[current_index - 1], outline[current_index]
elif direction == 'down' and current_index < len(outline) - 1:
outline[current_index], outline[current_index + 1] = outline[current_index + 1], outline[current_index]
logger.info(f"Moved section {section_id} {direction}")
elif operation == 'merge' and section_id:
# Merge with next section
current_index = next((i for i, s in enumerate(outline) if s.id == section_id), -1)
if current_index != -1 and current_index < len(outline) - 1:
current_section = outline[current_index]
next_section = outline[current_index + 1]
# Merge sections
current_section.heading = f"{current_section.heading} & {next_section.heading}"
current_section.subheadings.extend(next_section.subheadings)
current_section.key_points.extend(next_section.key_points)
current_section.references.extend(next_section.references)
current_section.target_words = (current_section.target_words or 0) + (next_section.target_words or 0)
# Remove the next section
outline.pop(current_index + 1)
logger.info(f"Merged section {section_id} with next section")
elif operation == 'update' and section_id:
# Update section details
for section in outline:
if section.id == section_id:
if 'heading' in payload:
section.heading = payload['heading']
if 'subheadings' in payload:
section.subheadings = payload['subheadings']
if 'key_points' in payload:
section.key_points = payload['key_points']
if 'target_words' in payload:
section.target_words = payload['target_words']
break
logger.info(f"Updated section {section_id}")
# Reassign IDs to maintain order
for i, section in enumerate(outline):
section.id = f"s{i+1}"
return BlogOutlineResponse(
success=True,
title_options=["Refined Outline"],
outline=outline
)
except Exception as e:
logger.error(f"Outline refinement failed: {e}")
return BlogOutlineResponse(
success=False,
title_options=["Error"],
outline=request.outline
)
async def enhance_section_with_ai(self, section: BlogOutlineSection, focus: str = "general improvement") -> BlogOutlineSection:
"""Enhance a section using AI with research context."""
return await self.section_enhancer.enhance(section, focus)
async def optimize_outline_with_ai(self, outline: List[BlogOutlineSection], focus: str = "general optimization") -> List[BlogOutlineSection]:
"""Optimize entire outline for better flow, SEO, and engagement."""
return await self.outline_optimizer.optimize(outline, focus)
def rebalance_word_counts(self, outline: List[BlogOutlineSection], target_words: int) -> List[BlogOutlineSection]:
"""Rebalance word count distribution across sections."""
return self.outline_optimizer.rebalance_word_counts(outline, target_words)

View File

@@ -0,0 +1,81 @@
"""
Section Enhancer - AI-powered section enhancement and improvement.
Enhances individual outline sections for better engagement and value.
"""
from loguru import logger
from models.blog_models import BlogOutlineSection
class SectionEnhancer:
"""Enhances individual outline sections using AI."""
async def enhance(self, section: BlogOutlineSection, focus: str = "general improvement") -> BlogOutlineSection:
"""Enhance a section using AI with research context."""
enhancement_prompt = f"""
Enhance the following blog section to make it more engaging, comprehensive, and valuable:
Current Section:
Heading: {section.heading}
Subheadings: {', '.join(section.subheadings)}
Key Points: {', '.join(section.key_points)}
Target Words: {section.target_words}
Keywords: {', '.join(section.keywords)}
Enhancement Focus: {focus}
Improve:
1. Make subheadings more specific and actionable
2. Add more comprehensive key points with data/insights
3. Include practical examples and case studies
4. Address common questions and objections
5. Optimize for SEO with better keyword integration
Respond with JSON:
{{
"heading": "Enhanced heading",
"subheadings": ["enhanced subheading 1", "enhanced subheading 2"],
"key_points": ["enhanced point 1", "enhanced point 2"],
"target_words": 400,
"keywords": ["keyword1", "keyword2"]
}}
"""
try:
from services.llm_providers.gemini_provider import gemini_structured_json_response
enhancement_schema = {
"type": "object",
"properties": {
"heading": {"type": "string"},
"subheadings": {"type": "array", "items": {"type": "string"}},
"key_points": {"type": "array", "items": {"type": "string"}},
"target_words": {"type": "integer"},
"keywords": {"type": "array", "items": {"type": "string"}}
},
"required": ["heading", "subheadings", "key_points", "target_words", "keywords"]
}
enhanced_data = gemini_structured_json_response(
prompt=enhancement_prompt,
schema=enhancement_schema,
temperature=0.4,
max_tokens=1000
)
if isinstance(enhanced_data, dict) and 'error' not in enhanced_data:
return BlogOutlineSection(
id=section.id,
heading=enhanced_data.get('heading', section.heading),
subheadings=enhanced_data.get('subheadings', section.subheadings),
key_points=enhanced_data.get('key_points', section.key_points),
references=section.references,
target_words=enhanced_data.get('target_words', section.target_words),
keywords=enhanced_data.get('keywords', section.keywords)
)
except Exception as e:
logger.warning(f"AI section enhancement failed: {e}")
return section

View File

@@ -0,0 +1,21 @@
"""
Research module for AI Blog Writer.
This module handles all research-related functionality including:
- Google Search grounding integration
- Keyword analysis and competitor research
- Content angle discovery
- Research caching and optimization
"""
from .research_service import ResearchService
from .keyword_analyzer import KeywordAnalyzer
from .competitor_analyzer import CompetitorAnalyzer
from .content_angle_generator import ContentAngleGenerator
__all__ = [
'ResearchService',
'KeywordAnalyzer',
'CompetitorAnalyzer',
'ContentAngleGenerator'
]

View File

@@ -0,0 +1,71 @@
"""
Competitor Analyzer - AI-powered competitor analysis for research content.
Extracts competitor insights and market intelligence from research content.
"""
from typing import Dict, Any
from loguru import logger
class CompetitorAnalyzer:
"""Analyzes competitors and market intelligence from research content."""
def analyze(self, content: str) -> Dict[str, Any]:
"""Parse comprehensive competitor analysis from the research content using AI."""
competitor_prompt = f"""
Analyze the following research content and extract competitor insights:
Research Content:
{content[:3000]}
Extract and analyze:
1. Top competitors mentioned (companies, brands, platforms)
2. Content gaps (what competitors are missing)
3. Market opportunities (untapped areas)
4. Competitive advantages (what makes content unique)
5. Market positioning insights
6. Industry leaders and their strategies
Respond with JSON:
{{
"top_competitors": ["competitor1", "competitor2"],
"content_gaps": ["gap1", "gap2"],
"opportunities": ["opportunity1", "opportunity2"],
"competitive_advantages": ["advantage1", "advantage2"],
"market_positioning": "positioning insights",
"industry_leaders": ["leader1", "leader2"],
"analysis_notes": "Comprehensive competitor analysis summary"
}}
"""
from services.llm_providers.gemini_provider import gemini_structured_json_response
competitor_schema = {
"type": "object",
"properties": {
"top_competitors": {"type": "array", "items": {"type": "string"}},
"content_gaps": {"type": "array", "items": {"type": "string"}},
"opportunities": {"type": "array", "items": {"type": "string"}},
"competitive_advantages": {"type": "array", "items": {"type": "string"}},
"market_positioning": {"type": "string"},
"industry_leaders": {"type": "array", "items": {"type": "string"}},
"analysis_notes": {"type": "string"}
},
"required": ["top_competitors", "content_gaps", "opportunities", "competitive_advantages", "market_positioning", "industry_leaders", "analysis_notes"]
}
competitor_analysis = gemini_structured_json_response(
prompt=competitor_prompt,
schema=competitor_schema,
temperature=0.3,
max_tokens=1000
)
if isinstance(competitor_analysis, dict) and 'error' not in competitor_analysis:
return competitor_analysis
else:
# Fail gracefully - no fallback data
logger.error(f"AI competitor analysis failed: {competitor_analysis}")
raise ValueError(f"Competitor analysis failed: {competitor_analysis.get('error', 'Unknown error')}")

View File

@@ -0,0 +1,79 @@
"""
Content Angle Generator - AI-powered content angle discovery.
Generates strategic content angles from research content for blog posts.
"""
from typing import List
from loguru import logger
class ContentAngleGenerator:
"""Generates strategic content angles from research content."""
def generate(self, content: str, topic: str, industry: str) -> List[str]:
"""Parse strategic content angles from the research content using AI."""
angles_prompt = f"""
Analyze the following research content and create strategic content angles for: {topic} in {industry}
Research Content:
{content[:3000]}
Create 7 compelling content angles that:
1. Leverage current trends and data from the research
2. Address content gaps and opportunities
3. Appeal to different audience segments
4. Include unique perspectives not covered by competitors
5. Incorporate specific statistics, case studies, or expert insights
6. Create emotional connection and urgency
7. Provide actionable value to readers
Each angle should be:
- Specific and data-driven
- Unique and differentiated
- Compelling and click-worthy
- Actionable for readers
Respond with JSON:
{{
"content_angles": [
"Specific angle 1 with data/trends",
"Specific angle 2 with unique perspective",
"Specific angle 3 with actionable insights",
"Specific angle 4 with case study focus",
"Specific angle 5 with future outlook",
"Specific angle 6 with problem-solving focus",
"Specific angle 7 with industry insights"
]
}}
"""
from services.llm_providers.gemini_provider import gemini_structured_json_response
angles_schema = {
"type": "object",
"properties": {
"content_angles": {
"type": "array",
"items": {"type": "string"},
"minItems": 5,
"maxItems": 7
}
},
"required": ["content_angles"]
}
angles_result = gemini_structured_json_response(
prompt=angles_prompt,
schema=angles_schema,
temperature=0.7,
max_tokens=800
)
if isinstance(angles_result, dict) and 'content_angles' in angles_result:
return angles_result['content_angles'][:7]
else:
# Fail gracefully - no fallback data
logger.error(f"AI content angles generation failed: {angles_result}")
raise ValueError(f"Content angles generation failed: {angles_result.get('error', 'Unknown error')}")

View File

@@ -0,0 +1,78 @@
"""
Keyword Analyzer - AI-powered keyword analysis for research content.
Extracts and analyzes keywords from research content using structured AI responses.
"""
from typing import Dict, Any, List
from loguru import logger
class KeywordAnalyzer:
"""Analyzes keywords from research content using AI-powered extraction."""
def analyze(self, content: str, original_keywords: List[str]) -> Dict[str, Any]:
"""Parse comprehensive keyword analysis from the research content using AI."""
# Use AI to extract and analyze keywords from the rich research content
keyword_prompt = f"""
Analyze the following research content and extract comprehensive keyword insights for: {', '.join(original_keywords)}
Research Content:
{content[:3000]} # Limit to avoid token limits
Extract and analyze:
1. Primary keywords (main topic terms)
2. Secondary keywords (related terms, synonyms)
3. Long-tail opportunities (specific phrases people search for)
4. Search intent (informational, commercial, navigational, transactional)
5. Keyword difficulty assessment (1-10 scale)
6. Content gaps (what competitors are missing)
7. Semantic keywords (related concepts)
8. Trending terms (emerging keywords)
Respond with JSON:
{{
"primary": ["keyword1", "keyword2"],
"secondary": ["related1", "related2"],
"long_tail": ["specific phrase 1", "specific phrase 2"],
"search_intent": "informational|commercial|navigational|transactional",
"difficulty": 7,
"content_gaps": ["gap1", "gap2"],
"semantic_keywords": ["concept1", "concept2"],
"trending_terms": ["trend1", "trend2"],
"analysis_insights": "Brief analysis of keyword landscape"
}}
"""
from services.llm_providers.gemini_provider import gemini_structured_json_response
keyword_schema = {
"type": "object",
"properties": {
"primary": {"type": "array", "items": {"type": "string"}},
"secondary": {"type": "array", "items": {"type": "string"}},
"long_tail": {"type": "array", "items": {"type": "string"}},
"search_intent": {"type": "string"},
"difficulty": {"type": "integer"},
"content_gaps": {"type": "array", "items": {"type": "string"}},
"semantic_keywords": {"type": "array", "items": {"type": "string"}},
"trending_terms": {"type": "array", "items": {"type": "string"}},
"analysis_insights": {"type": "string"}
},
"required": ["primary", "secondary", "long_tail", "search_intent", "difficulty", "content_gaps", "semantic_keywords", "trending_terms", "analysis_insights"]
}
keyword_analysis = gemini_structured_json_response(
prompt=keyword_prompt,
schema=keyword_schema,
temperature=0.3,
max_tokens=1000
)
if isinstance(keyword_analysis, dict) and 'error' not in keyword_analysis:
return keyword_analysis
else:
# Fail gracefully - no fallback data
logger.error(f"AI keyword analysis failed: {keyword_analysis}")
raise ValueError(f"Keyword analysis failed: {keyword_analysis.get('error', 'Unknown error')}")

View File

@@ -0,0 +1,268 @@
"""
Research Service - Core research functionality for AI Blog Writer.
Handles Google Search grounding, caching, and research orchestration.
"""
from typing import Dict, Any, List
from loguru import logger
from models.blog_models import (
BlogResearchRequest,
BlogResearchResponse,
ResearchSource,
)
from .keyword_analyzer import KeywordAnalyzer
from .competitor_analyzer import CompetitorAnalyzer
from .content_angle_generator import ContentAngleGenerator
class ResearchService:
"""Service for conducting comprehensive research using Google Search grounding."""
def __init__(self):
self.keyword_analyzer = KeywordAnalyzer()
self.competitor_analyzer = CompetitorAnalyzer()
self.content_angle_generator = ContentAngleGenerator()
async def research(self, request: BlogResearchRequest) -> BlogResearchResponse:
"""
Stage 1: Research & Strategy (AI Orchestration)
Uses ONLY Gemini's native Google Search grounding - ONE API call for everything.
Follows LinkedIn service pattern for efficiency and cost optimization.
Includes intelligent caching for exact keyword matches.
"""
try:
from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider
from services.cache.research_cache import research_cache
topic = request.topic or ", ".join(request.keywords)
industry = request.industry or (request.persona.industry if request.persona and request.persona.industry else "General")
target_audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'
# Check cache first for exact keyword match
cached_result = research_cache.get_cached_result(
keywords=request.keywords,
industry=industry,
target_audience=target_audience
)
if cached_result:
logger.info(f"Returning cached research result for keywords: {request.keywords}")
return BlogResearchResponse(**cached_result)
# Cache miss - proceed with API call
logger.info(f"Cache miss - making API call for keywords: {request.keywords}")
gemini = GeminiGroundedProvider()
# Single comprehensive research prompt - Gemini handles Google Search automatically
research_prompt = f"""
Research the topic "{topic}" in the {industry} industry for {target_audience} audience. Provide a comprehensive analysis including:
1. Current trends and insights (2024-2025)
2. Key statistics and data points with sources
3. Industry expert opinions and quotes
4. Recent developments and news
5. Market analysis and forecasts
6. Best practices and case studies
7. Keyword analysis: primary, secondary, and long-tail opportunities
8. Competitor analysis: top players and content gaps
9. Content angle suggestions: 5 compelling angles for blog posts
Focus on factual, up-to-date information from credible sources.
Include specific data points, percentages, and recent developments.
Structure your response with clear sections for each analysis area.
"""
# Single Gemini call with native Google Search grounding - no fallbacks
gemini_result = await gemini.generate_grounded_content(
prompt=research_prompt,
content_type="research",
max_tokens=2000
)
# Extract sources from grounding metadata
sources = self._extract_sources_from_grounding(gemini_result)
# Extract search widget and queries for UI display
search_widget = gemini_result.get("search_widget", "") or ""
search_queries = gemini_result.get("search_queries", []) or []
# Parse the comprehensive response for different analysis components
content = gemini_result.get("content", "")
keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords)
competitor_analysis = self.competitor_analyzer.analyze(content)
suggested_angles = self.content_angle_generator.generate(content, topic, industry)
logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
# Create the response
response = BlogResearchResponse(
success=True,
sources=sources,
keyword_analysis=keyword_analysis,
competitor_analysis=competitor_analysis,
suggested_angles=suggested_angles,
# Add search widget and queries for UI display
search_widget=search_widget if 'search_widget' in locals() else "",
search_queries=search_queries if 'search_queries' in locals() else [],
)
# Cache the successful result for future exact keyword matches
research_cache.cache_result(
keywords=request.keywords,
industry=industry,
target_audience=target_audience,
result=response.dict()
)
return response
except Exception as e:
error_message = str(e)
logger.error(f"Research failed: {error_message}")
# Return a graceful failure response instead of raising
return BlogResearchResponse(
success=False,
sources=[],
keyword_analysis={},
competitor_analysis={},
suggested_angles=[],
search_widget="",
search_queries=[],
error_message=error_message
)
async def research_with_progress(self, request: BlogResearchRequest, task_id: str) -> BlogResearchResponse:
"""
Research method with progress updates for real-time feedback.
"""
try:
from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider
from services.cache.research_cache import research_cache
from api.blog_writer.router import _update_progress
topic = request.topic or ", ".join(request.keywords)
industry = request.industry or (request.persona.industry if request.persona and request.persona.industry else "General")
target_audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'
# Check cache first for exact keyword match
await _update_progress(task_id, "🔍 Checking cache for existing research...")
cached_result = research_cache.get_cached_result(
keywords=request.keywords,
industry=industry,
target_audience=target_audience
)
if cached_result:
await _update_progress(task_id, "✅ Found cached research results! Returning instantly...")
logger.info(f"Returning cached research result for keywords: {request.keywords}")
return BlogResearchResponse(**cached_result)
# Cache miss - proceed with API call
await _update_progress(task_id, "🌐 Cache miss - connecting to Google Search grounding...")
logger.info(f"Cache miss - making API call for keywords: {request.keywords}")
gemini = GeminiGroundedProvider()
# Single comprehensive research prompt - Gemini handles Google Search automatically
research_prompt = f"""
Research the topic "{topic}" in the {industry} industry for {target_audience} audience. Provide a comprehensive analysis including:
1. Current trends and insights (2024-2025)
2. Key statistics and data points with sources
3. Industry expert opinions and quotes
4. Recent developments and news
5. Market analysis and forecasts
6. Best practices and case studies
7. Keyword analysis: primary, secondary, and long-tail opportunities
8. Competitor analysis: top players and content gaps
9. Content angle suggestions: 5 compelling angles for blog posts
Focus on factual, up-to-date information from credible sources.
Include specific data points, percentages, and recent developments.
Structure your response with clear sections for each analysis area.
"""
await _update_progress(task_id, "🤖 Making AI request to Gemini with Google Search grounding...")
# Single Gemini call with native Google Search grounding - no fallbacks
gemini_result = await gemini.generate_grounded_content(
prompt=research_prompt,
content_type="research",
max_tokens=2000
)
await _update_progress(task_id, "📊 Processing research results and extracting insights...")
# Extract sources from grounding metadata
sources = self._extract_sources_from_grounding(gemini_result)
# Extract search widget and queries for UI display
search_widget = gemini_result.get("search_widget", "") or ""
search_queries = gemini_result.get("search_queries", []) or []
await _update_progress(task_id, "🔍 Analyzing keywords and content angles...")
# Parse the comprehensive response for different analysis components
content = gemini_result.get("content", "")
keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords)
competitor_analysis = self.competitor_analyzer.analyze(content)
suggested_angles = self.content_angle_generator.generate(content, topic, industry)
await _update_progress(task_id, "💾 Caching results for future use...")
logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
# Create the response
response = BlogResearchResponse(
success=True,
sources=sources,
keyword_analysis=keyword_analysis,
competitor_analysis=competitor_analysis,
suggested_angles=suggested_angles,
# Add search widget and queries for UI display
search_widget=search_widget if 'search_widget' in locals() else "",
search_queries=search_queries if 'search_queries' in locals() else [],
)
# Cache the successful result for future exact keyword matches
research_cache.cache_result(
keywords=request.keywords,
industry=industry,
target_audience=target_audience,
result=response.dict()
)
return response
except Exception as e:
error_message = str(e)
logger.error(f"Research failed: {error_message}")
# Return a graceful failure response instead of raising
return BlogResearchResponse(
success=False,
sources=[],
keyword_analysis={},
competitor_analysis={},
suggested_angles=[],
search_widget="",
search_queries=[],
error_message=error_message
)
def _extract_sources_from_grounding(self, gemini_result: Dict[str, Any]) -> List[ResearchSource]:
"""Extract sources from Gemini grounding metadata."""
sources = []
# The Gemini grounded provider already extracts sources and puts them in the 'sources' field
raw_sources = gemini_result.get("sources", [])
for src in raw_sources:
source = ResearchSource(
title=src.get("title", "Untitled"),
url=src.get("url", ""),
excerpt=src.get("content", "")[:500] if src.get("content") else f"Source from {src.get('title', 'web')}",
credibility_score=float(src.get("credibility_score", 0.8)),
published_at=str(src.get("publication_date", "2024-01-01"))
)
sources.append(source)
return sources

1
backend/services/cache/__init__.py vendored Normal file
View File

@@ -0,0 +1 @@
# Cache services for AI Blog Writer

172
backend/services/cache/research_cache.py vendored Normal file
View File

@@ -0,0 +1,172 @@
"""
Research Cache Service
Provides intelligent caching for Google grounded research results to reduce API costs.
Only returns cached results for exact keyword matches to ensure accuracy.
"""
import hashlib
import json
from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
from loguru import logger
class ResearchCache:
"""Cache for research results with exact keyword matching."""
def __init__(self, max_cache_size: int = 100, cache_ttl_hours: int = 24):
"""
Initialize the research cache.
Args:
max_cache_size: Maximum number of cached entries
cache_ttl_hours: Time-to-live for cache entries in hours
"""
self.cache: Dict[str, Dict[str, Any]] = {}
self.max_cache_size = max_cache_size
self.cache_ttl = timedelta(hours=cache_ttl_hours)
def _generate_cache_key(self, keywords: List[str], industry: str, target_audience: str) -> str:
"""
Generate a cache key based on exact keyword match.
Args:
keywords: List of research keywords
industry: Industry context
target_audience: Target audience context
Returns:
MD5 hash of the normalized parameters
"""
# Normalize and sort keywords for consistent hashing
normalized_keywords = sorted([kw.lower().strip() for kw in keywords])
normalized_industry = industry.lower().strip() if industry else "general"
normalized_audience = target_audience.lower().strip() if target_audience else "general"
# Create a consistent string representation
cache_string = f"{normalized_keywords}|{normalized_industry}|{normalized_audience}"
# Generate MD5 hash
return hashlib.md5(cache_string.encode('utf-8')).hexdigest()
def _is_cache_entry_valid(self, entry: Dict[str, Any]) -> bool:
"""Check if a cache entry is still valid (not expired)."""
if 'created_at' not in entry:
return False
created_at = datetime.fromisoformat(entry['created_at'])
return datetime.now() - created_at < self.cache_ttl
def _cleanup_expired_entries(self):
"""Remove expired cache entries."""
expired_keys = []
for key, entry in self.cache.items():
if not self._is_cache_entry_valid(entry):
expired_keys.append(key)
for key in expired_keys:
del self.cache[key]
logger.debug(f"Removed expired cache entry: {key}")
def _evict_oldest_entries(self, num_to_evict: int):
"""Evict the oldest cache entries when cache is full."""
# Sort by creation time and remove oldest entries
sorted_entries = sorted(
self.cache.items(),
key=lambda x: x[1].get('created_at', ''),
reverse=False
)
for i in range(min(num_to_evict, len(sorted_entries))):
key = sorted_entries[i][0]
del self.cache[key]
logger.debug(f"Evicted oldest cache entry: {key}")
def get_cached_result(self, keywords: List[str], industry: str, target_audience: str) -> Optional[Dict[str, Any]]:
"""
Get cached research result for exact keyword match.
Args:
keywords: List of research keywords
industry: Industry context
target_audience: Target audience context
Returns:
Cached research result if found and valid, None otherwise
"""
cache_key = self._generate_cache_key(keywords, industry, target_audience)
if cache_key not in self.cache:
logger.debug(f"Cache miss for keywords: {keywords}")
return None
entry = self.cache[cache_key]
# Check if entry is still valid
if not self._is_cache_entry_valid(entry):
del self.cache[cache_key]
logger.debug(f"Cache entry expired for keywords: {keywords}")
return None
logger.info(f"Cache hit for keywords: {keywords} (saved API call)")
return entry.get('result')
def cache_result(self, keywords: List[str], industry: str, target_audience: str, result: Dict[str, Any]):
"""
Cache a research result.
Args:
keywords: List of research keywords
industry: Industry context
target_audience: Target audience context
result: Research result to cache
"""
cache_key = self._generate_cache_key(keywords, industry, target_audience)
# Cleanup expired entries first
self._cleanup_expired_entries()
# Check if cache is full and evict if necessary
if len(self.cache) >= self.max_cache_size:
num_to_evict = len(self.cache) - self.max_cache_size + 1
self._evict_oldest_entries(num_to_evict)
# Store the result
self.cache[cache_key] = {
'result': result,
'created_at': datetime.now().isoformat(),
'keywords': keywords,
'industry': industry,
'target_audience': target_audience
}
logger.info(f"Cached research result for keywords: {keywords}")
def get_cache_stats(self) -> Dict[str, Any]:
"""Get cache statistics."""
self._cleanup_expired_entries()
return {
'total_entries': len(self.cache),
'max_size': self.max_cache_size,
'ttl_hours': self.cache_ttl.total_seconds() / 3600,
'entries': [
{
'keywords': entry['keywords'],
'industry': entry['industry'],
'target_audience': entry['target_audience'],
'created_at': entry['created_at']
}
for entry in self.cache.values()
]
}
def clear_cache(self):
"""Clear all cached entries."""
self.cache.clear()
logger.info("Research cache cleared")
# Global cache instance
research_cache = ResearchCache()

View File

@@ -9,6 +9,8 @@ Based on Google AI's official grounding documentation.
import os
import json
import re
import time
import asyncio
from typing import List, Dict, Any, Optional
from datetime import datetime
from loguru import logger
@@ -104,6 +106,29 @@ class GeminiGroundedProvider:
)
except asyncio.TimeoutError:
raise Exception(f"Gemini API request timed out after {self.timeout} seconds")
except Exception as api_error:
# Handle specific Google API errors with retry logic
error_str = str(api_error)
if "503" in error_str and "overloaded" in error_str:
# Conservative retry for overloaded service (expensive API calls)
response = await self._retry_with_backoff(
lambda: self._make_api_request(grounded_prompt, config),
max_retries=1, # Only 1 retry to avoid excessive costs
base_delay=5 # Longer delay
)
elif "429" in error_str:
# Conservative retry for rate limits
response = await self._retry_with_backoff(
lambda: self._make_api_request(grounded_prompt, config),
max_retries=1, # Only 1 retry
base_delay=10 # Much longer delay for rate limits
)
elif "401" in error_str or "403" in error_str:
raise Exception("Authentication failed. Please check your API credentials.")
elif "400" in error_str:
raise Exception("Invalid request. Please check your input parameters.")
else:
raise Exception(f"Google AI service error: {error_str}")
# Process the grounded response
result = self._process_grounded_response(response, content_type)
@@ -112,9 +137,47 @@ class GeminiGroundedProvider:
return result
except Exception as e:
logger.error(f"❌ Error generating grounded content: {str(e)}")
# Log error without causing secondary exceptions
try:
logger.error(f"❌ Error generating grounded content: {str(e)}")
except:
# Fallback to print if logging fails
print(f"Error generating grounded content: {str(e)}")
raise
async def _make_api_request(self, grounded_prompt: str, config: Any):
"""Make the actual API request to Gemini."""
import concurrent.futures
loop = asyncio.get_event_loop()
with concurrent.futures.ThreadPoolExecutor() as executor:
return await asyncio.wait_for(
loop.run_in_executor(
executor,
lambda: self.client.models.generate_content(
model="gemini-2.5-flash",
contents=grounded_prompt,
config=config,
)
),
timeout=self.timeout
)
async def _retry_with_backoff(self, func, max_retries: int = 3, base_delay: float = 1.0):
"""Retry a function with exponential backoff."""
for attempt in range(max_retries + 1):
try:
return await func()
except Exception as e:
if attempt == max_retries:
# Last attempt failed, raise the error
raise e
# Calculate delay with exponential backoff
delay = base_delay * (2 ** attempt)
logger.warning(f"Attempt {attempt + 1} failed, retrying in {delay} seconds: {str(e)}")
await asyncio.sleep(delay)
def _build_grounded_prompt(self, prompt: str, content_type: str) -> str:
"""
Build a prompt optimized for grounded content generation.

View File

@@ -389,11 +389,37 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
config=generation_config,
)
# According to the documentation, we should use response.parsed for structured output
# Check for parsed content first (primary method for structured output)
if hasattr(response, 'parsed') and response.parsed is not None:
logger.info("Using response.parsed for structured output")
return response.parsed
# Check for text content as fallback
if hasattr(response, 'text') and response.text:
logger.info("No parsed content, trying to parse text response")
try:
import json
parsed_text = json.loads(response.text)
logger.info("Successfully parsed text as JSON")
return parsed_text
except json.JSONDecodeError as e:
logger.error(f"Failed to parse text as JSON: {e}")
# Check candidates for content (fallback for edge cases)
if hasattr(response, 'candidates') and response.candidates:
candidate = response.candidates[0]
if hasattr(candidate, 'content') and candidate.content:
if hasattr(candidate.content, 'parts') and candidate.content.parts:
for part in candidate.content.parts:
if hasattr(part, 'text') and part.text:
try:
import json
parsed_text = json.loads(part.text)
logger.info("Successfully parsed candidate text as JSON")
return parsed_text
except json.JSONDecodeError as e:
logger.error(f"Failed to parse candidate text as JSON: {e}")
logger.error("No valid structured response content found")
return {"error": "No valid structured response content found"}