Files
ALwrity/backend/services/blog_writer/outline/outline_generator.py

352 lines
17 KiB
Python

"""
Outline Generator - AI-powered outline generation from research data.
Generates comprehensive, SEO-optimized outlines using research intelligence.
"""
from typing import Dict, Any, List
import asyncio
from loguru import logger
from models.blog_models import (
BlogOutlineRequest,
BlogOutlineResponse,
BlogOutlineSection,
)
class OutlineGenerator:
    """Generate AI-powered blog outlines from research data.

    The public entry points are :meth:`generate` and
    :meth:`generate_with_progress`. Both build a prompt from the research
    attached to the request, ask Gemini for a structured JSON outline
    (retrying transient failures) and convert the result into
    ``BlogOutlineSection`` objects wrapped in a ``BlogOutlineResponse``.
    """

    async def generate(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
        """Generate an AI-powered outline using the research results.

        Args:
            request: Outline request carrying the research data and optional
                ``custom_instructions``.

        Returns:
            A successful ``BlogOutlineResponse`` with title options and
            outline sections.

        Raises:
            ValueError: If the AI call fails or returns an unusable structure.
        """
        (primary_keywords, secondary_keywords, content_angles, sources,
         search_intent, custom_instructions) = self._extract_research_inputs(request)

        # Build comprehensive outline generation prompt with rich research data
        outline_prompt = self._build_outline_prompt(
            primary_keywords, secondary_keywords, content_angles, sources,
            search_intent, request, custom_instructions
        )
        logger.info("Generating AI-powered outline using research results")

        # Define schema with proper property ordering (critical for Gemini API)
        outline_schema = self._get_outline_schema()

        # Generate outline using structured JSON response with retry logic
        outline_data = await self._generate_with_retry(outline_prompt, outline_schema)

        outline_sections, title_options = self._finalize_outline(
            outline_data, primary_keywords, sources
        )
        logger.info(f"Generated outline with {len(outline_sections)} sections and {len(title_options)} title options")
        return BlogOutlineResponse(
            success=True,
            title_options=title_options,
            outline=outline_sections
        )

    async def generate_with_progress(self, request: BlogOutlineRequest, task_id: str) -> BlogOutlineResponse:
        """Generate an outline while publishing progress updates for ``task_id``.

        Performs the same steps as :meth:`generate`, reporting each stage
        through ``_update_progress`` so a client can show real-time feedback.

        Args:
            request: Outline request carrying the research data.
            task_id: Identifier passed to every ``_update_progress`` call.

        Returns:
            A successful ``BlogOutlineResponse`` with title options and
            outline sections.

        Raises:
            ValueError: If the AI call fails or returns an unusable structure.
        """
        # Imported lazily, as in the rest of this class.
        from api.blog_writer.router import _update_progress

        (primary_keywords, secondary_keywords, content_angles, sources,
         search_intent, custom_instructions) = self._extract_research_inputs(request)

        await _update_progress(task_id, "📊 Analyzing research data and building content strategy...")
        # Build comprehensive outline generation prompt with rich research data
        outline_prompt = self._build_outline_prompt(
            primary_keywords, secondary_keywords, content_angles, sources,
            search_intent, request, custom_instructions
        )

        await _update_progress(task_id, "🤖 Generating AI-powered outline with research insights...")
        # Define schema with proper property ordering (critical for Gemini API)
        outline_schema = self._get_outline_schema()

        await _update_progress(task_id, "🔄 Making AI request to generate structured outline...")
        # Generate outline using structured JSON response with retry logic
        outline_data = await self._generate_with_retry(outline_prompt, outline_schema, task_id)

        await _update_progress(task_id, "📝 Processing outline structure and validating sections...")
        outline_sections, title_options = self._finalize_outline(
            outline_data, primary_keywords, sources
        )

        await _update_progress(task_id, "✅ Outline generation completed successfully!")
        return BlogOutlineResponse(
            success=True,
            title_options=title_options,
            outline=outline_sections
        )

    def _extract_research_inputs(self, request: BlogOutlineRequest) -> tuple:
        """Pull the prompt inputs out of ``request``.

        Returns:
            ``(primary_keywords, secondary_keywords, content_angles, sources,
            search_intent, custom_instructions)``. Missing or ``None`` list
            fields are normalised to empty lists so later slicing/joining is
            safe.
        """
        research = request.research
        keyword_analysis = research.keyword_analysis or {}
        return (
            keyword_analysis.get('primary') or [],
            keyword_analysis.get('secondary') or [],
            research.suggested_angles or [],
            research.sources or [],
            keyword_analysis.get('search_intent') or 'informational',
            # custom_instructions is optional on the request model
            getattr(request, 'custom_instructions', None),
        )

    def _finalize_outline(self, outline_data: Dict[str, Any], primary_keywords: List[str],
                          sources: List) -> tuple:
        """Convert raw outline data into ``(sections, title_options)``.

        Falls back to generated titles when the AI response has none.
        """
        outline_sections = self._convert_to_sections(outline_data, sources)
        title_options = outline_data.get('title_options', [])
        if not title_options:
            title_options = self._generate_fallback_titles(primary_keywords)
        return outline_sections, title_options

    @staticmethod
    def _join_terms(values, limit: int = None, sep: str = ', ') -> str:
        """Join up to ``limit`` items of ``values`` with ``sep``.

        Tolerates ``None``/empty input (returns ``""``), a bare string
        (returned as-is) and non-string items (converted with ``str``), so a
        sparse or loosely-typed research payload cannot crash prompt building.
        """
        if not values:
            return ""
        if isinstance(values, str):
            return values
        return sep.join(str(item) for item in list(values)[:limit])

    def _build_outline_prompt(self, primary_keywords: List[str], secondary_keywords: List[str],
                              content_angles: List[str], sources: List, search_intent: str,
                              request: BlogOutlineRequest, custom_instructions: str = None) -> str:
        """Build the comprehensive outline generation prompt.

        Args:
            primary_keywords: Main keywords; also used as the topic line.
            secondary_keywords: Supporting keywords.
            content_angles: Angles from research; the first six are listed.
            sources: Research sources; only their count is mentioned.
            search_intent: Search intent label inserted verbatim.
            request: Supplies word count, persona and the research
                ``keyword_analysis`` / ``competitor_analysis`` dictionaries.
            custom_instructions: Optional user text; when truthy it is
                inserted in both the brief and the requirements sections.

        Returns:
            The prompt string sent to the model.
        """
        join = self._join_terms
        keyword_analysis = request.research.keyword_analysis or {}
        competitor_analysis = request.research.competitor_analysis or {}

        persona = request.persona
        industry = getattr(persona, 'industry', 'General') if persona else 'General'
        audience = getattr(persona, 'target_audience', 'General') if persona else 'General'

        custom_brief = f"CUSTOM USER INSTRUCTIONS: {custom_instructions}" if custom_instructions else ""
        custom_requirements = f"CUSTOM REQUIREMENTS: {custom_instructions}" if custom_instructions else ""
        angle_lines = join(content_angles, 6, sep=chr(10))

        # The prompt body is kept flush-left: its text is sent to the model verbatim.
        return f"""
You are a world-class content strategist and SEO expert with 15+ years of experience creating viral, high-converting blog content. Your outlines have generated millions of views and driven significant business results.
CONTENT STRATEGY BRIEF:
Topic: {join(primary_keywords)}
Search Intent: {search_intent}
Target Word Count: {request.word_count or 1500} words
Industry Context: {industry}
Audience: {audience}
{custom_brief}
RESEARCH INTELLIGENCE:
Primary Keywords: {join(primary_keywords)}
Secondary Keywords: {join(secondary_keywords)}
Long-tail Opportunities: {join(keyword_analysis.get('long_tail'), 5)}
Semantic Keywords: {join(keyword_analysis.get('semantic_keywords'), 5)}
Trending Terms: {join(keyword_analysis.get('trending_terms'), 3)}
Keyword Difficulty: {keyword_analysis.get('difficulty', 6)}/10
Content Gaps: {join(keyword_analysis.get('content_gaps'), 3)}
Content Angles Discovered:
{angle_lines}
Competitive Intelligence:
Top Competitors: {join(competitor_analysis.get('top_competitors'), 3)}
Market Opportunities: {join(competitor_analysis.get('opportunities'), 3)}
Competitive Advantages: {join(competitor_analysis.get('competitive_advantages'), 3)}
Market Positioning: {competitor_analysis.get('market_positioning', 'Standard positioning')}
Research Sources Available: {len(sources or [])} authoritative sources with current data
Key Statistics Available: Multiple data points, percentages, and expert quotes from credible sources
STRATEGIC OUTLINE REQUIREMENTS:
{custom_requirements}
1. CONTENT ARCHITECTURE:
- Create a logical, engaging narrative arc that guides readers from problem to solution
- Structure content to build authority and trust progressively
- Include data-driven insights and expert opinions from research
- Ensure each section adds unique value and builds upon previous sections
2. SEO OPTIMIZATION:
- Naturally integrate primary keywords in headings and content
- Use secondary keywords strategically throughout sections
- Include long-tail keywords in subheadings and key points
- Optimize for featured snippets and voice search
3. READER ENGAGEMENT:
- Start with compelling hooks and pain points
- Use storytelling elements and real-world examples
- Include actionable insights and practical takeaways
- End with clear next steps and calls-to-action
4. CONTENT DEPTH:
- Provide comprehensive coverage of the topic
- Include multiple perspectives and expert insights
- Address common questions and objections
- Offer unique angles not covered by competitors
5. WORD COUNT DISTRIBUTION:
- Introduction: 12% of total word count
- Main content sections: 76% of total word count
- Conclusion: 12% of total word count
- Ensure balanced section lengths for optimal readability
6. COMPETITIVE ADVANTAGE:
- Leverage content gaps identified in research
- Include unique data points and statistics
- Provide fresh perspectives on trending topics
- Address underserved audience segments
TITLE STRATEGY:
Create 5 compelling title options that:
- Include primary keywords naturally
- Promise clear value and outcomes
- Appeal to the target audience's pain points
- Stand out from competitor content
- Optimize for click-through rates
Generate a comprehensive outline with the following structure:
{{
"title_options": [
"Title 1 with primary keyword",
"Title 2 with emotional hook",
"Title 3 with benefit-focused approach",
"Title 4 with question format",
"Title 5 with urgency/trending angle"
],
"outline": [
{{
"heading": "Section heading with primary keyword",
"subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
"key_points": ["Key point 1", "Key point 2", "Key point 3"],
"word_count": 300,
"keywords": ["primary keyword", "secondary keyword"]
}}
]
}}
"""

    def _get_outline_schema(self) -> Dict[str, Any]:
        """Return the structured JSON schema for outline generation.

        The top-level object requires ``title_options`` (list of strings) and
        ``outline`` (list of section objects); ``propertyOrdering`` pins the
        order of those two keys for the Gemini API.
        """
        return {
            "type": "object",
            "properties": {
                "title_options": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "outline": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "heading": {"type": "string"},
                            "subheadings": {
                                "type": "array",
                                "items": {"type": "string"}
                            },
                            "key_points": {
                                "type": "array",
                                "items": {"type": "string"}
                            },
                            "word_count": {"type": "integer"},
                            "keywords": {
                                "type": "array",
                                "items": {"type": "string"}
                            }
                        },
                        "required": ["heading", "subheadings", "key_points", "word_count", "keywords"]
                    }
                }
            },
            "required": ["title_options", "outline"],
            "propertyOrdering": ["title_options", "outline"]
        }

    @staticmethod
    def _is_transient_error(message: str) -> bool:
        """Return True when ``message`` looks like a retryable overload error."""
        return "503" in message or "overloaded" in message

    async def _generate_with_retry(self, prompt: str, schema: Dict[str, Any], task_id: str = None) -> Dict[str, Any]:
        """Call Gemini for a structured outline, retrying transient failures.

        A failed attempt is retried (up to ``max_retries`` extra attempts,
        sleeping ``retry_delay`` seconds in between) when the call raised or
        returned an ``error`` mentioning "503"/"overloaded", or when the
        response lacks a list under ``outline``.

        Args:
            prompt: Prompt text sent to the model.
            schema: JSON schema describing the expected response.
            task_id: When given, progress messages are published for it.

        Returns:
            The response dictionary, guaranteed to contain a list under
            ``outline``.

        Raises:
            ValueError: When the attempt fails with a non-retryable error or
                retries are exhausted. The message is prefixed exactly once
                with "AI outline generation failed: ".
        """
        from services.llm_providers.gemini_provider import gemini_structured_json_response
        from api.blog_writer.router import _update_progress

        max_retries = 2  # Conservative retry for expensive API calls
        retry_delay = 5  # 5 second delay between retries
        total_attempts = max_retries + 1

        for attempt in range(total_attempts):
            attempt_label = f"attempt {attempt + 1}/{total_attempts}"
            cause = None
            try:
                if task_id:
                    await _update_progress(task_id, f"🤖 Calling Gemini API for outline generation ({attempt_label})...")
                # NOTE(review): this call is not awaited and its result is used
                # directly, so it runs synchronously and blocks the event loop
                # for its duration. Consider a worker thread once the
                # provider's thread-safety is confirmed.
                outline_data = gemini_structured_json_response(
                    prompt=prompt,
                    schema=schema,
                    temperature=0.3,
                    max_tokens=4000  # Increased to avoid MAX_TOKENS truncation
                )
            except Exception as e:
                cause = e
                failure = str(e)
                retryable = self._is_transient_error(failure)
                progress_note = f"⚠️ AI service error, retrying in {retry_delay} seconds..."
                log_note = f"Gemini API error, retrying in {retry_delay} seconds ({attempt_label}): {failure}"
            else:
                # Log response for debugging
                logger.info(f"Gemini response received: {type(outline_data)}")

                if isinstance(outline_data, dict) and 'error' in outline_data:
                    # The provider reported a failure inside the payload.
                    failure = str(outline_data['error'])
                    retryable = self._is_transient_error(failure)
                    if "503" in failure and "overloaded" in failure:
                        progress_note = f"⚠️ AI service overloaded, retrying in {retry_delay} seconds..."
                        log_note = f"Gemini API overloaded, retrying in {retry_delay} seconds ({attempt_label})"
                    else:
                        progress_note = f"⚠️ AI service error, retrying in {retry_delay} seconds..."
                        log_note = f"Gemini API error, retrying in {retry_delay} seconds ({attempt_label}): {failure}"
                elif (not isinstance(outline_data, dict)
                      or not isinstance(outline_data.get('outline'), list)):
                    # Structurally unusable response: always worth another try.
                    failure = "Invalid outline structure in Gemini response"
                    retryable = True
                    progress_note = f"⚠️ Invalid response structure, retrying in {retry_delay} seconds..."
                    log_note = f"Invalid response structure, retrying in {retry_delay} seconds ({attempt_label})"
                else:
                    # If we get here, the response is valid
                    return outline_data

            if retryable and attempt < max_retries:
                if task_id:
                    await _update_progress(task_id, progress_note)
                logger.warning(log_note)
                await asyncio.sleep(retry_delay)
                continue

            # Single exit point for failures: log once, wrap once, keep the cause.
            logger.error(f"Outline generation failed after {attempt + 1} attempts: {failure}")
            raise ValueError(f"AI outline generation failed: {failure}") from cause

    def _convert_to_sections(self, outline_data: Dict[str, Any], sources: List) -> List[BlogOutlineSection]:
        """Convert outline data to ``BlogOutlineSection`` objects.

        Entries that are not dictionaries or lack a ``heading`` are skipped.
        Section ids (``s1``, ``s2``, ...) follow the entry's position in the
        raw outline, so skipped entries leave gaps. Every section references
        the first three ``sources``.
        """
        shared_references = (sources or [])[:3]  # Use first 3 sources as references
        outline_sections = []
        for position, section_data in enumerate(outline_data.get('outline') or [], start=1):
            if not isinstance(section_data, dict) or 'heading' not in section_data:
                continue
            outline_sections.append(
                BlogOutlineSection(
                    id=f"s{position}",
                    heading=section_data['heading'],
                    subheadings=section_data.get('subheadings', []),
                    key_points=section_data.get('key_points', []),
                    references=shared_references,
                    target_words=section_data.get('word_count', 200),
                    keywords=section_data.get('keywords', [])
                )
            )
        return outline_sections

    def _generate_fallback_titles(self, primary_keywords: List[str]) -> List[str]:
        """Generate fallback titles when the AI response has no title options.

        Uses the first primary keyword (or ``"Topic"`` when there is none) and
        the current calendar year rather than a hard-coded one.
        """
        from datetime import date

        primary_keyword = primary_keywords[0] if primary_keywords else "Topic"
        return [
            f"The Complete Guide to {primary_keyword}",
            f"{primary_keyword}: Everything You Need to Know",
            f"How to Master {primary_keyword} in {date.today().year}"
        ]