Added image generation to blog writer

This commit is contained in:
ajaysi
2025-10-31 15:59:16 +05:30
parent 3219e6bbe4
commit cdb41aec1b
80 changed files with 7662 additions and 3951 deletions

View File

@@ -1,12 +1,14 @@
"""
EnhancedContentGenerator - thin orchestrator combining URL selection and Gemini provider.
EnhancedContentGenerator - thin orchestrator for section generation.
Provides Draft vs Polished modes and optional URL Context usage.
Provider parity:
- Uses main_text_generation.llm_text_gen to respect GPT_PROVIDER (Gemini/HF)
- No direct provider coupling here; Google grounding remains in research only
"""
from typing import Any, Dict
from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider
from services.llm_providers.main_text_generation import llm_text_gen
from .source_url_manager import SourceURLManager
from .context_memory import ContextMemory
from .transition_generator import TransitionGenerator
@@ -15,24 +17,37 @@ from .flow_analyzer import FlowAnalyzer
class EnhancedContentGenerator:
def __init__(self):
self.provider = GeminiGroundedProvider()
self.url_manager = SourceURLManager()
self.memory = ContextMemory(max_entries=12)
self.transitioner = TransitionGenerator()
self.flow = FlowAnalyzer()
async def generate_section(self, section: Any, research: Any, mode: str = "polished") -> Dict[str, Any]:
urls = self.url_manager.pick_relevant_urls(section, research)
prev_summary = self.memory.build_previous_sections_summary(limit=2)
prompt = self._build_prompt(section, research, prev_summary)
result = await self.provider.generate_grounded_content(
prompt=prompt,
content_type="linkedin_article",
temperature=0.6 if mode == "polished" else 0.8,
max_tokens=2048,
urls=urls,
mode=mode,
)
urls = self.url_manager.pick_relevant_urls(section, research)
prompt = self._build_prompt(section, research, prev_summary, urls)
# Provider-agnostic text generation (respect GPT_PROVIDER & circuit-breaker)
content_text: str = ""
try:
ai_resp = llm_text_gen(
prompt=prompt,
json_struct=None,
system_prompt=None,
)
if isinstance(ai_resp, dict) and ai_resp.get("text"):
content_text = ai_resp.get("text", "")
elif isinstance(ai_resp, str):
content_text = ai_resp
else:
# Fallback best-effort extraction
content_text = str(ai_resp or "")
except Exception as e:
content_text = ""
result = {
"content": content_text,
"sources": [{"title": u.get("title", ""), "url": u.get("url", "")} for u in urls] if urls else [],
}
# Generate transition and compute intelligent flow metrics
previous_text = prev_summary
current_text = result.get("content", "")
@@ -56,19 +71,22 @@ class EnhancedContentGenerator:
pass
return result
def _build_prompt(self, section: Any, research: Any, prev_summary: str) -> str:
def _build_prompt(self, section: Any, research: Any, prev_summary: str, urls: list) -> str:
heading = getattr(section, 'heading', 'Section')
key_points = getattr(section, 'key_points', [])
keywords = getattr(section, 'keywords', [])
target_words = getattr(section, 'target_words', 300)
url_block = "\n".join([f"- {u.get('title','')} ({u.get('url','')})" for u in urls]) if urls else "(no specific URLs provided)"
return (
f"You are writing the blog section '{heading}'.\n\n"
f"Context summary: {prev_summary}\n"
f"Key points: {', '.join(key_points)}\n"
f"Keywords: {', '.join(keywords)}\n"
f"Target word count: {target_words}.\n"
"Use only factual info from provided sources; add short transition, then body."
f"Context summary (previous sections): {prev_summary}\n\n"
f"Authoring requirements:\n"
f"- Target word count: ~{target_words}\n"
f"- Use the following key points: {', '.join(key_points)}\n"
f"- Include these keywords naturally: {', '.join(keywords)}\n"
f"- Cite insights from these sources when relevant (do not output raw URLs):\n{url_block}\n\n"
"Write engaging, well-structured markdown with clear paragraphs (2-4 sentences each) separated by double line breaks."
)

View File

@@ -15,7 +15,7 @@ from models.blog_models import (
MediumGeneratedSection,
ResearchSource,
)
from services.llm_providers.gemini_provider import gemini_structured_json_response
from services.llm_providers.main_text_generation import llm_text_gen
from services.cache.persistent_content_cache import persistent_content_cache
@@ -176,11 +176,9 @@ class MediumBlogGenerator:
f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
)
ai_resp = gemini_structured_json_response(
ai_resp = llm_text_gen(
prompt=prompt,
schema=schema,
temperature=0.2,
max_tokens=8192,
json_struct=schema,
system_prompt=system,
)

View File

@@ -275,11 +275,17 @@ class BlogWriterService:
# Initialize metadata generator
metadata_generator = BlogSEOMetadataGenerator()
# Generate comprehensive metadata
# Extract outline and seo_analysis from request
outline = request.outline if hasattr(request, 'outline') else None
seo_analysis = request.seo_analysis if hasattr(request, 'seo_analysis') else None
# Generate comprehensive metadata with full context
metadata_results = await metadata_generator.generate_comprehensive_metadata(
blog_content=request.content,
blog_title=request.title or "Untitled Blog Post",
research_data=request.research_data or {}
research_data=request.research_data or {},
outline=outline,
seo_analysis=seo_analysis
)
# Convert to BlogSEOMetadataResponse format

View File

@@ -40,7 +40,7 @@ Return JSON format:
}}"""
try:
from services.llm_providers.gemini_provider import gemini_structured_json_response
from services.llm_providers.main_text_generation import llm_text_gen
optimization_schema = {
"type": "object",
@@ -64,11 +64,10 @@ Return JSON format:
"propertyOrdering": ["outline"]
}
optimized_data = gemini_structured_json_response(
optimized_data = llm_text_gen(
prompt=optimization_prompt,
schema=optimization_schema,
temperature=0.3,
max_tokens=6000 # Match main outline generator
json_struct=optimization_schema,
system_prompt=None
)
# Handle the new schema format with "outline" wrapper

View File

@@ -20,7 +20,7 @@ class ResponseProcessor:
async def generate_with_retry(self, prompt: str, schema: Dict[str, Any], task_id: str = None) -> Dict[str, Any]:
"""Generate outline with retry logic for API failures."""
from services.llm_providers.gemini_provider import gemini_structured_json_response
from services.llm_providers.main_text_generation import llm_text_gen
from api.blog_writer.task_manager import task_manager
max_retries = 2 # Conservative retry for expensive API calls
@@ -29,17 +29,16 @@ class ResponseProcessor:
for attempt in range(max_retries + 1):
try:
if task_id:
await task_manager.update_progress(task_id, f"🤖 Calling Gemini API for outline generation (attempt {attempt + 1}/{max_retries + 1})...")
await task_manager.update_progress(task_id, f"🤖 Calling AI API for outline generation (attempt {attempt + 1}/{max_retries + 1})...")
outline_data = gemini_structured_json_response(
outline_data = llm_text_gen(
prompt=prompt,
schema=schema,
temperature=0.3,
max_tokens=6000 # Increased further to avoid truncation
json_struct=schema,
system_prompt=None
)
# Log response for debugging
logger.info(f"Gemini response received: {type(outline_data)}")
logger.info(f"AI response received: {type(outline_data)}")
# Check for errors in the response
if isinstance(outline_data, dict) and 'error' in outline_data:
@@ -47,17 +46,17 @@ class ResponseProcessor:
if "503" in error_msg and "overloaded" in error_msg and attempt < max_retries:
if task_id:
await task_manager.update_progress(task_id, f"⚠️ AI service overloaded, retrying in {retry_delay} seconds...")
logger.warning(f"Gemini API overloaded, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
logger.warning(f"AI API overloaded, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
await asyncio.sleep(retry_delay)
continue
elif "No valid structured response content found" in error_msg and attempt < max_retries:
if task_id:
await task_manager.update_progress(task_id, f"⚠️ Invalid response format, retrying in {retry_delay} seconds...")
logger.warning(f"Gemini response parsing failed, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
logger.warning(f"AI response parsing failed, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
await asyncio.sleep(retry_delay)
continue
else:
logger.error(f"Gemini structured response error: {outline_data['error']}")
logger.error(f"AI structured response error: {outline_data['error']}")
raise ValueError(f"AI outline generation failed: {outline_data['error']}")
# Validate required fields
@@ -69,7 +68,7 @@ class ResponseProcessor:
await asyncio.sleep(retry_delay)
continue
else:
raise ValueError("Invalid outline structure in Gemini response")
raise ValueError("Invalid outline structure in AI response")
# If we get here, the response is valid
return outline_data
@@ -79,7 +78,7 @@ class ResponseProcessor:
if ("503" in error_str or "overloaded" in error_str) and attempt < max_retries:
if task_id:
await task_manager.update_progress(task_id, f"⚠️ AI service error, retrying in {retry_delay} seconds...")
logger.warning(f"Gemini API error, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1}): {error_str}")
logger.warning(f"AI API error, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1}): {error_str}")
await asyncio.sleep(retry_delay)
continue
else:

View File

@@ -44,7 +44,7 @@ class SectionEnhancer:
"""
try:
from services.llm_providers.gemini_provider import gemini_structured_json_response
from services.llm_providers.main_text_generation import llm_text_gen
enhancement_schema = {
"type": "object",
@@ -58,11 +58,10 @@ class SectionEnhancer:
"required": ["heading", "subheadings", "key_points", "target_words", "keywords"]
}
enhanced_data = gemini_structured_json_response(
enhanced_data = llm_text_gen(
prompt=enhancement_prompt,
schema=enhancement_schema,
temperature=0.4,
max_tokens=1000
json_struct=enhancement_schema,
system_prompt=None
)
if isinstance(enhanced_data, dict) and 'error' not in enhanced_data:

View File

@@ -559,14 +559,11 @@ Analyze the mapping and provide your recommendations.
AI validation response
"""
try:
from services.llm_providers.gemini_provider import gemini_text_response
from services.llm_providers.main_text_generation import llm_text_gen
response = gemini_text_response(
response = llm_text_gen(
prompt=prompt,
temperature=0.3,
top_p=0.9,
n=1,
max_tokens=2000,
json_struct=None,
system_prompt=None
)

View File

@@ -10,13 +10,13 @@ import re
import textstat
from datetime import datetime
from typing import Dict, Any, List, Optional
from loguru import logger
from utils.logger_utils import get_service_logger
from services.seo_analyzer import (
ContentAnalyzer, KeywordAnalyzer,
URLStructureAnalyzer, AIInsightGenerator
)
from services.llm_providers.gemini_provider import gemini_structured_json_response
from services.llm_providers.main_text_generation import llm_text_gen
class BlogContentSEOAnalyzer:
@@ -24,11 +24,13 @@ class BlogContentSEOAnalyzer:
def __init__(self):
"""Initialize the blog content SEO analyzer"""
# Service-specific logger (no global reconfiguration)
global logger
logger = get_service_logger("blog_content_seo_analyzer")
self.content_analyzer = ContentAnalyzer()
self.keyword_analyzer = KeywordAnalyzer()
self.url_analyzer = URLStructureAnalyzer()
self.ai_insights = AIInsightGenerator()
self.gemini_provider = gemini_structured_json_response
logger.info("BlogContentSEOAnalyzer initialized")
@@ -598,7 +600,7 @@ class BlogContentSEOAnalyzer:
return recommendations
async def _run_ai_analysis(self, blog_content: str, keywords_data: Dict[str, Any], non_ai_results: Dict[str, Any]) -> Dict[str, Any]:
"""Run single AI analysis for structured insights"""
"""Run single AI analysis for structured insights (provider-agnostic)"""
try:
# Prepare context for AI analysis
context = {
@@ -610,7 +612,6 @@ class BlogContentSEOAnalyzer:
# Create AI prompt for structured analysis
prompt = self._create_ai_analysis_prompt(context)
# Get structured response from Gemini
schema = {
"type": "object",
"properties": {
@@ -653,18 +654,17 @@ class BlogContentSEOAnalyzer:
}
}
ai_response = self.gemini_provider(
# Provider-agnostic structured response respecting GPT_PROVIDER
ai_response = llm_text_gen(
prompt=prompt,
schema=schema,
temperature=0.2,
max_tokens=8192
json_struct=schema,
system_prompt=None
)
return ai_response
except Exception as e:
logger.error(f"AI analysis failed: {e}")
# Fail fast - don't return mock data
raise e
def _create_ai_analysis_prompt(self, context: Dict[str, Any]) -> str:

View File

@@ -12,7 +12,7 @@ from datetime import datetime
from typing import Dict, Any, List, Optional
from loguru import logger
from services.llm_providers.gemini_provider import gemini_structured_json_response
from services.llm_providers.main_text_generation import llm_text_gen
class BlogSEOMetadataGenerator:
@@ -20,14 +20,15 @@ class BlogSEOMetadataGenerator:
def __init__(self):
"""Initialize the metadata generator"""
self.gemini_provider = gemini_structured_json_response
logger.info("BlogSEOMetadataGenerator initialized")
async def generate_comprehensive_metadata(
self,
blog_content: str,
blog_title: str,
research_data: Dict[str, Any]
research_data: Dict[str, Any],
outline: Optional[List[Dict[str, Any]]] = None,
seo_analysis: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""
Generate comprehensive SEO metadata using maximum 2 AI calls
@@ -36,6 +37,8 @@ class BlogSEOMetadataGenerator:
blog_content: The blog content to analyze
blog_title: The blog title
research_data: Research data containing keywords and insights
outline: Outline structure with sections and headings
seo_analysis: SEO analysis results from previous phase
Returns:
Comprehensive metadata including all SEO elements
@@ -49,11 +52,15 @@ class BlogSEOMetadataGenerator:
# Call 1: Generate core SEO metadata (parallel with Call 2)
logger.info("Generating core SEO metadata")
core_metadata_task = self._generate_core_metadata(blog_content, blog_title, keywords_data)
core_metadata_task = self._generate_core_metadata(
blog_content, blog_title, keywords_data, outline, seo_analysis
)
# Call 2: Generate social media and structured data (parallel with Call 1)
logger.info("Generating social media and structured data")
social_metadata_task = self._generate_social_metadata(blog_content, blog_title, keywords_data)
social_metadata_task = self._generate_social_metadata(
blog_content, blog_title, keywords_data, outline, seo_analysis
)
# Wait for both calls to complete
core_metadata, social_metadata = await asyncio.gather(
@@ -105,12 +112,16 @@ class BlogSEOMetadataGenerator:
self,
blog_content: str,
blog_title: str,
keywords_data: Dict[str, Any]
keywords_data: Dict[str, Any],
outline: Optional[List[Dict[str, Any]]] = None,
seo_analysis: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""Generate core SEO metadata (Call 1)"""
try:
# Create comprehensive prompt for core metadata
prompt = self._create_core_metadata_prompt(blog_content, blog_title, keywords_data)
prompt = self._create_core_metadata_prompt(
blog_content, blog_title, keywords_data, outline, seo_analysis
)
# Define simplified structured schema for core metadata
schema = {
@@ -155,17 +166,26 @@ class BlogSEOMetadataGenerator:
"required": ["seo_title", "meta_description", "url_slug", "blog_tags", "blog_categories", "social_hashtags", "reading_time", "focus_keyword"]
}
# Get structured response from Gemini
ai_response = self.gemini_provider(
prompt,
schema,
temperature=0.3,
max_tokens=2048
# Get structured response using provider-agnostic llm_text_gen
ai_response_raw = llm_text_gen(
prompt=prompt,
json_struct=schema,
system_prompt=None
)
# Handle response: llm_text_gen may return dict (from structured JSON) or str (needs parsing)
ai_response = ai_response_raw
if isinstance(ai_response_raw, str):
try:
import json
ai_response = json.loads(ai_response_raw)
except json.JSONDecodeError:
logger.error(f"Failed to parse JSON response: {ai_response_raw[:200]}...")
ai_response = None
# Check if we got a valid response
if not ai_response or not isinstance(ai_response, dict):
logger.error("Core metadata generation failed: Invalid response from Gemini")
logger.error("Core metadata generation failed: Invalid response from LLM")
# Return fallback response
primary_keywords = ', '.join(keywords_data.get('primary_keywords', ['content']))
word_count = len(blog_content.split())
@@ -193,12 +213,16 @@ class BlogSEOMetadataGenerator:
self,
blog_content: str,
blog_title: str,
keywords_data: Dict[str, Any]
keywords_data: Dict[str, Any],
outline: Optional[List[Dict[str, Any]]] = None,
seo_analysis: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""Generate social media and structured data (Call 2)"""
try:
# Create comprehensive prompt for social metadata
prompt = self._create_social_metadata_prompt(blog_content, blog_title, keywords_data)
prompt = self._create_social_metadata_prompt(
blog_content, blog_title, keywords_data, outline, seo_analysis
)
# Define simplified structured schema for social metadata
schema = {
@@ -246,17 +270,26 @@ class BlogSEOMetadataGenerator:
"required": ["open_graph", "twitter_card", "json_ld_schema"]
}
# Get structured response from Gemini
ai_response = self.gemini_provider(
prompt,
schema,
temperature=0.3,
max_tokens=2048
# Get structured response using provider-agnostic llm_text_gen
ai_response_raw = llm_text_gen(
prompt=prompt,
json_struct=schema,
system_prompt=None
)
# Handle response: llm_text_gen may return dict (from structured JSON) or str (needs parsing)
ai_response = ai_response_raw
if isinstance(ai_response_raw, str):
try:
import json
ai_response = json.loads(ai_response_raw)
except json.JSONDecodeError:
logger.error(f"Failed to parse JSON response: {ai_response_raw[:200]}...")
ai_response = None
# Check if we got a valid response
if not ai_response or not isinstance(ai_response, dict) or not ai_response.get('open_graph') or not ai_response.get('twitter_card') or not ai_response.get('json_ld_schema'):
logger.error("Social metadata generation failed: Invalid or empty response from Gemini")
logger.error("Social metadata generation failed: Invalid or empty response from LLM")
# Return fallback response
return {
'open_graph': {
@@ -301,11 +334,47 @@ class BlogSEOMetadataGenerator:
logger.error(f"Social metadata generation failed: {e}")
raise e
def _extract_content_highlights(self, blog_content: str, max_length: int = 2500) -> str:
"""Extract key sections from blog content for prompt context"""
try:
lines = blog_content.split('\n')
# Get first paragraph (introduction)
intro = ""
for line in lines[:20]:
if line.strip() and not line.strip().startswith('#'):
intro += line.strip() + " "
if len(intro) > 300:
break
# Get section headings
headings = [line.strip() for line in lines if line.strip().startswith('##')][:6]
# Get conclusion if available
conclusion = ""
for line in reversed(lines[-20:]):
if line.strip() and not line.strip().startswith('#'):
conclusion = line.strip() + " " + conclusion
if len(conclusion) > 300:
break
highlights = f"INTRODUCTION: {intro[:300]}...\n\n"
highlights += f"SECTION HEADINGS: {' | '.join([h.replace('##', '').strip() for h in headings])}\n\n"
if conclusion:
highlights += f"CONCLUSION: {conclusion[:300]}..."
return highlights[:max_length]
except Exception as e:
logger.warning(f"Failed to extract content highlights: {e}")
return blog_content[:2000] + "..."
def _create_core_metadata_prompt(
self,
blog_content: str,
blog_title: str,
keywords_data: Dict[str, Any]
keywords_data: Dict[str, Any],
outline: Optional[List[Dict[str, Any]]] = None,
seo_analysis: Optional[Dict[str, Any]] = None
) -> str:
"""Create high-quality prompt for core metadata generation"""
@@ -314,30 +383,106 @@ class BlogSEOMetadataGenerator:
search_intent = keywords_data.get('search_intent', 'informational')
target_audience = keywords_data.get('target_audience', 'general')
industry = keywords_data.get('industry', 'general')
# Calculate word count for reading time estimation
word_count = len(blog_content.split())
# Extract outline structure
outline_context = ""
if outline:
headings = [s.get('heading', '') for s in outline if s.get('heading')]
outline_context = f"""
OUTLINE STRUCTURE:
- Total sections: {len(outline)}
- Section headings: {', '.join(headings[:8])}
- Content hierarchy: Well-structured with {len(outline)} main sections
"""
# Extract SEO analysis insights
seo_context = ""
if seo_analysis:
overall_score = seo_analysis.get('overall_score', seo_analysis.get('seo_score', 0))
category_scores = seo_analysis.get('category_scores', {})
applied_recs = seo_analysis.get('applied_recommendations', [])
seo_context = f"""
SEO ANALYSIS RESULTS:
- Overall SEO Score: {overall_score}/100
- Category Scores: Structure {category_scores.get('structure', category_scores.get('Structure', 0))}, Keywords {category_scores.get('keywords', category_scores.get('Keywords', 0))}, Readability {category_scores.get('readability', category_scores.get('Readability', 0))}
- Applied Recommendations: {len(applied_recs)} SEO optimizations have been applied
- Content Quality: Optimized for search engines with keyword focus
"""
# Get more content context (key sections instead of just first 1000 chars)
content_preview = self._extract_content_highlights(blog_content)
prompt = f"""
Generate SEO metadata for this blog post.
Generate comprehensive, personalized SEO metadata for this blog post.
BLOG TITLE: {blog_title}
BLOG CONTENT: {blog_content[:1000]}...
=== BLOG CONTENT CONTEXT ===
TITLE: {blog_title}
CONTENT PREVIEW (key sections): {content_preview}
WORD COUNT: {word_count} words
READING TIME ESTIMATE: {max(1, word_count // 200)} minutes
{outline_context}
=== KEYWORD & AUDIENCE DATA ===
PRIMARY KEYWORDS: {primary_keywords}
SEMANTIC KEYWORDS: {semantic_keywords}
WORD COUNT: {word_count}
SEARCH INTENT: {search_intent}
TARGET AUDIENCE: {target_audience}
INDUSTRY: {industry}
Generate:
1. SEO TITLE (50-60 characters) - include primary keyword
2. META DESCRIPTION (150-160 characters) - include CTA
3. URL SLUG (lowercase, hyphens, 3-5 words)
4. BLOG TAGS (5-8 relevant tags)
5. BLOG CATEGORIES (2-3 categories)
6. SOCIAL HASHTAGS (5-10 hashtags with #)
7. READING TIME (calculate from {word_count} words)
8. FOCUS KEYWORD (primary keyword for SEO)
{seo_context}
Make it compelling and SEO-optimized.
=== METADATA GENERATION REQUIREMENTS ===
1. SEO TITLE (50-60 characters, must include primary keyword):
- Front-load primary keyword
- Make it compelling and click-worthy
- Include power words if appropriate for {target_audience} audience
- Optimized for {search_intent} search intent
2. META DESCRIPTION (150-160 characters, must include CTA):
- Include primary keyword naturally in first 120 chars
- Add compelling call-to-action (e.g., "Learn more", "Discover how", "Get started")
- Highlight value proposition for {target_audience} audience
- Use {industry} industry-specific terminology where relevant
3. URL SLUG (lowercase, hyphens, 3-5 words):
- Include primary keyword
- Remove stop words
- Keep it concise and readable
4. BLOG TAGS (5-8 relevant tags):
- Mix of primary, semantic, and long-tail keywords
- Industry-specific tags for {industry}
- Audience-relevant tags for {target_audience}
5. BLOG CATEGORIES (2-3 categories):
- Based on content structure and {industry} industry standards
- Reflect main themes from outline sections
6. SOCIAL HASHTAGS (5-10 hashtags with #):
- Include primary keyword as hashtag
- Industry-specific hashtags for {industry}
- Trending/relevant hashtags for {target_audience}
7. READING TIME (calculate from {word_count} words):
- Average reading speed: 200 words/minute
- Round to nearest minute
8. FOCUS KEYWORD (primary keyword for SEO):
- Select the most important primary keyword
- Should match the main topic and search intent
=== QUALITY REQUIREMENTS ===
- All metadata must be unique, not generic
- Incorporate insights from SEO analysis if provided
- Reflect the actual content structure from outline
- Use language appropriate for {target_audience} audience
- Optimize for {search_intent} search intent
- Make descriptions compelling and action-oriented
Generate metadata that is personalized, compelling, and SEO-optimized.
"""
return prompt
@@ -345,7 +490,9 @@ Make it compelling and SEO-optimized.
self,
blog_content: str,
blog_title: str,
keywords_data: Dict[str, Any]
keywords_data: Dict[str, Any],
outline: Optional[List[Dict[str, Any]]] = None,
seo_analysis: Optional[Dict[str, Any]] = None
) -> str:
"""Create high-quality prompt for social metadata generation"""
@@ -353,49 +500,68 @@ Make it compelling and SEO-optimized.
search_intent = keywords_data.get('search_intent', 'informational')
target_audience = keywords_data.get('target_audience', 'general')
industry = keywords_data.get('industry', 'general')
current_date = datetime.now().isoformat()
# Add outline and SEO context similar to core metadata prompt
outline_context = ""
if outline:
headings = [s.get('heading', '') for s in outline if s.get('heading')]
outline_context = f"\nOUTLINE SECTIONS: {', '.join(headings[:6])}\n"
seo_context = ""
if seo_analysis:
overall_score = seo_analysis.get('overall_score', seo_analysis.get('seo_score', 0))
seo_context = f"\nSEO SCORE: {overall_score}/100 (optimized content)\n"
content_preview = self._extract_content_highlights(blog_content, 1500)
prompt = f"""
Generate social media metadata for this blog post.
Generate engaging social media metadata for this blog post.
BLOG TITLE: {blog_title}
BLOG CONTENT: {blog_content[:800]}...
PRIMARY KEYWORDS: {primary_keywords}
=== CONTENT ===
TITLE: {blog_title}
CONTENT: {content_preview}
{outline_context}
{seo_context}
KEYWORDS: {primary_keywords}
TARGET AUDIENCE: {target_audience}
INDUSTRY: {industry}
CURRENT DATE: {current_date}
Generate:
=== GENERATION REQUIREMENTS ===
1. OPEN GRAPH (Facebook/LinkedIn):
- title: 60 chars max
- description: 160 chars max
- image: image URL
- title: 60 chars max, include primary keyword, compelling for {target_audience}
- description: 160 chars max, include CTA and value proposition
- image: Suggest an appropriate image URL (placeholder if none available)
- type: "article"
- site_name: site name
- url: canonical URL
- site_name: Use appropriate site name for {industry} industry
- url: Generate canonical URL structure
2. TWITTER CARD:
- card: "summary_large_image"
- title: 70 chars max
- description: 200 chars max with hashtags
- image: image URL
- site: @sitename
- creator: @author
- title: 70 chars max, optimized for Twitter audience
- description: 200 chars max with relevant hashtags inline
- image: Match Open Graph image
- site: @yourwebsite (placeholder, user should update)
- creator: @author (placeholder, user should update)
3. JSON-LD SCHEMA:
3. JSON-LD SCHEMA (Article):
- @context: "https://schema.org"
- @type: "Article"
- headline: article title
- description: article description
- author: {{"@type": "Person", "name": "Author Name"}}
- publisher: {{"@type": "Organization", "name": "Site Name"}}
- datePublished: ISO date
- dateModified: ISO date
- mainEntityOfPage: canonical URL
- keywords: array of keywords
- wordCount: word count
- headline: Article title (optimized)
- description: Article description (150-200 chars)
- author: {{"@type": "Person", "name": "Author Name"}} (placeholder)
- publisher: {{"@type": "Organization", "name": "Site Name", "logo": {{"@type": "ImageObject", "url": "logo-url"}}}}
- datePublished: {current_date}
- dateModified: {current_date}
- mainEntityOfPage: {{"@type": "WebPage", "@id": "canonical-url"}}
- keywords: Array of primary and semantic keywords
- wordCount: {len(blog_content.split())}
- articleSection: Primary category based on content
- inLanguage: "en-US"
Make it engaging and SEO-optimized.
Make it engaging, personalized for {target_audience}, and optimized for {industry} industry.
"""
return prompt

View File

@@ -0,0 +1,269 @@
"""Blog SEO Recommendation Applier
Applies actionable SEO recommendations to existing blog content using the
provider-agnostic `llm_text_gen` dispatcher. Ensures GPT_PROVIDER parity.
"""
import asyncio
from typing import Dict, Any, List
from utils.logger_utils import get_service_logger
from services.llm_providers.main_text_generation import llm_text_gen
logger = get_service_logger("blog_seo_recommendation_applier")
class BlogSEORecommendationApplier:
"""Apply actionable SEO recommendations to blog content."""
def __init__(self):
logger.debug("Initialized BlogSEORecommendationApplier")
async def apply_recommendations(self, payload: Dict[str, Any]) -> Dict[str, Any]:
"""Apply recommendations and return updated content."""
title = payload.get("title", "Untitled Blog")
sections: List[Dict[str, Any]] = payload.get("sections", [])
outline = payload.get("outline", [])
research = payload.get("research", {})
recommendations = payload.get("recommendations", [])
persona = payload.get("persona", {})
tone = payload.get("tone")
audience = payload.get("audience")
if not sections:
return {"success": False, "error": "No sections provided for recommendation application"}
if not recommendations:
logger.warning("apply_recommendations called without recommendations")
return {"success": True, "title": title, "sections": sections, "applied": []}
prompt = self._build_prompt(
title=title,
sections=sections,
outline=outline,
research=research,
recommendations=recommendations,
persona=persona,
tone=tone,
audience=audience,
)
schema = {
"type": "object",
"properties": {
"title": {"type": "string"},
"sections": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {"type": "string"},
"heading": {"type": "string"},
"content": {"type": "string"},
"notes": {"type": "array", "items": {"type": "string"}},
},
"required": ["id", "heading", "content"],
},
},
"applied_recommendations": {
"type": "array",
"items": {
"type": "object",
"properties": {
"category": {"type": "string"},
"summary": {"type": "string"},
},
},
},
},
"required": ["sections"],
}
logger.info("Applying SEO recommendations via llm_text_gen")
result = await asyncio.to_thread(
llm_text_gen,
prompt,
None,
schema,
)
if not result or result.get("error"):
error_msg = result.get("error", "Unknown error") if result else "No response from text generator"
logger.error(f"SEO recommendation application failed: {error_msg}")
return {"success": False, "error": error_msg}
raw_sections = result.get("sections", []) or []
normalized_sections: List[Dict[str, Any]] = []
# Build lookup table from updated sections using their identifiers
updated_map: Dict[str, Dict[str, Any]] = {}
for updated in raw_sections:
section_id = str(
updated.get("id")
or updated.get("section_id")
or updated.get("heading")
or ""
).strip()
if not section_id:
continue
heading = (
updated.get("heading")
or updated.get("title")
or section_id
)
content_text = updated.get("content", "")
if isinstance(content_text, list):
content_text = "\n\n".join(str(p).strip() for p in content_text if p)
updated_map[section_id] = {
"id": section_id,
"heading": heading,
"content": str(content_text).strip(),
"notes": updated.get("notes", []),
}
if not updated_map and raw_sections:
logger.warning("Updated sections missing identifiers; falling back to positional mapping")
for index, original in enumerate(sections):
fallback_id = str(
original.get("id")
or original.get("section_id")
or f"section_{index + 1}"
).strip()
mapped = updated_map.get(fallback_id)
if not mapped and raw_sections:
# Fall back to positional match if identifier lookup failed
candidate = raw_sections[index] if index < len(raw_sections) else {}
heading = (
candidate.get("heading")
or candidate.get("title")
or original.get("heading")
or original.get("title")
or f"Section {index + 1}"
)
content_text = candidate.get("content") or original.get("content", "")
if isinstance(content_text, list):
content_text = "\n\n".join(str(p).strip() for p in content_text if p)
mapped = {
"id": fallback_id,
"heading": heading,
"content": str(content_text).strip(),
"notes": candidate.get("notes", []),
}
if not mapped:
# Fallback to original content if nothing else available
mapped = {
"id": fallback_id,
"heading": original.get("heading") or original.get("title") or f"Section {index + 1}",
"content": str(original.get("content", "")).strip(),
"notes": original.get("notes", []),
}
normalized_sections.append(mapped)
applied = result.get("applied_recommendations", [])
logger.info("SEO recommendations applied successfully")
return {
"success": True,
"title": result.get("title", title),
"sections": normalized_sections,
"applied": applied,
}
def _build_prompt(
self,
*,
title: str,
sections: List[Dict[str, Any]],
outline: List[Dict[str, Any]],
research: Dict[str, Any],
recommendations: List[Dict[str, Any]],
persona: Dict[str, Any],
tone: str | None,
audience: str | None,
) -> str:
"""Construct prompt for applying recommendations."""
sections_str = []
for section in sections:
sections_str.append(
f"ID: {section.get('id', 'section')}, Heading: {section.get('heading', 'Untitled')}\n"
f"Current Content:\n{section.get('content', '')}\n"
)
outline_str = "\n".join(
[
f"- {item.get('heading', 'Section')} (Target words: {item.get('target_words', 'N/A')})"
for item in outline
]
)
research_summary = research.get("keyword_analysis", {}) if research else {}
primary_keywords = ", ".join(research_summary.get("primary", [])[:10]) or "None"
recommendations_str = []
for rec in recommendations:
recommendations_str.append(
f"Category: {rec.get('category', 'General')} | Priority: {rec.get('priority', 'Medium')}\n"
f"Recommendation: {rec.get('recommendation', '')}\n"
f"Impact: {rec.get('impact', '')}\n"
)
persona_str = (
f"Persona: {persona}\n"
if persona
else "Persona: (not provided)\n"
)
style_guidance = []
if tone:
style_guidance.append(f"Desired tone: {tone}")
if audience:
style_guidance.append(f"Target audience: {audience}")
style_str = "\n".join(style_guidance) if style_guidance else "Maintain current tone and audience alignment."
prompt = f"""
You are an expert SEO content strategist. Update the blog content to apply the actionable recommendations.
Current Title: {title}
Primary Keywords (for context): {primary_keywords}
Outline Overview:
{outline_str or 'No outline supplied'}
Existing Sections:
{''.join(sections_str)}
Actionable Recommendations to Apply:
{''.join(recommendations_str)}
{persona_str}
{style_str}
Instructions:
1. Carefully apply the recommendations while preserving factual accuracy and research alignment.
2. Keep section identifiers (IDs) unchanged so the frontend can map updates correctly.
3. Improve clarity, flow, and SEO optimization per the guidance.
4. Return updated sections in the requested JSON format.
5. Provide a short summary of which recommendations were addressed.
"""
return prompt
__all__ = ["BlogSEORecommendationApplier"]