Added image generation to blog writer
This commit is contained in:
@@ -1,12 +1,14 @@
|
||||
"""
|
||||
EnhancedContentGenerator - thin orchestrator combining URL selection and Gemini provider.
|
||||
EnhancedContentGenerator - thin orchestrator for section generation.
|
||||
|
||||
Provides Draft vs Polished modes and optional URL Context usage.
|
||||
Provider parity:
|
||||
- Uses main_text_generation.llm_text_gen to respect GPT_PROVIDER (Gemini/HF)
|
||||
- No direct provider coupling here; Google grounding remains in research only
|
||||
"""
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from .source_url_manager import SourceURLManager
|
||||
from .context_memory import ContextMemory
|
||||
from .transition_generator import TransitionGenerator
|
||||
@@ -15,24 +17,37 @@ from .flow_analyzer import FlowAnalyzer
|
||||
|
||||
class EnhancedContentGenerator:
|
||||
def __init__(self):
|
||||
self.provider = GeminiGroundedProvider()
|
||||
self.url_manager = SourceURLManager()
|
||||
self.memory = ContextMemory(max_entries=12)
|
||||
self.transitioner = TransitionGenerator()
|
||||
self.flow = FlowAnalyzer()
|
||||
|
||||
async def generate_section(self, section: Any, research: Any, mode: str = "polished") -> Dict[str, Any]:
|
||||
urls = self.url_manager.pick_relevant_urls(section, research)
|
||||
prev_summary = self.memory.build_previous_sections_summary(limit=2)
|
||||
prompt = self._build_prompt(section, research, prev_summary)
|
||||
result = await self.provider.generate_grounded_content(
|
||||
prompt=prompt,
|
||||
content_type="linkedin_article",
|
||||
temperature=0.6 if mode == "polished" else 0.8,
|
||||
max_tokens=2048,
|
||||
urls=urls,
|
||||
mode=mode,
|
||||
)
|
||||
urls = self.url_manager.pick_relevant_urls(section, research)
|
||||
prompt = self._build_prompt(section, research, prev_summary, urls)
|
||||
# Provider-agnostic text generation (respect GPT_PROVIDER & circuit-breaker)
|
||||
content_text: str = ""
|
||||
try:
|
||||
ai_resp = llm_text_gen(
|
||||
prompt=prompt,
|
||||
json_struct=None,
|
||||
system_prompt=None,
|
||||
)
|
||||
if isinstance(ai_resp, dict) and ai_resp.get("text"):
|
||||
content_text = ai_resp.get("text", "")
|
||||
elif isinstance(ai_resp, str):
|
||||
content_text = ai_resp
|
||||
else:
|
||||
# Fallback best-effort extraction
|
||||
content_text = str(ai_resp or "")
|
||||
except Exception as e:
|
||||
content_text = ""
|
||||
|
||||
result = {
|
||||
"content": content_text,
|
||||
"sources": [{"title": u.get("title", ""), "url": u.get("url", "")} for u in urls] if urls else [],
|
||||
}
|
||||
# Generate transition and compute intelligent flow metrics
|
||||
previous_text = prev_summary
|
||||
current_text = result.get("content", "")
|
||||
@@ -56,19 +71,22 @@ class EnhancedContentGenerator:
|
||||
pass
|
||||
return result
|
||||
|
||||
def _build_prompt(self, section: Any, research: Any, prev_summary: str, urls: list = None) -> str:
    """Assemble the LLM prompt for a single blog section.

    Args:
        section: Outline section object. ``heading``, ``key_points``,
            ``keywords`` and ``target_words`` are read with ``getattr`` and
            fall back to defaults when missing.
        research: Accepted for interface compatibility; not read here.
        prev_summary: Summary of previously generated sections, embedded
            verbatim in the prompt.
        urls: Optional list of source dicts with ``title`` / ``url`` keys.
            Defaults to ``None`` so three-argument callers keep working.

    Returns:
        The complete prompt text.
    """
    heading = getattr(section, 'heading', 'Section')
    # `or []` covers attributes that exist but are None; str() keeps join()
    # from raising on non-string items.
    key_points = [str(point) for point in (getattr(section, 'key_points', None) or [])]
    keywords = [str(word) for word in (getattr(section, 'keywords', None) or [])]
    target_words = getattr(section, 'target_words', 300)

    if urls:
        url_block = "\n".join(f"- {u.get('title','')} ({u.get('url','')})" for u in urls)
    else:
        url_block = "(no specific URLs provided)"

    return (
        f"You are writing the blog section '{heading}'.\n\n"
        f"Context summary (previous sections): {prev_summary}\n\n"
        "Authoring requirements:\n"
        f"- Target word count: ~{target_words}\n"
        f"- Use the following key points: {', '.join(key_points)}\n"
        f"- Include these keywords naturally: {', '.join(keywords)}\n"
        f"- Cite insights from these sources when relevant (do not output raw URLs):\n{url_block}\n\n"
        "Write engaging, well-structured markdown with clear paragraphs (2-4 sentences each) separated by double line breaks."
    )
|
||||
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ from models.blog_models import (
|
||||
MediumGeneratedSection,
|
||||
ResearchSource,
|
||||
)
|
||||
from services.llm_providers.gemini_provider import gemini_structured_json_response
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from services.cache.persistent_content_cache import persistent_content_cache
|
||||
|
||||
|
||||
@@ -176,11 +176,9 @@ class MediumBlogGenerator:
|
||||
f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
|
||||
)
|
||||
|
||||
ai_resp = gemini_structured_json_response(
|
||||
ai_resp = llm_text_gen(
|
||||
prompt=prompt,
|
||||
schema=schema,
|
||||
temperature=0.2,
|
||||
max_tokens=8192,
|
||||
json_struct=schema,
|
||||
system_prompt=system,
|
||||
)
|
||||
|
||||
|
||||
@@ -275,11 +275,17 @@ class BlogWriterService:
|
||||
# Initialize metadata generator
|
||||
metadata_generator = BlogSEOMetadataGenerator()
|
||||
|
||||
# Generate comprehensive metadata
|
||||
# Extract outline and seo_analysis from request
|
||||
outline = request.outline if hasattr(request, 'outline') else None
|
||||
seo_analysis = request.seo_analysis if hasattr(request, 'seo_analysis') else None
|
||||
|
||||
# Generate comprehensive metadata with full context
|
||||
metadata_results = await metadata_generator.generate_comprehensive_metadata(
|
||||
blog_content=request.content,
|
||||
blog_title=request.title or "Untitled Blog Post",
|
||||
research_data=request.research_data or {}
|
||||
research_data=request.research_data or {},
|
||||
outline=outline,
|
||||
seo_analysis=seo_analysis
|
||||
)
|
||||
|
||||
# Convert to BlogSEOMetadataResponse format
|
||||
|
||||
@@ -40,7 +40,7 @@ Return JSON format:
|
||||
}}"""
|
||||
|
||||
try:
|
||||
from services.llm_providers.gemini_provider import gemini_structured_json_response
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
optimization_schema = {
|
||||
"type": "object",
|
||||
@@ -64,11 +64,10 @@ Return JSON format:
|
||||
"propertyOrdering": ["outline"]
|
||||
}
|
||||
|
||||
optimized_data = gemini_structured_json_response(
|
||||
optimized_data = llm_text_gen(
|
||||
prompt=optimization_prompt,
|
||||
schema=optimization_schema,
|
||||
temperature=0.3,
|
||||
max_tokens=6000 # Match main outline generator
|
||||
json_struct=optimization_schema,
|
||||
system_prompt=None
|
||||
)
|
||||
|
||||
# Handle the new schema format with "outline" wrapper
|
||||
|
||||
@@ -20,7 +20,7 @@ class ResponseProcessor:
|
||||
|
||||
async def generate_with_retry(self, prompt: str, schema: Dict[str, Any], task_id: str = None) -> Dict[str, Any]:
|
||||
"""Generate outline with retry logic for API failures."""
|
||||
from services.llm_providers.gemini_provider import gemini_structured_json_response
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from api.blog_writer.task_manager import task_manager
|
||||
|
||||
max_retries = 2 # Conservative retry for expensive API calls
|
||||
@@ -29,17 +29,16 @@ class ResponseProcessor:
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
if task_id:
|
||||
await task_manager.update_progress(task_id, f"🤖 Calling Gemini API for outline generation (attempt {attempt + 1}/{max_retries + 1})...")
|
||||
await task_manager.update_progress(task_id, f"🤖 Calling AI API for outline generation (attempt {attempt + 1}/{max_retries + 1})...")
|
||||
|
||||
outline_data = gemini_structured_json_response(
|
||||
outline_data = llm_text_gen(
|
||||
prompt=prompt,
|
||||
schema=schema,
|
||||
temperature=0.3,
|
||||
max_tokens=6000 # Increased further to avoid truncation
|
||||
json_struct=schema,
|
||||
system_prompt=None
|
||||
)
|
||||
|
||||
# Log response for debugging
|
||||
logger.info(f"Gemini response received: {type(outline_data)}")
|
||||
logger.info(f"AI response received: {type(outline_data)}")
|
||||
|
||||
# Check for errors in the response
|
||||
if isinstance(outline_data, dict) and 'error' in outline_data:
|
||||
@@ -47,17 +46,17 @@ class ResponseProcessor:
|
||||
if "503" in error_msg and "overloaded" in error_msg and attempt < max_retries:
|
||||
if task_id:
|
||||
await task_manager.update_progress(task_id, f"⚠️ AI service overloaded, retrying in {retry_delay} seconds...")
|
||||
logger.warning(f"Gemini API overloaded, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
|
||||
logger.warning(f"AI API overloaded, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
|
||||
await asyncio.sleep(retry_delay)
|
||||
continue
|
||||
elif "No valid structured response content found" in error_msg and attempt < max_retries:
|
||||
if task_id:
|
||||
await task_manager.update_progress(task_id, f"⚠️ Invalid response format, retrying in {retry_delay} seconds...")
|
||||
logger.warning(f"Gemini response parsing failed, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
|
||||
logger.warning(f"AI response parsing failed, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1})")
|
||||
await asyncio.sleep(retry_delay)
|
||||
continue
|
||||
else:
|
||||
logger.error(f"Gemini structured response error: {outline_data['error']}")
|
||||
logger.error(f"AI structured response error: {outline_data['error']}")
|
||||
raise ValueError(f"AI outline generation failed: {outline_data['error']}")
|
||||
|
||||
# Validate required fields
|
||||
@@ -69,7 +68,7 @@ class ResponseProcessor:
|
||||
await asyncio.sleep(retry_delay)
|
||||
continue
|
||||
else:
|
||||
raise ValueError("Invalid outline structure in Gemini response")
|
||||
raise ValueError("Invalid outline structure in AI response")
|
||||
|
||||
# If we get here, the response is valid
|
||||
return outline_data
|
||||
@@ -79,7 +78,7 @@ class ResponseProcessor:
|
||||
if ("503" in error_str or "overloaded" in error_str) and attempt < max_retries:
|
||||
if task_id:
|
||||
await task_manager.update_progress(task_id, f"⚠️ AI service error, retrying in {retry_delay} seconds...")
|
||||
logger.warning(f"Gemini API error, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1}): {error_str}")
|
||||
logger.warning(f"AI API error, retrying in {retry_delay} seconds (attempt {attempt + 1}/{max_retries + 1}): {error_str}")
|
||||
await asyncio.sleep(retry_delay)
|
||||
continue
|
||||
else:
|
||||
|
||||
@@ -44,7 +44,7 @@ class SectionEnhancer:
|
||||
"""
|
||||
|
||||
try:
|
||||
from services.llm_providers.gemini_provider import gemini_structured_json_response
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
enhancement_schema = {
|
||||
"type": "object",
|
||||
@@ -58,11 +58,10 @@ class SectionEnhancer:
|
||||
"required": ["heading", "subheadings", "key_points", "target_words", "keywords"]
|
||||
}
|
||||
|
||||
enhanced_data = gemini_structured_json_response(
|
||||
enhanced_data = llm_text_gen(
|
||||
prompt=enhancement_prompt,
|
||||
schema=enhancement_schema,
|
||||
temperature=0.4,
|
||||
max_tokens=1000
|
||||
json_struct=enhancement_schema,
|
||||
system_prompt=None
|
||||
)
|
||||
|
||||
if isinstance(enhanced_data, dict) and 'error' not in enhanced_data:
|
||||
|
||||
@@ -559,14 +559,11 @@ Analyze the mapping and provide your recommendations.
|
||||
AI validation response
|
||||
"""
|
||||
try:
|
||||
from services.llm_providers.gemini_provider import gemini_text_response
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
response = gemini_text_response(
|
||||
response = llm_text_gen(
|
||||
prompt=prompt,
|
||||
temperature=0.3,
|
||||
top_p=0.9,
|
||||
n=1,
|
||||
max_tokens=2000,
|
||||
json_struct=None,
|
||||
system_prompt=None
|
||||
)
|
||||
|
||||
|
||||
@@ -10,13 +10,13 @@ import re
|
||||
import textstat
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.seo_analyzer import (
|
||||
ContentAnalyzer, KeywordAnalyzer,
|
||||
URLStructureAnalyzer, AIInsightGenerator
|
||||
)
|
||||
from services.llm_providers.gemini_provider import gemini_structured_json_response
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
class BlogContentSEOAnalyzer:
|
||||
@@ -24,11 +24,13 @@ class BlogContentSEOAnalyzer:
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the blog content SEO analyzer"""
|
||||
# Service-specific logger (no global reconfiguration)
|
||||
global logger
|
||||
logger = get_service_logger("blog_content_seo_analyzer")
|
||||
self.content_analyzer = ContentAnalyzer()
|
||||
self.keyword_analyzer = KeywordAnalyzer()
|
||||
self.url_analyzer = URLStructureAnalyzer()
|
||||
self.ai_insights = AIInsightGenerator()
|
||||
self.gemini_provider = gemini_structured_json_response
|
||||
|
||||
logger.info("BlogContentSEOAnalyzer initialized")
|
||||
|
||||
@@ -598,7 +600,7 @@ class BlogContentSEOAnalyzer:
|
||||
return recommendations
|
||||
|
||||
async def _run_ai_analysis(self, blog_content: str, keywords_data: Dict[str, Any], non_ai_results: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Run single AI analysis for structured insights"""
|
||||
"""Run single AI analysis for structured insights (provider-agnostic)"""
|
||||
try:
|
||||
# Prepare context for AI analysis
|
||||
context = {
|
||||
@@ -610,7 +612,6 @@ class BlogContentSEOAnalyzer:
|
||||
# Create AI prompt for structured analysis
|
||||
prompt = self._create_ai_analysis_prompt(context)
|
||||
|
||||
# Get structured response from Gemini
|
||||
schema = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -653,18 +654,17 @@ class BlogContentSEOAnalyzer:
|
||||
}
|
||||
}
|
||||
|
||||
ai_response = self.gemini_provider(
|
||||
# Provider-agnostic structured response respecting GPT_PROVIDER
|
||||
ai_response = llm_text_gen(
|
||||
prompt=prompt,
|
||||
schema=schema,
|
||||
temperature=0.2,
|
||||
max_tokens=8192
|
||||
json_struct=schema,
|
||||
system_prompt=None
|
||||
)
|
||||
|
||||
return ai_response
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"AI analysis failed: {e}")
|
||||
# Fail fast - don't return mock data
|
||||
raise e
|
||||
|
||||
def _create_ai_analysis_prompt(self, context: Dict[str, Any]) -> str:
|
||||
|
||||
@@ -12,7 +12,7 @@ from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from services.llm_providers.gemini_provider import gemini_structured_json_response
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
class BlogSEOMetadataGenerator:
|
||||
@@ -20,14 +20,15 @@ class BlogSEOMetadataGenerator:
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the metadata generator"""
|
||||
self.gemini_provider = gemini_structured_json_response
|
||||
logger.info("BlogSEOMetadataGenerator initialized")
|
||||
|
||||
async def generate_comprehensive_metadata(
|
||||
self,
|
||||
blog_content: str,
|
||||
blog_title: str,
|
||||
research_data: Dict[str, Any]
|
||||
research_data: Dict[str, Any],
|
||||
outline: Optional[List[Dict[str, Any]]] = None,
|
||||
seo_analysis: Optional[Dict[str, Any]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate comprehensive SEO metadata using maximum 2 AI calls
|
||||
@@ -36,6 +37,8 @@ class BlogSEOMetadataGenerator:
|
||||
blog_content: The blog content to analyze
|
||||
blog_title: The blog title
|
||||
research_data: Research data containing keywords and insights
|
||||
outline: Outline structure with sections and headings
|
||||
seo_analysis: SEO analysis results from previous phase
|
||||
|
||||
Returns:
|
||||
Comprehensive metadata including all SEO elements
|
||||
@@ -49,11 +52,15 @@ class BlogSEOMetadataGenerator:
|
||||
|
||||
# Call 1: Generate core SEO metadata (parallel with Call 2)
|
||||
logger.info("Generating core SEO metadata")
|
||||
core_metadata_task = self._generate_core_metadata(blog_content, blog_title, keywords_data)
|
||||
core_metadata_task = self._generate_core_metadata(
|
||||
blog_content, blog_title, keywords_data, outline, seo_analysis
|
||||
)
|
||||
|
||||
# Call 2: Generate social media and structured data (parallel with Call 1)
|
||||
logger.info("Generating social media and structured data")
|
||||
social_metadata_task = self._generate_social_metadata(blog_content, blog_title, keywords_data)
|
||||
social_metadata_task = self._generate_social_metadata(
|
||||
blog_content, blog_title, keywords_data, outline, seo_analysis
|
||||
)
|
||||
|
||||
# Wait for both calls to complete
|
||||
core_metadata, social_metadata = await asyncio.gather(
|
||||
@@ -105,12 +112,16 @@ class BlogSEOMetadataGenerator:
|
||||
self,
|
||||
blog_content: str,
|
||||
blog_title: str,
|
||||
keywords_data: Dict[str, Any]
|
||||
keywords_data: Dict[str, Any],
|
||||
outline: Optional[List[Dict[str, Any]]] = None,
|
||||
seo_analysis: Optional[Dict[str, Any]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate core SEO metadata (Call 1)"""
|
||||
try:
|
||||
# Create comprehensive prompt for core metadata
|
||||
prompt = self._create_core_metadata_prompt(blog_content, blog_title, keywords_data)
|
||||
prompt = self._create_core_metadata_prompt(
|
||||
blog_content, blog_title, keywords_data, outline, seo_analysis
|
||||
)
|
||||
|
||||
# Define simplified structured schema for core metadata
|
||||
schema = {
|
||||
@@ -155,17 +166,26 @@ class BlogSEOMetadataGenerator:
|
||||
"required": ["seo_title", "meta_description", "url_slug", "blog_tags", "blog_categories", "social_hashtags", "reading_time", "focus_keyword"]
|
||||
}
|
||||
|
||||
# Get structured response from Gemini
|
||||
ai_response = self.gemini_provider(
|
||||
prompt,
|
||||
schema,
|
||||
temperature=0.3,
|
||||
max_tokens=2048
|
||||
# Get structured response using provider-agnostic llm_text_gen
|
||||
ai_response_raw = llm_text_gen(
|
||||
prompt=prompt,
|
||||
json_struct=schema,
|
||||
system_prompt=None
|
||||
)
|
||||
|
||||
# Handle response: llm_text_gen may return dict (from structured JSON) or str (needs parsing)
|
||||
ai_response = ai_response_raw
|
||||
if isinstance(ai_response_raw, str):
|
||||
try:
|
||||
import json
|
||||
ai_response = json.loads(ai_response_raw)
|
||||
except json.JSONDecodeError:
|
||||
logger.error(f"Failed to parse JSON response: {ai_response_raw[:200]}...")
|
||||
ai_response = None
|
||||
|
||||
# Check if we got a valid response
|
||||
if not ai_response or not isinstance(ai_response, dict):
|
||||
logger.error("Core metadata generation failed: Invalid response from Gemini")
|
||||
logger.error("Core metadata generation failed: Invalid response from LLM")
|
||||
# Return fallback response
|
||||
primary_keywords = ', '.join(keywords_data.get('primary_keywords', ['content']))
|
||||
word_count = len(blog_content.split())
|
||||
@@ -193,12 +213,16 @@ class BlogSEOMetadataGenerator:
|
||||
self,
|
||||
blog_content: str,
|
||||
blog_title: str,
|
||||
keywords_data: Dict[str, Any]
|
||||
keywords_data: Dict[str, Any],
|
||||
outline: Optional[List[Dict[str, Any]]] = None,
|
||||
seo_analysis: Optional[Dict[str, Any]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate social media and structured data (Call 2)"""
|
||||
try:
|
||||
# Create comprehensive prompt for social metadata
|
||||
prompt = self._create_social_metadata_prompt(blog_content, blog_title, keywords_data)
|
||||
prompt = self._create_social_metadata_prompt(
|
||||
blog_content, blog_title, keywords_data, outline, seo_analysis
|
||||
)
|
||||
|
||||
# Define simplified structured schema for social metadata
|
||||
schema = {
|
||||
@@ -246,17 +270,26 @@ class BlogSEOMetadataGenerator:
|
||||
"required": ["open_graph", "twitter_card", "json_ld_schema"]
|
||||
}
|
||||
|
||||
# Get structured response from Gemini
|
||||
ai_response = self.gemini_provider(
|
||||
prompt,
|
||||
schema,
|
||||
temperature=0.3,
|
||||
max_tokens=2048
|
||||
# Get structured response using provider-agnostic llm_text_gen
|
||||
ai_response_raw = llm_text_gen(
|
||||
prompt=prompt,
|
||||
json_struct=schema,
|
||||
system_prompt=None
|
||||
)
|
||||
|
||||
# Handle response: llm_text_gen may return dict (from structured JSON) or str (needs parsing)
|
||||
ai_response = ai_response_raw
|
||||
if isinstance(ai_response_raw, str):
|
||||
try:
|
||||
import json
|
||||
ai_response = json.loads(ai_response_raw)
|
||||
except json.JSONDecodeError:
|
||||
logger.error(f"Failed to parse JSON response: {ai_response_raw[:200]}...")
|
||||
ai_response = None
|
||||
|
||||
# Check if we got a valid response
|
||||
if not ai_response or not isinstance(ai_response, dict) or not ai_response.get('open_graph') or not ai_response.get('twitter_card') or not ai_response.get('json_ld_schema'):
|
||||
logger.error("Social metadata generation failed: Invalid or empty response from Gemini")
|
||||
logger.error("Social metadata generation failed: Invalid or empty response from LLM")
|
||||
# Return fallback response
|
||||
return {
|
||||
'open_graph': {
|
||||
@@ -301,11 +334,47 @@ class BlogSEOMetadataGenerator:
|
||||
logger.error(f"Social metadata generation failed: {e}")
|
||||
raise e
|
||||
|
||||
def _extract_content_highlights(self, blog_content: str, max_length: int = 2500) -> str:
    """Condense blog content into intro, headings and conclusion for prompt context.

    Args:
        blog_content: Markdown text of the post. ``None`` is treated as empty.
        max_length: Hard cap on the length of the returned string.

    Returns:
        An "INTRODUCTION: ..." block, a "SECTION HEADINGS: ..." line and, when
        closing text is found, a "CONCLUSION: ..." block, truncated to
        ``max_length``. If extraction raises, the first 2000 characters of the
        content followed by "...".
    """
    # Coerce up front so the fallback slice in the except branch cannot
    # itself fail when the caller passes None.
    blog_content = blog_content or ""
    try:
        stripped = [raw.strip() for raw in blog_content.split('\n')]

        # Introduction: non-heading text from the first 20 lines, ~300 chars.
        intro = ""
        for text in stripped[:20]:
            if text and not text.startswith('#'):
                intro += text + " "
                if len(intro) > 300:
                    break

        # First six headings of level 2 or deeper. lstrip('#') removes the
        # whole marker run ('##', '###', ...) and leaves '#' characters that
        # appear inside the heading text untouched.
        headings = [text.lstrip('#').strip() for text in stripped if text.startswith('##')][:6]

        # Conclusion: walk the last 20 lines backwards, prepending so the
        # collected text stays in reading order.
        conclusion = ""
        for text in reversed(stripped[-20:]):
            if text and not text.startswith('#'):
                conclusion = text + " " + conclusion
                if len(conclusion) > 300:
                    break

        highlights = f"INTRODUCTION: {intro[:300]}...\n\n"
        highlights += f"SECTION HEADINGS: {' | '.join(headings)}\n\n"
        if conclusion:
            highlights += f"CONCLUSION: {conclusion[:300]}..."

        return highlights[:max_length]
    except Exception as e:
        # Deliberate best-effort: prompt building must not fail on odd input.
        logger.warning(f"Failed to extract content highlights: {e}")
        return blog_content[:2000] + "..."
|
||||
|
||||
def _create_core_metadata_prompt(
|
||||
self,
|
||||
blog_content: str,
|
||||
blog_title: str,
|
||||
keywords_data: Dict[str, Any]
|
||||
keywords_data: Dict[str, Any],
|
||||
outline: Optional[List[Dict[str, Any]]] = None,
|
||||
seo_analysis: Optional[Dict[str, Any]] = None
|
||||
) -> str:
|
||||
"""Create high-quality prompt for core metadata generation"""
|
||||
|
||||
@@ -314,30 +383,106 @@ class BlogSEOMetadataGenerator:
|
||||
search_intent = keywords_data.get('search_intent', 'informational')
|
||||
target_audience = keywords_data.get('target_audience', 'general')
|
||||
industry = keywords_data.get('industry', 'general')
|
||||
|
||||
# Calculate word count for reading time estimation
|
||||
word_count = len(blog_content.split())
|
||||
|
||||
# Extract outline structure
|
||||
outline_context = ""
|
||||
if outline:
|
||||
headings = [s.get('heading', '') for s in outline if s.get('heading')]
|
||||
outline_context = f"""
|
||||
OUTLINE STRUCTURE:
|
||||
- Total sections: {len(outline)}
|
||||
- Section headings: {', '.join(headings[:8])}
|
||||
- Content hierarchy: Well-structured with {len(outline)} main sections
|
||||
"""
|
||||
|
||||
# Extract SEO analysis insights
|
||||
seo_context = ""
|
||||
if seo_analysis:
|
||||
overall_score = seo_analysis.get('overall_score', seo_analysis.get('seo_score', 0))
|
||||
category_scores = seo_analysis.get('category_scores', {})
|
||||
applied_recs = seo_analysis.get('applied_recommendations', [])
|
||||
|
||||
seo_context = f"""
|
||||
SEO ANALYSIS RESULTS:
|
||||
- Overall SEO Score: {overall_score}/100
|
||||
- Category Scores: Structure {category_scores.get('structure', category_scores.get('Structure', 0))}, Keywords {category_scores.get('keywords', category_scores.get('Keywords', 0))}, Readability {category_scores.get('readability', category_scores.get('Readability', 0))}
|
||||
- Applied Recommendations: {len(applied_recs)} SEO optimizations have been applied
|
||||
- Content Quality: Optimized for search engines with keyword focus
|
||||
"""
|
||||
|
||||
# Get more content context (key sections instead of just first 1000 chars)
|
||||
content_preview = self._extract_content_highlights(blog_content)
|
||||
|
||||
prompt = f"""
|
||||
Generate SEO metadata for this blog post.
|
||||
Generate comprehensive, personalized SEO metadata for this blog post.
|
||||
|
||||
BLOG TITLE: {blog_title}
|
||||
BLOG CONTENT: {blog_content[:1000]}...
|
||||
=== BLOG CONTENT CONTEXT ===
|
||||
TITLE: {blog_title}
|
||||
CONTENT PREVIEW (key sections): {content_preview}
|
||||
WORD COUNT: {word_count} words
|
||||
READING TIME ESTIMATE: {max(1, word_count // 200)} minutes
|
||||
|
||||
{outline_context}
|
||||
|
||||
=== KEYWORD & AUDIENCE DATA ===
|
||||
PRIMARY KEYWORDS: {primary_keywords}
|
||||
SEMANTIC KEYWORDS: {semantic_keywords}
|
||||
WORD COUNT: {word_count}
|
||||
SEARCH INTENT: {search_intent}
|
||||
TARGET AUDIENCE: {target_audience}
|
||||
INDUSTRY: {industry}
|
||||
|
||||
Generate:
|
||||
1. SEO TITLE (50-60 characters) - include primary keyword
|
||||
2. META DESCRIPTION (150-160 characters) - include CTA
|
||||
3. URL SLUG (lowercase, hyphens, 3-5 words)
|
||||
4. BLOG TAGS (5-8 relevant tags)
|
||||
5. BLOG CATEGORIES (2-3 categories)
|
||||
6. SOCIAL HASHTAGS (5-10 hashtags with #)
|
||||
7. READING TIME (calculate from {word_count} words)
|
||||
8. FOCUS KEYWORD (primary keyword for SEO)
|
||||
{seo_context}
|
||||
|
||||
Make it compelling and SEO-optimized.
|
||||
=== METADATA GENERATION REQUIREMENTS ===
|
||||
1. SEO TITLE (50-60 characters, must include primary keyword):
|
||||
- Front-load primary keyword
|
||||
- Make it compelling and click-worthy
|
||||
- Include power words if appropriate for {target_audience} audience
|
||||
- Optimized for {search_intent} search intent
|
||||
|
||||
2. META DESCRIPTION (150-160 characters, must include CTA):
|
||||
- Include primary keyword naturally in first 120 chars
|
||||
- Add compelling call-to-action (e.g., "Learn more", "Discover how", "Get started")
|
||||
- Highlight value proposition for {target_audience} audience
|
||||
- Use {industry} industry-specific terminology where relevant
|
||||
|
||||
3. URL SLUG (lowercase, hyphens, 3-5 words):
|
||||
- Include primary keyword
|
||||
- Remove stop words
|
||||
- Keep it concise and readable
|
||||
|
||||
4. BLOG TAGS (5-8 relevant tags):
|
||||
- Mix of primary, semantic, and long-tail keywords
|
||||
- Industry-specific tags for {industry}
|
||||
- Audience-relevant tags for {target_audience}
|
||||
|
||||
5. BLOG CATEGORIES (2-3 categories):
|
||||
- Based on content structure and {industry} industry standards
|
||||
- Reflect main themes from outline sections
|
||||
|
||||
6. SOCIAL HASHTAGS (5-10 hashtags with #):
|
||||
- Include primary keyword as hashtag
|
||||
- Industry-specific hashtags for {industry}
|
||||
- Trending/relevant hashtags for {target_audience}
|
||||
|
||||
7. READING TIME (calculate from {word_count} words):
|
||||
- Average reading speed: 200 words/minute
|
||||
- Round to nearest minute
|
||||
|
||||
8. FOCUS KEYWORD (primary keyword for SEO):
|
||||
- Select the most important primary keyword
|
||||
- Should match the main topic and search intent
|
||||
|
||||
=== QUALITY REQUIREMENTS ===
|
||||
- All metadata must be unique, not generic
|
||||
- Incorporate insights from SEO analysis if provided
|
||||
- Reflect the actual content structure from outline
|
||||
- Use language appropriate for {target_audience} audience
|
||||
- Optimize for {search_intent} search intent
|
||||
- Make descriptions compelling and action-oriented
|
||||
|
||||
Generate metadata that is personalized, compelling, and SEO-optimized.
|
||||
"""
|
||||
return prompt
|
||||
|
||||
@@ -345,7 +490,9 @@ Make it compelling and SEO-optimized.
|
||||
self,
|
||||
blog_content: str,
|
||||
blog_title: str,
|
||||
keywords_data: Dict[str, Any]
|
||||
keywords_data: Dict[str, Any],
|
||||
outline: Optional[List[Dict[str, Any]]] = None,
|
||||
seo_analysis: Optional[Dict[str, Any]] = None
|
||||
) -> str:
|
||||
"""Create high-quality prompt for social metadata generation"""
|
||||
|
||||
@@ -353,49 +500,68 @@ Make it compelling and SEO-optimized.
|
||||
search_intent = keywords_data.get('search_intent', 'informational')
|
||||
target_audience = keywords_data.get('target_audience', 'general')
|
||||
industry = keywords_data.get('industry', 'general')
|
||||
|
||||
current_date = datetime.now().isoformat()
|
||||
|
||||
# Add outline and SEO context similar to core metadata prompt
|
||||
outline_context = ""
|
||||
if outline:
|
||||
headings = [s.get('heading', '') for s in outline if s.get('heading')]
|
||||
outline_context = f"\nOUTLINE SECTIONS: {', '.join(headings[:6])}\n"
|
||||
|
||||
seo_context = ""
|
||||
if seo_analysis:
|
||||
overall_score = seo_analysis.get('overall_score', seo_analysis.get('seo_score', 0))
|
||||
seo_context = f"\nSEO SCORE: {overall_score}/100 (optimized content)\n"
|
||||
|
||||
content_preview = self._extract_content_highlights(blog_content, 1500)
|
||||
|
||||
prompt = f"""
|
||||
Generate social media metadata for this blog post.
|
||||
Generate engaging social media metadata for this blog post.
|
||||
|
||||
BLOG TITLE: {blog_title}
|
||||
BLOG CONTENT: {blog_content[:800]}...
|
||||
PRIMARY KEYWORDS: {primary_keywords}
|
||||
=== CONTENT ===
|
||||
TITLE: {blog_title}
|
||||
CONTENT: {content_preview}
|
||||
{outline_context}
|
||||
{seo_context}
|
||||
KEYWORDS: {primary_keywords}
|
||||
TARGET AUDIENCE: {target_audience}
|
||||
INDUSTRY: {industry}
|
||||
CURRENT DATE: {current_date}
|
||||
|
||||
Generate:
|
||||
=== GENERATION REQUIREMENTS ===
|
||||
|
||||
1. OPEN GRAPH (Facebook/LinkedIn):
|
||||
- title: 60 chars max
|
||||
- description: 160 chars max
|
||||
- image: image URL
|
||||
- title: 60 chars max, include primary keyword, compelling for {target_audience}
|
||||
- description: 160 chars max, include CTA and value proposition
|
||||
- image: Suggest an appropriate image URL (placeholder if none available)
|
||||
- type: "article"
|
||||
- site_name: site name
|
||||
- url: canonical URL
|
||||
- site_name: Use appropriate site name for {industry} industry
|
||||
- url: Generate canonical URL structure
|
||||
|
||||
2. TWITTER CARD:
|
||||
- card: "summary_large_image"
|
||||
- title: 70 chars max
|
||||
- description: 200 chars max with hashtags
|
||||
- image: image URL
|
||||
- site: @sitename
|
||||
- creator: @author
|
||||
- title: 70 chars max, optimized for Twitter audience
|
||||
- description: 200 chars max with relevant hashtags inline
|
||||
- image: Match Open Graph image
|
||||
- site: @yourwebsite (placeholder, user should update)
|
||||
- creator: @author (placeholder, user should update)
|
||||
|
||||
3. JSON-LD SCHEMA:
|
||||
3. JSON-LD SCHEMA (Article):
|
||||
- @context: "https://schema.org"
|
||||
- @type: "Article"
|
||||
- headline: article title
|
||||
- description: article description
|
||||
- author: {{"@type": "Person", "name": "Author Name"}}
|
||||
- publisher: {{"@type": "Organization", "name": "Site Name"}}
|
||||
- datePublished: ISO date
|
||||
- dateModified: ISO date
|
||||
- mainEntityOfPage: canonical URL
|
||||
- keywords: array of keywords
|
||||
- wordCount: word count
|
||||
- headline: Article title (optimized)
|
||||
- description: Article description (150-200 chars)
|
||||
- author: {{"@type": "Person", "name": "Author Name"}} (placeholder)
|
||||
- publisher: {{"@type": "Organization", "name": "Site Name", "logo": {{"@type": "ImageObject", "url": "logo-url"}}}}
|
||||
- datePublished: {current_date}
|
||||
- dateModified: {current_date}
|
||||
- mainEntityOfPage: {{"@type": "WebPage", "@id": "canonical-url"}}
|
||||
- keywords: Array of primary and semantic keywords
|
||||
- wordCount: {len(blog_content.split())}
|
||||
- articleSection: Primary category based on content
|
||||
- inLanguage: "en-US"
|
||||
|
||||
Make it engaging and SEO-optimized.
|
||||
Make it engaging, personalized for {target_audience}, and optimized for {industry} industry.
|
||||
"""
|
||||
return prompt
|
||||
|
||||
|
||||
@@ -0,0 +1,269 @@
|
||||
"""Blog SEO Recommendation Applier
|
||||
|
||||
Applies actionable SEO recommendations to existing blog content using the
|
||||
provider-agnostic `llm_text_gen` dispatcher. Ensures GPT_PROVIDER parity.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from typing import Dict, Any, List
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
# Module-level logger used by BlogSEORecommendationApplier for its
# debug/info/warning/error messages.
logger = get_service_logger("blog_seo_recommendation_applier")
|
||||
|
||||
|
||||
class BlogSEORecommendationApplier:
|
||||
"""Apply actionable SEO recommendations to blog content."""
|
||||
|
||||
def __init__(self):
|
||||
logger.debug("Initialized BlogSEORecommendationApplier")
|
||||
|
||||
async def apply_recommendations(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Apply recommendations and return updated content."""
|
||||
|
||||
title = payload.get("title", "Untitled Blog")
|
||||
sections: List[Dict[str, Any]] = payload.get("sections", [])
|
||||
outline = payload.get("outline", [])
|
||||
research = payload.get("research", {})
|
||||
recommendations = payload.get("recommendations", [])
|
||||
persona = payload.get("persona", {})
|
||||
tone = payload.get("tone")
|
||||
audience = payload.get("audience")
|
||||
|
||||
if not sections:
|
||||
return {"success": False, "error": "No sections provided for recommendation application"}
|
||||
|
||||
if not recommendations:
|
||||
logger.warning("apply_recommendations called without recommendations")
|
||||
return {"success": True, "title": title, "sections": sections, "applied": []}
|
||||
|
||||
prompt = self._build_prompt(
|
||||
title=title,
|
||||
sections=sections,
|
||||
outline=outline,
|
||||
research=research,
|
||||
recommendations=recommendations,
|
||||
persona=persona,
|
||||
tone=tone,
|
||||
audience=audience,
|
||||
)
|
||||
|
||||
schema = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {"type": "string"},
|
||||
"sections": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {"type": "string"},
|
||||
"heading": {"type": "string"},
|
||||
"content": {"type": "string"},
|
||||
"notes": {"type": "array", "items": {"type": "string"}},
|
||||
},
|
||||
"required": ["id", "heading", "content"],
|
||||
},
|
||||
},
|
||||
"applied_recommendations": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"category": {"type": "string"},
|
||||
"summary": {"type": "string"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"required": ["sections"],
|
||||
}
|
||||
|
||||
logger.info("Applying SEO recommendations via llm_text_gen")
|
||||
|
||||
result = await asyncio.to_thread(
|
||||
llm_text_gen,
|
||||
prompt,
|
||||
None,
|
||||
schema,
|
||||
)
|
||||
|
||||
if not result or result.get("error"):
|
||||
error_msg = result.get("error", "Unknown error") if result else "No response from text generator"
|
||||
logger.error(f"SEO recommendation application failed: {error_msg}")
|
||||
return {"success": False, "error": error_msg}
|
||||
|
||||
raw_sections = result.get("sections", []) or []
|
||||
normalized_sections: List[Dict[str, Any]] = []
|
||||
|
||||
# Build lookup table from updated sections using their identifiers
|
||||
updated_map: Dict[str, Dict[str, Any]] = {}
|
||||
for updated in raw_sections:
|
||||
section_id = str(
|
||||
updated.get("id")
|
||||
or updated.get("section_id")
|
||||
or updated.get("heading")
|
||||
or ""
|
||||
).strip()
|
||||
|
||||
if not section_id:
|
||||
continue
|
||||
|
||||
heading = (
|
||||
updated.get("heading")
|
||||
or updated.get("title")
|
||||
or section_id
|
||||
)
|
||||
|
||||
content_text = updated.get("content", "")
|
||||
if isinstance(content_text, list):
|
||||
content_text = "\n\n".join(str(p).strip() for p in content_text if p)
|
||||
|
||||
updated_map[section_id] = {
|
||||
"id": section_id,
|
||||
"heading": heading,
|
||||
"content": str(content_text).strip(),
|
||||
"notes": updated.get("notes", []),
|
||||
}
|
||||
|
||||
if not updated_map and raw_sections:
|
||||
logger.warning("Updated sections missing identifiers; falling back to positional mapping")
|
||||
|
||||
for index, original in enumerate(sections):
|
||||
fallback_id = str(
|
||||
original.get("id")
|
||||
or original.get("section_id")
|
||||
or f"section_{index + 1}"
|
||||
).strip()
|
||||
|
||||
mapped = updated_map.get(fallback_id)
|
||||
|
||||
if not mapped and raw_sections:
|
||||
# Fall back to positional match if identifier lookup failed
|
||||
candidate = raw_sections[index] if index < len(raw_sections) else {}
|
||||
heading = (
|
||||
candidate.get("heading")
|
||||
or candidate.get("title")
|
||||
or original.get("heading")
|
||||
or original.get("title")
|
||||
or f"Section {index + 1}"
|
||||
)
|
||||
content_text = candidate.get("content") or original.get("content", "")
|
||||
if isinstance(content_text, list):
|
||||
content_text = "\n\n".join(str(p).strip() for p in content_text if p)
|
||||
mapped = {
|
||||
"id": fallback_id,
|
||||
"heading": heading,
|
||||
"content": str(content_text).strip(),
|
||||
"notes": candidate.get("notes", []),
|
||||
}
|
||||
|
||||
if not mapped:
|
||||
# Fallback to original content if nothing else available
|
||||
mapped = {
|
||||
"id": fallback_id,
|
||||
"heading": original.get("heading") or original.get("title") or f"Section {index + 1}",
|
||||
"content": str(original.get("content", "")).strip(),
|
||||
"notes": original.get("notes", []),
|
||||
}
|
||||
|
||||
normalized_sections.append(mapped)
|
||||
|
||||
applied = result.get("applied_recommendations", [])
|
||||
|
||||
logger.info("SEO recommendations applied successfully")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"title": result.get("title", title),
|
||||
"sections": normalized_sections,
|
||||
"applied": applied,
|
||||
}
|
||||
|
||||
def _build_prompt(
|
||||
self,
|
||||
*,
|
||||
title: str,
|
||||
sections: List[Dict[str, Any]],
|
||||
outline: List[Dict[str, Any]],
|
||||
research: Dict[str, Any],
|
||||
recommendations: List[Dict[str, Any]],
|
||||
persona: Dict[str, Any],
|
||||
tone: str | None,
|
||||
audience: str | None,
|
||||
) -> str:
|
||||
"""Construct prompt for applying recommendations."""
|
||||
|
||||
sections_str = []
|
||||
for section in sections:
|
||||
sections_str.append(
|
||||
f"ID: {section.get('id', 'section')}, Heading: {section.get('heading', 'Untitled')}\n"
|
||||
f"Current Content:\n{section.get('content', '')}\n"
|
||||
)
|
||||
|
||||
outline_str = "\n".join(
|
||||
[
|
||||
f"- {item.get('heading', 'Section')} (Target words: {item.get('target_words', 'N/A')})"
|
||||
for item in outline
|
||||
]
|
||||
)
|
||||
|
||||
research_summary = research.get("keyword_analysis", {}) if research else {}
|
||||
primary_keywords = ", ".join(research_summary.get("primary", [])[:10]) or "None"
|
||||
|
||||
recommendations_str = []
|
||||
for rec in recommendations:
|
||||
recommendations_str.append(
|
||||
f"Category: {rec.get('category', 'General')} | Priority: {rec.get('priority', 'Medium')}\n"
|
||||
f"Recommendation: {rec.get('recommendation', '')}\n"
|
||||
f"Impact: {rec.get('impact', '')}\n"
|
||||
)
|
||||
|
||||
persona_str = (
|
||||
f"Persona: {persona}\n"
|
||||
if persona
|
||||
else "Persona: (not provided)\n"
|
||||
)
|
||||
|
||||
style_guidance = []
|
||||
if tone:
|
||||
style_guidance.append(f"Desired tone: {tone}")
|
||||
if audience:
|
||||
style_guidance.append(f"Target audience: {audience}")
|
||||
style_str = "\n".join(style_guidance) if style_guidance else "Maintain current tone and audience alignment."
|
||||
|
||||
prompt = f"""
|
||||
You are an expert SEO content strategist. Update the blog content to apply the actionable recommendations.
|
||||
|
||||
Current Title: {title}
|
||||
|
||||
Primary Keywords (for context): {primary_keywords}
|
||||
|
||||
Outline Overview:
|
||||
{outline_str or 'No outline supplied'}
|
||||
|
||||
Existing Sections:
|
||||
{''.join(sections_str)}
|
||||
|
||||
Actionable Recommendations to Apply:
|
||||
{''.join(recommendations_str)}
|
||||
|
||||
{persona_str}
|
||||
{style_str}
|
||||
|
||||
Instructions:
|
||||
1. Carefully apply the recommendations while preserving factual accuracy and research alignment.
|
||||
2. Keep section identifiers (IDs) unchanged so the frontend can map updates correctly.
|
||||
3. Improve clarity, flow, and SEO optimization per the guidance.
|
||||
4. Return updated sections in the requested JSON format.
|
||||
5. Provide a short summary of which recommendations were addressed.
|
||||
"""
|
||||
|
||||
return prompt
|
||||
|
||||
|
||||
# Public API of this module: only the applier class is exported on star-import.
__all__ = ["BlogSEORecommendationApplier"]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user