chore: push all remaining changes
- Blog writer enhancements and bug fixes - Wix integration improvements - Frontend UI updates - GSC dashboard docs cleanup - Image studio assets - LinkedIn requirements file - Various dependency updates
This commit is contained in:
@@ -6,6 +6,7 @@ Leverages existing non-AI SEO tools and uses single AI prompt for structured ana
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import math
|
||||
import re
|
||||
import textstat
|
||||
from datetime import datetime
|
||||
@@ -34,7 +35,7 @@ class BlogContentSEOAnalyzer:
|
||||
|
||||
logger.info("BlogContentSEOAnalyzer initialized")
|
||||
|
||||
async def analyze_blog_content(self, blog_content: str, research_data: Dict[str, Any], blog_title: Optional[str] = None, user_id: str = None) -> Dict[str, Any]:
|
||||
async def analyze_blog_content(self, blog_content: str, research_data: Dict[str, Any], blog_title: Optional[str] = None, user_id: str = None, outline: Optional[List[Dict[str, Any]]] = None, competitive_advantage: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Main analysis method with parallel processing
|
||||
|
||||
@@ -43,6 +44,8 @@ class BlogContentSEOAnalyzer:
|
||||
research_data: Research data containing keywords and other insights
|
||||
blog_title: Optional blog title
|
||||
user_id: Clerk user ID for subscription checking (required)
|
||||
outline: Optional outline sections for context-aware analysis
|
||||
competitive_advantage: Optional competitive advantage for context
|
||||
|
||||
Returns:
|
||||
Comprehensive SEO analysis results
|
||||
@@ -52,21 +55,24 @@ class BlogContentSEOAnalyzer:
|
||||
try:
|
||||
logger.info("Starting blog content SEO analysis")
|
||||
|
||||
# Extract keywords from research data
|
||||
keywords_data = self._extract_keywords_from_research(research_data)
|
||||
logger.info(f"Extracted keywords: {keywords_data}")
|
||||
# Extract research context (keywords + competitor data + search queries)
|
||||
research_context = self._extract_research_context(research_data)
|
||||
logger.info(f"Extracted research context with {len(research_context.get('primary', []))} primary keywords")
|
||||
|
||||
# Phase 1: Run non-AI analyzers in parallel
|
||||
logger.info("Running non-AI analyzers in parallel")
|
||||
non_ai_results = await self._run_non_ai_analyzers(blog_content, keywords_data)
|
||||
non_ai_results = await self._run_non_ai_analyzers(blog_content, research_context)
|
||||
|
||||
# Phase 2: Single AI analysis for structured insights
|
||||
# Phase 2: Single AI analysis for structured insights (with outline + competitive context)
|
||||
logger.info("Running AI analysis")
|
||||
ai_insights = await self._run_ai_analysis(blog_content, keywords_data, non_ai_results, user_id=user_id)
|
||||
ai_insights = await self._run_ai_analysis(
|
||||
blog_content, research_context, non_ai_results, user_id=user_id,
|
||||
outline=outline, competitive_advantage=competitive_advantage
|
||||
)
|
||||
|
||||
# Phase 3: Compile and format results
|
||||
logger.info("Compiling results")
|
||||
results = self._compile_blog_seo_results(non_ai_results, ai_insights, keywords_data)
|
||||
results = self._compile_blog_seo_results(non_ai_results, ai_insights, research_context)
|
||||
|
||||
logger.info(f"SEO analysis completed. Overall score: {results.get('overall_score', 0)}")
|
||||
return results
|
||||
@@ -76,14 +82,19 @@ class BlogContentSEOAnalyzer:
|
||||
# Fail fast - don't return fallback data
|
||||
raise e
|
||||
|
||||
def _extract_keywords_from_research(self, research_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Extract keywords from research data"""
|
||||
def _extract_research_context(self, research_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Extract research context from research data including keywords, competitor data, and search queries.
|
||||
|
||||
Previously only extracted keyword_analysis. Now also extracts:
|
||||
- competitor_analysis (content_gaps, industry_leaders, opportunities, competitive_advantages)
|
||||
- search_queries
|
||||
- suggested_angles
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Extracting keywords from research data: {research_data}")
|
||||
logger.info(f"Extracting research context from research data")
|
||||
|
||||
# Extract keywords from research data structure
|
||||
keyword_analysis = research_data.get('keyword_analysis', {})
|
||||
logger.info(f"Found keyword_analysis: {keyword_analysis}")
|
||||
|
||||
# Handle different possible structures
|
||||
primary_keywords = []
|
||||
@@ -109,17 +120,37 @@ class BlogContentSEOAnalyzer:
|
||||
'long_tail': long_tail_keywords,
|
||||
'semantic': semantic_keywords,
|
||||
'all_keywords': all_keywords,
|
||||
'search_intent': keyword_analysis.get('search_intent', 'informational')
|
||||
'search_intent': keyword_analysis.get('search_intent', 'informational'),
|
||||
}
|
||||
|
||||
logger.info(f"Extracted keywords: {result}")
|
||||
# Extract competitor analysis
|
||||
competitor_analysis = research_data.get('competitor_analysis', {})
|
||||
if competitor_analysis:
|
||||
result['content_gaps'] = competitor_analysis.get('content_gaps', [])
|
||||
result['industry_leaders'] = competitor_analysis.get('industry_leaders', [])
|
||||
result['opportunities'] = competitor_analysis.get('opportunities', [])
|
||||
result['competitive_advantages'] = competitor_analysis.get('competitive_advantages', [])
|
||||
else:
|
||||
result['content_gaps'] = []
|
||||
result['industry_leaders'] = []
|
||||
result['opportunities'] = []
|
||||
result['competitive_advantages'] = []
|
||||
|
||||
# Extract search queries
|
||||
search_queries = research_data.get('search_queries', [])
|
||||
result['search_queries'] = search_queries if isinstance(search_queries, list) else []
|
||||
|
||||
# Extract suggested angles
|
||||
suggested_angles = research_data.get('suggested_angles', [])
|
||||
result['suggested_angles'] = suggested_angles if isinstance(suggested_angles, list) else []
|
||||
|
||||
logger.info(f"Extracted research context: {len(primary_keywords)} primary keywords, {len(result.get('content_gaps', []))} content gaps, {len(result.get('search_queries', []))} search queries")
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to extract keywords from research data: {e}")
|
||||
logger.error(f"Failed to extract research context from research data: {e}")
|
||||
logger.error(f"Research data structure: {research_data}")
|
||||
# Fail fast - don't return empty keywords
|
||||
raise ValueError(f"Keyword extraction failed: {e}")
|
||||
raise ValueError(f"Research context extraction failed: {e}")
|
||||
|
||||
async def _run_non_ai_analyzers(self, blog_content: str, keywords_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Run all non-AI analyzers in parallel for maximum performance"""
|
||||
@@ -170,10 +201,24 @@ class BlogContentSEOAnalyzer:
|
||||
sentences = len(re.findall(r'[.!?]+', content))
|
||||
|
||||
# Blog-specific structure analysis
|
||||
has_introduction = any('introduction' in line.lower() or 'overview' in line.lower()
|
||||
for line in lines[:10])
|
||||
has_conclusion = any('conclusion' in line.lower() or 'summary' in line.lower()
|
||||
for line in lines[-10:])
|
||||
content_lower = content.lower()
|
||||
first_500 = content_lower[:500] if len(content) > 500 else content_lower
|
||||
last_500 = content_lower[-500:] if len(content) > 500 else content_lower
|
||||
has_introduction = any('introduction' in line.lower() or 'overview' in line.lower()
|
||||
for line in lines[:10]) or any(
|
||||
phrase in first_500 for phrase in [
|
||||
'in this', 'this article', 'this guide', 'this post',
|
||||
'we will', "you'll learn", "let's explore", "whether you're",
|
||||
'in this section', 'this blog post', 'here we', 'today we',
|
||||
"we'll explore", "we'll cover", "we'll dive"
|
||||
])
|
||||
has_conclusion = any('conclusion' in line.lower() or 'summary' in line.lower()
|
||||
for line in lines[-10:]) or any(
|
||||
phrase in last_500 for phrase in [
|
||||
'in conclusion', 'to summarize', 'in summary', 'bottom line',
|
||||
'key takeaways', 'remember that', "as we've seen", 'wrapping up',
|
||||
'final thoughts', 'to conclude', 'in short', 'overall'
|
||||
])
|
||||
has_cta = any('call to action' in line.lower() or 'learn more' in line.lower()
|
||||
for line in lines)
|
||||
|
||||
@@ -187,7 +232,7 @@ class BlogContentSEOAnalyzer:
|
||||
'has_conclusion': has_conclusion,
|
||||
'has_call_to_action': has_cta,
|
||||
'structure_score': structure_score,
|
||||
'recommendations': self._get_structure_recommendations(sections, has_introduction, has_conclusion)
|
||||
'recommendations': self._get_structure_recommendations(sections, has_introduction, has_conclusion, content)
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Content structure analysis failed: {e}")
|
||||
@@ -332,33 +377,36 @@ class BlogContentSEOAnalyzer:
|
||||
raise e
|
||||
|
||||
# Helper methods for calculations and scoring
|
||||
|
||||
@staticmethod
def _sigmoid(x: float, midpoint: float = 0.0, steepness: float = 1.0) -> float:
    """Logistic curve used for smooth scoring; returns a value in [0, 1].

    Computes ``1 / (1 + exp(-steepness * (x - midpoint)))``. With a positive
    ``steepness`` the curve rises with ``x``; with a negative ``steepness`` it
    falls with ``x``.

    Args:
        x: Value to score.
        midpoint: Input at which the curve returns exactly 0.5.
        steepness: Slope factor; its sign sets the direction of the curve.

    Returns:
        A float between 0.0 and 1.0 inclusive.
    """
    try:
        return 1.0 / (1.0 + math.exp(-steepness * (x - midpoint)))
    except OverflowError:
        # math.exp only overflows for a very large *positive* exponent, which
        # makes the denominator effectively infinite. The limit is therefore
        # always 0.0, whatever the sign of steepness. (A very negative
        # exponent underflows to 0.0 without raising and yields 1.0 above.)
        return 0.0
|
||||
|
||||
def _calculate_structure_score(self, sections: int, paragraphs: int, has_intro: bool, has_conclusion: bool) -> int:
    """Score the overall content structure as an integer between 5 and 100.

    The score is the sum of four parts:

    - Sections (up to 40 points): rising sigmoid with its midpoint at 4
      sections. Above 8 sections the value is scaled by 0.7 (floored at 10).
    - Paragraphs (up to 40 points): rising sigmoid with its midpoint at 10
      paragraphs. Above 25 paragraphs the value is scaled by 0.6 (floored at 8).
    - Introduction: flat 10 points when present.
    - Conclusion: flat 10 points when present.

    Args:
        sections: Number of sections in the content.
        paragraphs: Number of paragraphs in the content.
        has_intro: Whether an introduction was detected.
        has_conclusion: Whether a conclusion was detected.

    Returns:
        The summed score, clamped to [5, 100] and truncated to an int.
    """
    section_points = 40 * self._sigmoid(sections, midpoint=4, steepness=0.8)
    if sections > 8:
        # Too many sections: step penalty rather than a smooth decay.
        section_points = max(0.7 * section_points, 10)

    paragraph_points = 40 * self._sigmoid(paragraphs, midpoint=10, steepness=0.3)
    if paragraphs > 25:
        # Too many paragraphs: step penalty rather than a smooth decay.
        paragraph_points = max(0.6 * paragraph_points, 8)

    intro_points = 10 if has_intro else 0
    conclusion_points = 10 if has_conclusion else 0

    total = section_points + paragraph_points + intro_points + conclusion_points
    return int(min(max(total, 5), 100))
|
||||
|
||||
def _calculate_keyword_density(self, content: str, keyword: str) -> float:
|
||||
"""Calculate keyword density percentage"""
|
||||
@@ -397,21 +445,20 @@ class BlogContentSEOAnalyzer:
|
||||
return total_words / len(paragraphs)
|
||||
|
||||
def _calculate_readability_score(self, metrics: Dict[str, float]) -> int:
    """Map the Flesch Reading Ease metric to an integer readability score.

    The score is ``25 + 70 * sigmoid(flesch)`` with the sigmoid centred at a
    Flesch value of 50 (steepness 0.06). A Flesch value above 80 earns a
    further 5 points. This gives roughly 28 at Flesch 0, 60 at Flesch 50 and
    96 at Flesch 100.

    Args:
        metrics: Readability metrics; only ``'flesch_reading_ease'`` is read,
            and it is treated as 0 when the key is missing.

    Returns:
        The score clamped to [20, 100] and truncated to an int.
    """
    reading_ease = metrics.get('flesch_reading_ease', 0)
    points = 25 + 70 * self._sigmoid(reading_ease, midpoint=50, steepness=0.06)
    if reading_ease > 80:
        # Bonus for very easy-to-read text, never exceeding the 100 ceiling.
        points = min(points + 5, 100)
    return int(min(max(points, 20), 100))
|
||||
|
||||
def _determine_target_audience(self, metrics: Dict[str, float]) -> str:
|
||||
"""Determine target audience based on readability metrics"""
|
||||
@@ -427,183 +474,228 @@ class BlogContentSEOAnalyzer:
|
||||
return "Graduate level"
|
||||
|
||||
def _calculate_content_depth_score(self, word_count: int, vocabulary_diversity: float) -> int:
    """Score content depth from length and vocabulary variety (5-100).

    - Length (up to 55 points): rising sigmoid with its midpoint at 800 words
      (steepness 0.005). Capped at 40 above 3000 words and at 15 below 300
      words.
    - Vocabulary diversity (up to 45 points): rising sigmoid with its midpoint
      at 0.45 (steepness 12). Capped at 15 when diversity is below 0.3. High
      diversity is not penalised.

    Args:
        word_count: Total number of words in the content.
        vocabulary_diversity: Diversity ratio — presumably unique words over
            total words; confirm against the caller.

    Returns:
        The summed score, clamped to [5, 100] and truncated to an int.
    """
    length_points = 55 * self._sigmoid(word_count, midpoint=800, steepness=0.005)
    if word_count > 3000:
        length_points = min(length_points, 40)
    elif word_count < 300:
        length_points = min(length_points, 15)

    variety_points = 45 * self._sigmoid(vocabulary_diversity, midpoint=0.45, steepness=12)
    if vocabulary_diversity < 0.3:
        variety_points = min(variety_points, 15)

    combined = length_points + variety_points
    return int(min(max(combined, 5), 100))
|
||||
|
||||
def _calculate_flow_score(self, transition_count: int, word_count: int) -> int:
    """Score content flow from the density of transition words (15-100).

    Density is measured as transitions per 100 words. The score is
    ``40 + 50 * sigmoid(density)`` with the sigmoid centred at a density of
    1.0 (steepness 2.5). A density above 5 costs 10 points, but never drops
    the score below 35.

    Args:
        transition_count: Number of transition words found.
        word_count: Total number of words in the content.

    Returns:
        15 for empty content (``word_count == 0``); otherwise the score
        clamped to [15, 100] and truncated to an int.
    """
    if word_count == 0:
        # No text to measure; return the floor instead of dividing by zero.
        return 15

    per_hundred_words = transition_count / (word_count / 100)

    flow = 40 + 50 * self._sigmoid(per_hundred_words, midpoint=1.0, steepness=2.5)
    if per_hundred_words > 5:
        # Very dense transitions are treated as overuse.
        flow = max(flow - 10, 35)
    return int(min(max(flow, 15), 100))
|
||||
|
||||
def _calculate_heading_hierarchy_score(self, h1: List[str], h2: List[str], h3: List[str]) -> int:
    """Score the heading hierarchy as an integer between 10 and 100.

    - H1 (up to 40 points): exactly one H1 earns 40, none earns 15, and
      ``n >= 2`` earns ``max(40 // n, 8)``.
    - H2 (up to 40 points): rising sigmoid with its midpoint at 4 headings,
      never below 5. Above 10 headings the value is scaled by 0.6 (floored
      at 10).
    - H3 (up to 20 points): 5 points per H3 heading.

    Args:
        h1: H1 heading texts found in the content.
        h2: H2 heading texts found in the content.
        h3: H3 heading texts found in the content.

    Returns:
        The summed score, clamped to [10, 100] and truncated to an int.
    """
    h1_count = len(h1)
    if h1_count == 1:
        h1_score = 40
    elif h1_count == 0:
        h1_score = 15
    else:
        # Each extra H1 dilutes the score, down to a floor of 8.
        h1_score = max(40 // h1_count, 8)

    h2_count = len(h2)
    h2_score = self._sigmoid(h2_count, midpoint=4, steepness=1.0) * 40
    if h2_count > 10:
        h2_score = max(h2_score * 0.6, 10)
    else:
        # Apply the 5-point minimum as a floor rather than only to the
        # zero-heading case: the raw sigmoid gives about 1.9 and 4.8 for one
        # and two H2s, so special-casing zero alone made adding a first
        # heading lower the score.
        h2_score = max(h2_score, 5)

    h3_score = min(len(h3) * 5, 20)

    return int(min(max(h1_score + h2_score + h3_score, 10), 100))
|
||||
|
||||
def _calculate_keyword_score(self, keyword_analysis: Dict[str, Any]) -> int:
    """Score keyword optimisation as an integer between 5 and 100.

    Components:

    - Density: for each keyword, ``30 * sigmoid(density)`` centred at 2.0
      (steepness 2.0), halved when the density exceeds 4. The per-keyword
      values are averaged.
    - Heading presence: 15 points for every keyword flagged ``in_headings``.
    - Early occurrence: up to 15 points per keyword whose numeric
      ``first_occurrence`` lies in [0, 150), falling as the position grows
      (sigmoid centred at 75 with a negative steepness).
    - Penalties: 8 points per missing keyword, 12 per over-optimised keyword.

    Args:
        keyword_analysis: Mapping read for the keys ``'keyword_density'``,
            ``'keyword_distribution'``, ``'missing_keywords'`` and
            ``'over_optimization'``; each is treated as empty when absent.

    Returns:
        The combined score, clamped to [5, 100] and truncated to an int.
    """
    densities = keyword_analysis.get('keyword_density', {})
    # Explicit accumulation (not sum()) keeps plain left-to-right float addition.
    density_total = 0
    for density in densities.values():
        points = self._sigmoid(density, midpoint=2.0, steepness=2.0) * 30
        if density > 4:
            points *= 0.5  # penalty for over-optimization
        density_total += points
    density_score = density_total / max(len(densities), 1)

    heading_bonus = 0
    early_bonus = 0
    for placement in keyword_analysis.get('keyword_distribution', {}).values():
        if placement.get('in_headings', False):
            heading_bonus += 15
        position = placement.get('first_occurrence', -1)
        if isinstance(position, (int, float)) and 0 <= position < 150:
            early_bonus += int(self._sigmoid(position, midpoint=75, steepness=-0.04) * 15)

    missing_penalty = 8 * len(keyword_analysis.get('missing_keywords', []))
    over_opt_penalty = 12 * len(keyword_analysis.get('over_optimization', []))

    raw = density_score + heading_bonus + early_bonus - missing_penalty - over_opt_penalty
    return int(min(max(raw, 5), 100))
|
||||
|
||||
def _calculate_weighted_score(self, scores: Dict[str, int]) -> int:
    """Combine the category scores into one weighted overall score.

    Five categories contribute, with weights summing to 1.0: structure 0.20,
    keywords 0.25, readability 0.20, quality 0.20 and headings 0.15. Any
    other key in ``scores`` (for example ``'ai_insights'``) is ignored, so it
    does not influence the overall score.

    Args:
        scores: Category name to score. Missing categories count as 0.

    Returns:
        The weighted sum, clamped to [0, 100] and truncated to an int.
    """
    category_weights = {
        'structure': 0.20,
        'keywords': 0.25,
        'readability': 0.20,
        'quality': 0.20,
        'headings': 0.15,
    }
    overall = sum(
        weight * scores.get(category, 0)
        for category, weight in category_weights.items()
    )
    return int(min(max(overall, 0), 100))
|
||||
|
||||
# Recommendation methods
|
||||
def _get_structure_recommendations(self, sections: int, has_intro: bool, has_conclusion: bool, content: str = '') -> List[str]:
    """Build structure recommendations from section counts and the content text.

    An introduction counts as present when ``has_intro`` is set, when the
    first 200 characters contain a question mark, when the first 500
    characters contain a typical preview phrase, or when they start with a
    Markdown ``# `` heading. A conclusion counts as present when
    ``has_conclusion`` is set or the last 500 characters contain a typical
    wrap-up phrase.

    Args:
        sections: Number of sections in the content.
        has_intro: Introduction flag computed by the caller.
        has_conclusion: Conclusion flag computed by the caller.
        content: Raw content text; empty skips the text-based checks.

    Returns:
        Recommendation strings; empty when nothing needs attention.
    """
    advice: List[str] = []

    if sections < 3:
        advice.append("Add more sections to improve content structure and topic coverage")
    elif sections > 8:
        advice.append("Consider combining some sections for better flow and readability")

    # Lower-cased head of the text: 500 chars for phrase matching, of which
    # only the first 200 are searched for a question-style hook.
    opening = content[:500].lower() if content else ''
    intro_phrases = (
        'in this', 'this article', 'this guide', 'this post',
        'we will', "you'll learn", "let's explore", "whether you're",
    )
    looks_like_intro = (
        has_intro
        or '?' in opening[:200]
        or any(phrase in opening for phrase in intro_phrases)
        or opening.strip().startswith('# ')
    )
    if not looks_like_intro:
        advice.append("Add an introduction that hooks the reader and previews key topics")

    closing = content[-500:].lower() if content else ''
    conclusion_phrases = (
        'in conclusion', 'to summarize', 'in summary', 'bottom line',
        'key takeaways', 'remember that', 'as we\'ve seen',
    )
    looks_like_conclusion = has_conclusion or any(phrase in closing for phrase in conclusion_phrases)
    if not looks_like_conclusion:
        advice.append("Add a conclusion to summarize key points and provide next steps")

    return advice
|
||||
|
||||
def _get_readability_recommendations(self, metrics: Dict[str, float], avg_sentence_length: float) -> List[str]:
    """Build readability recommendations from Flesch score and sentence length.

    Args:
        metrics: Readability metrics; only ``'flesch_reading_ease'`` is read,
            and it is treated as 0 when the key is missing.
        avg_sentence_length: Average number of words per sentence.

    Returns:
        Recommendation strings in the order: reading difficulty, sentence
        length, optional depth hint. Empty when nothing needs attention.
    """
    advice: List[str] = []
    reading_ease = metrics.get('flesch_reading_ease', 0)

    # Difficulty: only the most severe applicable message is reported.
    if reading_ease < 30:
        advice.append("Content is very difficult to read — shorten sentences, use simpler words, and break up complex ideas")
    elif reading_ease < 50:
        advice.append("Content is fairly complex — consider simplifying some sentences and adding more plain-language explanations")

    # Sentence length: likewise, one message at most.
    if avg_sentence_length > 25:
        advice.append(f"Average sentence length is {avg_sentence_length:.0f} words — aim for 15-20 words per sentence for better readability")
    elif avg_sentence_length > 20:
        advice.append("Some sentences may be too long — try breaking a few into shorter ones for easier reading")

    # Very easy (but not extremely easy) text gets a hint about depth.
    if 80 < reading_ease < 95:
        advice.append("Readability is very good — consider adding slightly more technical depth for expert credibility")

    return advice
|
||||
|
||||
def _get_content_quality_recommendations(self, word_count: int, vocabulary_diversity: float, transition_count: int) -> List[str]:
    """Build content-quality recommendations for length, vocabulary and flow.

    Each of the three aspects contributes at most one message, chosen by the
    first threshold that matches.

    Args:
        word_count: Total number of words in the content.
        vocabulary_diversity: Vocabulary diversity ratio of the content.
        transition_count: Number of transition words found.

    Returns:
        Recommendation strings in the order length, vocabulary, transitions.
        Empty when nothing needs attention.
    """
    if word_count < 400:
        length_note = "Content is significantly underdeveloped — expand with detailed explanations, examples, and supporting evidence"
    elif word_count < 800:
        length_note = "Content is thin — add depth with specific examples, data points, and detailed explanations for each section"
    elif word_count > 3000:
        length_note = "Content is very long — consider whether all sections are necessary or if some could be a separate post"
    else:
        length_note = None

    if vocabulary_diversity < 0.35:
        vocabulary_note = "Vocabulary is highly repetitive — use synonyms and varied phrasing to improve engagement"
    elif vocabulary_diversity < 0.45:
        vocabulary_note = "Vocabulary variety could be improved — try rephrasing repeated terms for more natural flow"
    else:
        vocabulary_note = None

    if transition_count < 2:
        transition_note = "Very few transition words found — add connectors like 'however', 'therefore', 'furthermore' between ideas"
    elif transition_count < 5:
        transition_note = "Add more transition words to improve the flow between paragraphs and sections"
    else:
        transition_note = None

    return [note for note in (length_note, vocabulary_note, transition_note) if note is not None]
|
||||
|
||||
def _get_heading_recommendations(self, h1: List[str], h2: List[str], h3: List[str]) -> List[str]:
    """Build heading recommendations from the H1/H2/H3 headings found.

    Args:
        h1: H1 heading texts found in the content.
        h2: H2 heading texts found in the content.
        h3: H3 heading texts found in the content.

    Returns:
        Recommendation strings; empty when the hierarchy needs no changes.
    """
    recommendations: List[str] = []
    h1_count, h2_count = len(h1), len(h2)

    if h1_count == 0:
        recommendations.append("Add a main H1 heading — this is the primary title for both readers and search engines")
    elif h1_count > 1:
        recommendations.append(f"Found {h1_count} H1 headings — use only one H1 per post for clarity. Convert extras to H2.")

    if h2_count < 3:
        # Singular form avoids the ungrammatical "Only 1 H2 headings found".
        noun = "heading" if h2_count == 1 else "headings"
        recommendations.append(f"Only {h2_count} H2 {noun} found — add section headings to break up content and improve scanning")
    elif h2_count > 10:
        recommendations.append(f"{h2_count} H2 headings may be too many — consider using H3 subheadings within sections for better hierarchy")
    elif h2_count > 5 and not h3:
        # Reached only for 6-10 H2s: above 10 the message above already
        # suggests H3 subheadings, so the advice is not given twice.
        recommendations.append("Consider adding H3 subheadings within longer H2 sections for better content hierarchy")

    return recommendations
|
||||
|
||||
async def _run_ai_analysis(self, blog_content: str, keywords_data: Dict[str, Any], non_ai_results: Dict[str, Any], user_id: str = None) -> Dict[str, Any]:
|
||||
async def _run_ai_analysis(self, blog_content: str, keywords_data: Dict[str, Any], non_ai_results: Dict[str, Any], user_id: str = None, outline: Optional[List[Dict[str, Any]]] = None, competitive_advantage: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Run single AI analysis for structured insights (provider-agnostic)"""
|
||||
if not user_id:
|
||||
raise ValueError("user_id is required for subscription checking. Please provide Clerk user ID.")
|
||||
@@ -612,7 +704,9 @@ class BlogContentSEOAnalyzer:
|
||||
context = {
|
||||
'blog_content': blog_content,
|
||||
'keywords_data': keywords_data,
|
||||
'non_ai_results': non_ai_results
|
||||
'non_ai_results': non_ai_results,
|
||||
'outline': outline or [],
|
||||
'competitive_advantage': competitive_advantage or '',
|
||||
}
|
||||
|
||||
# Create AI prompt for structured analysis
|
||||
@@ -624,10 +718,18 @@ class BlogContentSEOAnalyzer:
|
||||
"content_quality_insights": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"engagement_score": {"type": "number"},
|
||||
"value_proposition": {"type": "string"},
|
||||
"content_gaps": {"type": "array", "items": {"type": "string"}},
|
||||
"improvement_suggestions": {"type": "array", "items": {"type": "string"}}
|
||||
"improvement_suggestions": {"type": "array", "items": {"type": "string"}},
|
||||
"content_depth_indicators": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"has_specific_data_points": {"type": "boolean"},
|
||||
"has_examples_or_illustrations": {"type": "boolean"},
|
||||
"has_actionable_takeaways": {"type": "boolean"},
|
||||
"depth_assessment": {"type": "string"}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"seo_optimization_insights": {
|
||||
@@ -648,13 +750,12 @@ class BlogContentSEOAnalyzer:
|
||||
"ux_improvements": {"type": "array", "items": {"type": "string"}}
|
||||
}
|
||||
},
|
||||
"competitive_analysis": {
|
||||
"content_strengths": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content_differentiation": {"type": "string"},
|
||||
"unique_value": {"type": "string"},
|
||||
"competitive_advantages": {"type": "array", "items": {"type": "string"}},
|
||||
"market_positioning": {"type": "string"}
|
||||
"strongest_sections": {"type": "array", "items": {"type": "string"}},
|
||||
"unique_value_points": {"type": "array", "items": {"type": "string"}},
|
||||
"reader_value_assessment": {"type": "string"}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -675,37 +776,85 @@ class BlogContentSEOAnalyzer:
|
||||
raise e
|
||||
|
||||
def _create_ai_analysis_prompt(self, context: Dict[str, Any]) -> str:
|
||||
"""Create AI analysis prompt"""
|
||||
"""Create AI analysis prompt with research context and outline awareness"""
|
||||
blog_content = context['blog_content']
|
||||
keywords_data = context['keywords_data']
|
||||
non_ai_results = context['non_ai_results']
|
||||
outline = context.get('outline', [])
|
||||
competitive_advantage = context.get('competitive_advantage', '')
|
||||
|
||||
# Build outline context
|
||||
outline_text = ""
|
||||
if outline:
|
||||
section_names = []
|
||||
for sec in outline[:8]:
|
||||
heading = sec.get('heading', '') if isinstance(sec, dict) else getattr(sec, 'heading', '')
|
||||
subheadings = sec.get('subheadings', []) if isinstance(sec, dict) else getattr(sec, 'subheadings', [])
|
||||
sub_text = f" (subtopics: {', '.join(subheadings[:4])})" if subheadings else ""
|
||||
target_words = sec.get('target_words', '') if isinstance(sec, dict) else getattr(sec, 'target_words', '')
|
||||
word_text = f" [~{target_words} words]" if target_words else ""
|
||||
section_names.append(f" - {heading}{sub_text}{word_text}")
|
||||
outline_text = "\n".join(section_names)
|
||||
|
||||
# Build research context block
|
||||
research_block = ""
|
||||
content_gaps = keywords_data.get('content_gaps', [])
|
||||
competitive_advantages = keywords_data.get('competitive_advantages', [])
|
||||
search_queries = keywords_data.get('search_queries', [])
|
||||
suggested_angles = keywords_data.get('suggested_angles', [])
|
||||
industry_leaders = keywords_data.get('industry_leaders', [])
|
||||
|
||||
if content_gaps:
|
||||
research_block += f"\nCONTENT GAPS (from competitor analysis): {', '.join(content_gaps[:5])}"
|
||||
if competitive_advantages:
|
||||
research_block += f"\nOUR COMPETITIVE ADVANTAGES: {', '.join(competitive_advantages[:3])}"
|
||||
if competitive_advantage:
|
||||
research_block += f"\nFOCUSED COMPETITIVE ADVANTAGE: {competitive_advantage}"
|
||||
if search_queries:
|
||||
research_block += f"\nORIGINAL SEARCH QUERIES: {', '.join(search_queries[:5])}"
|
||||
if suggested_angles:
|
||||
research_block += f"\nPLANNED CONTENT ANGLES: {', '.join(suggested_angles[:3])}"
|
||||
if industry_leaders:
|
||||
research_block += f"\nINDUSTRY LEADERS: {', '.join(industry_leaders[:3])}"
|
||||
|
||||
prompt = f"""
|
||||
Analyze this blog content for SEO optimization and user experience. Provide structured insights based on the content and keyword data.
|
||||
Analyze this blog content for SEO optimization and user experience. Provide structured insights based ONLY on what is actually present in the content and keyword data. Do NOT fabricate data, statistics, competitor names, or case studies that are not in the content.
|
||||
|
||||
BLOG CONTENT:
|
||||
{blog_content[:2000]}...
|
||||
{blog_content[:3000]}...
|
||||
|
||||
KEYWORDS DATA:
|
||||
Primary Keywords: {keywords_data.get('primary', [])}
|
||||
Long-tail Keywords: {keywords_data.get('long_tail', [])}
|
||||
Semantic Keywords: {keywords_data.get('semantic', [])}
|
||||
Search Intent: {keywords_data.get('search_intent', 'informational')}
|
||||
Search Intent: {keywords_data.get('search_intent', 'informational')}{research_block}
|
||||
|
||||
NON-AI ANALYSIS RESULTS:
|
||||
Structure Score: {non_ai_results.get('content_structure', {}).get('structure_score', 0)}
|
||||
Readability Score: {non_ai_results.get('readability_analysis', {}).get('readability_score', 0)}
|
||||
Content Quality Score: {non_ai_results.get('content_quality', {}).get('content_depth_score', 0)}
|
||||
MEASURED ANALYSIS RESULTS:
|
||||
Structure Score: {non_ai_results.get('content_structure', {}).get('structure_score', 0)}/100
|
||||
Readability Score: {non_ai_results.get('readability_analysis', {}).get('readability_score', 0)}/100
|
||||
Content Quality Score: {non_ai_results.get('content_quality', {}).get('content_depth_score', 0)}/100
|
||||
Heading Hierarchy Score: {non_ai_results.get('heading_structure', {}).get('heading_hierarchy_score', 0)}/100
|
||||
Word Count: {non_ai_results.get('content_quality', {}).get('word_count', 0)}
|
||||
Sections: {non_ai_results.get('content_structure', {}).get('total_sections', 0)}
|
||||
Has Introduction: {non_ai_results.get('content_structure', {}).get('has_introduction', False)}
|
||||
Has Conclusion: {non_ai_results.get('content_structure', {}).get('has_conclusion', False)}{f"""
|
||||
|
||||
Please provide:
|
||||
1. Content Quality Insights: Assess engagement potential, value proposition, content gaps, and improvement suggestions
|
||||
2. SEO Optimization Insights: Evaluate keyword optimization, content relevance, search intent alignment, and SEO improvements
|
||||
3. User Experience Insights: Analyze content flow, readability, engagement factors, and UX improvements
|
||||
4. Competitive Analysis: Identify content differentiation, unique value, competitive advantages, and market positioning
|
||||
PLANNED OUTLINE STRUCTURE:
|
||||
{outline_text}""" if outline_text else ""}
|
||||
{f"""
|
||||
|
||||
Focus on actionable insights that can improve the blog's performance and user engagement.
|
||||
FOCUSED ADVANTAGE: {competitive_advantage}""" if competitive_advantage else ""}
|
||||
|
||||
IMPORTANT: SEO metadata (title tag, meta description, Open Graph tags, Twitter cards, JSON-LD schema) will be generated in a separate step. Do NOT recommend adding or improving meta descriptions, title tags, OG tags, or structured data markup — focus only on content-level improvements.
|
||||
|
||||
Provide:
|
||||
1. Content Quality Insights: Assess the value proposition based on actual content. Identify specific content gaps (what TOPICS from the planned outline or competitor analysis are missing; do NOT suggest adding case studies unless the content references specific studies). Suggest improvements grounded in what the content currently covers.
|
||||
2. Content Depth Indicators: Objectively assess whether the content contains specific data points, examples, or actionable takeaways. These are binary assessments based on what's actually in the text.
|
||||
3. SEO Optimization Insights: Evaluate keyword optimization based on the provided keyword data. Assess content relevance and search intent alignment relative to the original search queries.
|
||||
4. User Experience Insights: Analyze content flow and readability. Identify engagement factors present in the text.
|
||||
5. Content Strengths: Identify the strongest sections of the content by heading name. Note unique value points the content provides. Do NOT invent competitive advantages — only describe what makes THIS content valuable based on the competitive advantages and content gaps listed above.
|
||||
"""
|
||||
|
||||
|
||||
return prompt
|
||||
|
||||
def _compile_blog_seo_results(self, non_ai_results: Dict[str, Any], ai_insights: Dict[str, Any], keywords_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
@@ -719,13 +868,28 @@ class BlogContentSEOAnalyzer:
|
||||
raise ValueError("AI insights are missing")
|
||||
|
||||
# Calculate category scores
|
||||
# Compute ai_depth_score from measurable content_depth_indicators instead of
|
||||
# hallucinated engagement_score. If depth_indicators are present, score based on
|
||||
# boolean flags; otherwise default to 50 (neutral).
|
||||
ai_quality = ai_insights.get('content_quality_insights', {})
|
||||
depth_indicators = ai_quality.get('content_depth_indicators', {})
|
||||
if depth_indicators:
|
||||
depth_flags = [
|
||||
depth_indicators.get('has_specific_data_points', False),
|
||||
depth_indicators.get('has_examples_or_illustrations', False),
|
||||
depth_indicators.get('has_actionable_takeaways', False),
|
||||
]
|
||||
depth_score = 40 + (sum(depth_flags) * 20) # 40 baseline + 20 per true flag = 40-100
|
||||
else:
|
||||
depth_score = 50
|
||||
|
||||
category_scores = {
|
||||
'structure': non_ai_results.get('content_structure', {}).get('structure_score', 0),
|
||||
'keywords': self._calculate_keyword_score(non_ai_results.get('keyword_analysis', {})),
|
||||
'readability': non_ai_results.get('readability_analysis', {}).get('readability_score', 0),
|
||||
'quality': non_ai_results.get('content_quality', {}).get('content_depth_score', 0),
|
||||
'headings': non_ai_results.get('heading_structure', {}).get('heading_hierarchy_score', 0),
|
||||
'ai_insights': ai_insights.get('content_quality_insights', {}).get('engagement_score', 0)
|
||||
'ai_insights': depth_score
|
||||
}
|
||||
|
||||
# Calculate overall score
|
||||
@@ -757,7 +921,15 @@ class BlogContentSEOAnalyzer:
|
||||
def _compile_actionable_recommendations(self, non_ai_results: Dict[str, Any], ai_insights: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Compile actionable recommendations from all sources"""
|
||||
recommendations = []
|
||||
|
||||
|
||||
# Metadata-related keywords to filter out (handled by metadata generator)
|
||||
metadata_keywords = ['meta description', 'title tag', 'og tag', 'open graph',
|
||||
'twitter card', 'json-ld', 'schema markup', 'structured data markup']
|
||||
|
||||
def _is_metadata_rec(rec_text: str) -> bool:
|
||||
rec_lower = rec_text.lower()
|
||||
return any(kw in rec_lower for kw in metadata_keywords)
|
||||
|
||||
# Structure recommendations
|
||||
structure_recs = non_ai_results.get('content_structure', {}).get('recommendations', [])
|
||||
for rec in structure_recs:
|
||||
@@ -767,7 +939,7 @@ class BlogContentSEOAnalyzer:
|
||||
'recommendation': rec,
|
||||
'impact': 'Improves content organization and user experience'
|
||||
})
|
||||
|
||||
|
||||
# Keyword recommendations
|
||||
keyword_recs = non_ai_results.get('keyword_analysis', {}).get('recommendations', [])
|
||||
for rec in keyword_recs:
|
||||
@@ -777,7 +949,7 @@ class BlogContentSEOAnalyzer:
|
||||
'recommendation': rec,
|
||||
'impact': 'Improves search engine visibility'
|
||||
})
|
||||
|
||||
|
||||
# Readability recommendations
|
||||
readability_recs = non_ai_results.get('readability_analysis', {}).get('recommendations', [])
|
||||
for rec in readability_recs:
|
||||
@@ -787,17 +959,40 @@ class BlogContentSEOAnalyzer:
|
||||
'recommendation': rec,
|
||||
'impact': 'Improves user engagement and comprehension'
|
||||
})
|
||||
|
||||
# AI insights recommendations
|
||||
|
||||
# AI insights recommendations (filter out metadata-related recs)
|
||||
ai_recs = ai_insights.get('content_quality_insights', {}).get('improvement_suggestions', [])
|
||||
for rec in ai_recs:
|
||||
if not _is_metadata_rec(rec):
|
||||
recommendations.append({
|
||||
'category': 'Content Quality',
|
||||
'priority': 'Medium',
|
||||
'recommendation': rec,
|
||||
'impact': 'Enhances content value and engagement'
|
||||
})
|
||||
|
||||
# SEO improvement recommendations (filter metadata recs)
|
||||
seo_recs = ai_insights.get('seo_optimization_insights', {}).get('seo_improvements', [])
|
||||
for rec in seo_recs:
|
||||
if not _is_metadata_rec(rec):
|
||||
recommendations.append({
|
||||
'category': 'SEO',
|
||||
'priority': 'Medium',
|
||||
'recommendation': rec,
|
||||
'impact': 'Improves search engine optimization'
|
||||
})
|
||||
|
||||
# Content strengths as informational (lower priority)
|
||||
content_strengths = ai_insights.get('content_strengths', {})
|
||||
strong_sections = content_strengths.get('strongest_sections', [])
|
||||
if strong_sections:
|
||||
recommendations.append({
|
||||
'category': 'Content Quality',
|
||||
'priority': 'Medium',
|
||||
'recommendation': rec,
|
||||
'impact': 'Enhances content value and engagement'
|
||||
'category': 'Strengths',
|
||||
'priority': 'Low',
|
||||
'recommendation': f"Strongest sections: {', '.join(strong_sections[:3])}. Consider expanding these areas further.",
|
||||
'impact': 'Leverages existing content strengths'
|
||||
})
|
||||
|
||||
|
||||
return recommendations
|
||||
|
||||
def _create_visualization_data(self, category_scores: Dict[str, int], non_ai_results: Dict[str, Any]) -> Dict[str, Any]:
|
||||
@@ -851,7 +1046,7 @@ class BlogContentSEOAnalyzer:
|
||||
'weakest_category': weakest_category[0],
|
||||
'key_strengths': self._identify_key_strengths(category_scores),
|
||||
'key_weaknesses': self._identify_key_weaknesses(category_scores),
|
||||
'ai_summary': ai_insights.get('content_quality_insights', {}).get('value_proposition', '')
|
||||
'ai_summary': ai_insights.get('content_quality_insights', {}).get('value_proposition', 'Content analysis completed.')
|
||||
}
|
||||
|
||||
def _identify_key_strengths(self, category_scores: Dict[str, int]) -> List[str]:
|
||||
|
||||
@@ -84,14 +84,14 @@ class BlogSEOMetadataGenerator:
|
||||
raise e
|
||||
|
||||
def _extract_keywords_from_research(self, research_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Extract keywords and context from research data"""
|
||||
"""Extract keywords and context from research data, including competitor analysis and content gaps."""
|
||||
try:
|
||||
keyword_analysis = research_data.get('keyword_analysis', {})
|
||||
|
||||
# Handle both 'semantic' and 'semantic_keywords' field names
|
||||
semantic_keywords = keyword_analysis.get('semantic', []) or keyword_analysis.get('semantic_keywords', [])
|
||||
|
||||
return {
|
||||
result = {
|
||||
'primary_keywords': keyword_analysis.get('primary', []),
|
||||
'long_tail_keywords': keyword_analysis.get('long_tail', []),
|
||||
'semantic_keywords': semantic_keywords,
|
||||
@@ -100,6 +100,30 @@ class BlogSEOMetadataGenerator:
|
||||
'target_audience': research_data.get('target_audience', 'general'),
|
||||
'industry': research_data.get('industry', 'general')
|
||||
}
|
||||
|
||||
# Extract competitor analysis context
|
||||
competitor_analysis = research_data.get('competitor_analysis', {})
|
||||
if competitor_analysis:
|
||||
result['content_gaps'] = competitor_analysis.get('content_gaps', [])
|
||||
result['industry_leaders'] = competitor_analysis.get('industry_leaders', [])
|
||||
result['opportunities'] = competitor_analysis.get('opportunities', [])
|
||||
result['competitive_advantages'] = competitor_analysis.get('competitive_advantages', [])
|
||||
else:
|
||||
result['content_gaps'] = []
|
||||
result['industry_leaders'] = []
|
||||
result['opportunities'] = []
|
||||
result['competitive_advantages'] = []
|
||||
|
||||
# Extract search queries
|
||||
search_queries = research_data.get('search_queries', [])
|
||||
result['search_queries'] = search_queries if isinstance(search_queries, list) else []
|
||||
|
||||
# Extract suggested angles
|
||||
suggested_angles = research_data.get('suggested_angles', [])
|
||||
result['suggested_angles'] = suggested_angles if isinstance(suggested_angles, list) else []
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to extract keywords from research: {e}")
|
||||
return {
|
||||
@@ -109,7 +133,13 @@ class BlogSEOMetadataGenerator:
|
||||
'all_keywords': [],
|
||||
'search_intent': 'informational',
|
||||
'target_audience': 'general',
|
||||
'industry': 'general'
|
||||
'industry': 'general',
|
||||
'content_gaps': [],
|
||||
'industry_leaders': [],
|
||||
'opportunities': [],
|
||||
'competitive_advantages': [],
|
||||
'search_queries': [],
|
||||
'suggested_angles': []
|
||||
}
|
||||
|
||||
async def _generate_core_metadata(
|
||||
@@ -194,18 +224,20 @@ class BlogSEOMetadataGenerator:
|
||||
# Check if we got a valid response
|
||||
if not ai_response or not isinstance(ai_response, dict):
|
||||
logger.error("Core metadata generation failed: Invalid response from LLM")
|
||||
# Return fallback response
|
||||
primary_keywords = ', '.join(keywords_data.get('primary_keywords', ['content']))
|
||||
# Return fallback response using content-derived values
|
||||
primary_kw = keywords_data.get('primary_keywords', ['content'])
|
||||
primary_kw_first = primary_kw[0] if primary_kw else 'content'
|
||||
word_count = len(blog_content.split())
|
||||
slug = re.sub(r'[^a-z0-9]+', '-', blog_title.lower())[:50].strip('-')
|
||||
return {
|
||||
'seo_title': blog_title,
|
||||
'meta_description': f'Learn about {primary_keywords.split(", ")[0] if primary_keywords else "this topic"}.',
|
||||
'url_slug': blog_title.lower().replace(' ', '-').replace(':', '').replace(',', '')[:50],
|
||||
'blog_tags': primary_keywords.split(', ') if primary_keywords else ['content'],
|
||||
'blog_categories': ['Content Marketing', 'Technology'],
|
||||
'social_hashtags': ['#content', '#marketing', '#technology'],
|
||||
'meta_description': f'Discover insights about {primary_kw_first}. Comprehensive guide with practical tips and expert analysis.',
|
||||
'url_slug': slug,
|
||||
'blog_tags': primary_kw[:5] if isinstance(primary_kw, list) else [primary_kw_first],
|
||||
'blog_categories': [primary_kw_first.title(), 'Guide'],
|
||||
'social_hashtags': [f'#{primary_kw_first.replace(" ", "")}', '#guide', '#tips'],
|
||||
'reading_time': max(1, word_count // 200),
|
||||
'focus_keyword': primary_keywords.split(', ')[0] if primary_keywords else 'content'
|
||||
'focus_keyword': primary_kw_first
|
||||
}
|
||||
|
||||
logger.info(f"Core metadata generation completed. Response keys: {list(ai_response.keys())}")
|
||||
@@ -302,36 +334,41 @@ class BlogSEOMetadataGenerator:
|
||||
# Check if we got a valid response
|
||||
if not ai_response or not isinstance(ai_response, dict) or not ai_response.get('open_graph') or not ai_response.get('twitter_card') or not ai_response.get('json_ld_schema'):
|
||||
logger.error("Social metadata generation failed: Invalid or empty response from LLM")
|
||||
# Return fallback response
|
||||
# Return fallback response using content-derived values
|
||||
primary_kw = keywords_data.get('primary_keywords', ['content'])
|
||||
primary_kw_first = primary_kw[0] if primary_kw else 'content'
|
||||
slug = re.sub(r'[^a-z0-9]+', '-', blog_title.lower())[:50].strip('-')
|
||||
word_count = len(blog_content.split())
|
||||
current_date = datetime.now().isoformat()
|
||||
return {
|
||||
'open_graph': {
|
||||
'title': blog_title,
|
||||
'description': f'Learn about {keywords_data.get("primary_keywords", ["this topic"])[0] if keywords_data.get("primary_keywords") else "this topic"}.',
|
||||
'image': 'https://example.com/image.jpg',
|
||||
'description': f'Discover insights about {primary_kw_first}. Comprehensive guide with practical tips.',
|
||||
'image': '',
|
||||
'type': 'article',
|
||||
'site_name': 'Your Website',
|
||||
'url': 'https://example.com/blog'
|
||||
'site_name': '',
|
||||
'url': f'https://example.com/blog/{slug}'
|
||||
},
|
||||
'twitter_card': {
|
||||
'card': 'summary_large_image',
|
||||
'title': blog_title,
|
||||
'description': f'Learn about {keywords_data.get("primary_keywords", ["this topic"])[0] if keywords_data.get("primary_keywords") else "this topic"}.',
|
||||
'image': 'https://example.com/image.jpg',
|
||||
'site': '@yourwebsite',
|
||||
'creator': '@author'
|
||||
'description': f'Explore our guide on {primary_kw_first}.',
|
||||
'image': '',
|
||||
'site': '',
|
||||
'creator': ''
|
||||
},
|
||||
'json_ld_schema': {
|
||||
'@context': 'https://schema.org',
|
||||
'@type': 'Article',
|
||||
'headline': blog_title,
|
||||
'description': f'Learn about {keywords_data.get("primary_keywords", ["this topic"])[0] if keywords_data.get("primary_keywords") else "this topic"}.',
|
||||
'author': {'@type': 'Person', 'name': 'Author Name'},
|
||||
'publisher': {'@type': 'Organization', 'name': 'Your Website'},
|
||||
'datePublished': '2025-01-01T00:00:00Z',
|
||||
'dateModified': '2025-01-01T00:00:00Z',
|
||||
'mainEntityOfPage': 'https://example.com/blog',
|
||||
'keywords': keywords_data.get('primary_keywords', ['content']),
|
||||
'wordCount': len(blog_content.split())
|
||||
'description': f'Comprehensive guide about {primary_kw_first}.',
|
||||
'author': {'@type': 'Person', 'name': ''},
|
||||
'publisher': {'@type': 'Organization', 'name': ''},
|
||||
'datePublished': current_date,
|
||||
'dateModified': current_date,
|
||||
'mainEntityOfPage': f'https://example.com/blog/{slug}',
|
||||
'keywords': primary_kw[:5] if isinstance(primary_kw, list) else [primary_kw_first],
|
||||
'wordCount': word_count
|
||||
}
|
||||
}
|
||||
|
||||
@@ -408,21 +445,53 @@ OUTLINE STRUCTURE:
|
||||
- Content hierarchy: Well-structured with {len(outline)} main sections
|
||||
"""
|
||||
|
||||
# Extract SEO analysis insights
|
||||
# Extract SEO analysis insights with weakness-aware guidance
|
||||
seo_context = ""
|
||||
if seo_analysis:
|
||||
overall_score = seo_analysis.get('overall_score', seo_analysis.get('seo_score', 0))
|
||||
category_scores = seo_analysis.get('category_scores', {})
|
||||
applied_recs = seo_analysis.get('applied_recommendations', [])
|
||||
applied_recs = seo_analysis.get('applied_recommendations') or []
|
||||
|
||||
# Build weakness-specific guidance for metadata
|
||||
weakness_guidance = []
|
||||
kw_score = category_scores.get('keywords', category_scores.get('Keywords', 0))
|
||||
if kw_score < 70:
|
||||
weakness_guidance.append("Keyword optimization is weak — ensure title and description prominently feature primary keywords")
|
||||
read_score = category_scores.get('readability', category_scores.get('Readability', 0))
|
||||
if read_score < 70:
|
||||
weakness_guidance.append("Readability needs improvement — use clear, accessible language in the meta description")
|
||||
struct_score = category_scores.get('structure', category_scores.get('Structure', 0))
|
||||
if struct_score < 70:
|
||||
weakness_guidance.append("Content structure needs improvement — the title should clearly signal the content structure")
|
||||
|
||||
seo_context = f"""
|
||||
SEO ANALYSIS RESULTS:
|
||||
- Overall SEO Score: {overall_score}/100
|
||||
- Category Scores: Structure {category_scores.get('structure', category_scores.get('Structure', 0))}, Keywords {category_scores.get('keywords', category_scores.get('Keywords', 0))}, Readability {category_scores.get('readability', category_scores.get('Readability', 0))}
|
||||
- Category Scores: Structure {struct_score}, Keywords {kw_score}, Readability {read_score}
|
||||
- Applied Recommendations: {len(applied_recs)} SEO optimizations have been applied
|
||||
- Content Quality: Optimized for search engines with keyword focus
|
||||
{f"- WEAKNESS GUIDANCE: {'; '.join(weakness_guidance)}" if weakness_guidance else ""}
|
||||
"""
|
||||
|
||||
# Build research context block
|
||||
research_block = ""
|
||||
content_gaps = keywords_data.get('content_gaps', [])
|
||||
competitive_advantages = keywords_data.get('competitive_advantages', [])
|
||||
search_queries = keywords_data.get('search_queries', [])
|
||||
suggested_angles = keywords_data.get('suggested_angles', [])
|
||||
industry_leaders = keywords_data.get('industry_leaders', [])
|
||||
|
||||
if content_gaps:
|
||||
research_block += f"\nCONTENT GAPS (from competitor analysis): {', '.join(content_gaps[:5])}"
|
||||
if competitive_advantages:
|
||||
research_block += f"\nOUR KEY DIFFERENTIATORS: {', '.join(competitive_advantages[:3])}"
|
||||
if search_queries:
|
||||
research_block += f"\nORIGINAL SEARCH QUERIES: {', '.join(search_queries[:5])}"
|
||||
if suggested_angles:
|
||||
research_block += f"\nCONTENT ANGLES: {', '.join(suggested_angles[:3])}"
|
||||
if industry_leaders:
|
||||
research_block += f"\nINDUSTRY LEADERS: {', '.join(industry_leaders[:3])}"
|
||||
|
||||
# Get more content context (key sections instead of just first 1000 chars)
|
||||
content_preview = self._extract_content_highlights(blog_content)
|
||||
|
||||
@@ -443,6 +512,7 @@ SEMANTIC KEYWORDS: {semantic_keywords}
|
||||
SEARCH INTENT: {search_intent}
|
||||
TARGET AUDIENCE: {target_audience}
|
||||
INDUSTRY: {industry}
|
||||
{research_block}
|
||||
|
||||
{seo_context}
|
||||
|
||||
@@ -525,6 +595,18 @@ Generate metadata that is personalized, compelling, and SEO-optimized.
|
||||
overall_score = seo_analysis.get('overall_score', seo_analysis.get('seo_score', 0))
|
||||
seo_context = f"\nSEO SCORE: {overall_score}/100 (optimized content)\n"
|
||||
|
||||
# Build research context for social metadata
|
||||
research_block = ""
|
||||
content_gaps = keywords_data.get('content_gaps', [])
|
||||
competitive_advantages = keywords_data.get('competitive_advantages', [])
|
||||
search_queries = keywords_data.get('search_queries', [])
|
||||
if content_gaps:
|
||||
research_block += f"\nCONTENT GAPS: {', '.join(content_gaps[:3])}"
|
||||
if competitive_advantages:
|
||||
research_block += f"\nDIFFERENTIATORS: {', '.join(competitive_advantages[:3])}"
|
||||
if search_queries:
|
||||
research_block += f"\nSEARCH QUERIES: {', '.join(search_queries[:4])}"
|
||||
|
||||
content_preview = self._extract_content_highlights(blog_content, 1500)
|
||||
|
||||
prompt = f"""
|
||||
@@ -539,6 +621,7 @@ KEYWORDS: {primary_keywords}
|
||||
TARGET AUDIENCE: {target_audience}
|
||||
INDUSTRY: {industry}
|
||||
CURRENT DATE: {current_date}
|
||||
{research_block}
|
||||
|
||||
=== GENERATION REQUIREMENTS ===
|
||||
|
||||
@@ -551,20 +634,20 @@ CURRENT DATE: {current_date}
|
||||
- url: Generate canonical URL structure
|
||||
|
||||
2. TWITTER CARD:
|
||||
- card: "summary_large_image"
|
||||
- title: 70 chars max, optimized for Twitter audience
|
||||
- description: 200 chars max with relevant hashtags inline
|
||||
- image: Match Open Graph image
|
||||
- site: @yourwebsite (placeholder, user should update)
|
||||
- creator: @author (placeholder, user should update)
|
||||
- card: "summary_large_image"
|
||||
- title: 70 chars max, optimized for Twitter audience
|
||||
- description: 200 chars max with relevant hashtags inline
|
||||
- image: Match Open Graph image
|
||||
- site: Leave empty string (user will add their Twitter handle)
|
||||
- creator: Leave empty string (user will add author Twitter handle)
|
||||
|
||||
3. JSON-LD SCHEMA (Article):
|
||||
- @context: "https://schema.org"
|
||||
- @type: "Article"
|
||||
- headline: Article title (optimized)
|
||||
- description: Article description (150-200 chars)
|
||||
- author: {{"@type": "Person", "name": "Author Name"}} (placeholder)
|
||||
- publisher: {{"@type": "Organization", "name": "Site Name", "logo": {{"@type": "ImageObject", "url": "logo-url"}}}}
|
||||
- @context: "https://schema.org"
|
||||
- @type: "Article"
|
||||
- headline: Article title (optimized)
|
||||
- description: Article description (150-200 chars)
|
||||
- author: {{"@type": "Person", "name": ""}} (leave empty, user will add author name)
|
||||
- publisher: {{"@type": "Organization", "name": ""}} (leave empty, user will add site name)
|
||||
- datePublished: {current_date}
|
||||
- dateModified: {current_date}
|
||||
- mainEntityOfPage: {{"@type": "WebPage", "@id": "canonical-url"}}
|
||||
@@ -633,35 +716,109 @@ Make it engaging, personalized for {target_audience}, and optimized for {industr
|
||||
raise e
|
||||
|
||||
def _calculate_optimization_score(self, core_metadata: Dict[str, Any], social_metadata: Dict[str, Any]) -> int:
|
||||
"""Calculate overall optimization score for the generated metadata"""
|
||||
"""Calculate metadata quality score based on content relevance and adherence to best practices.
|
||||
|
||||
Unlike the old completeness-based score (which just checked field existence),
|
||||
this assigns quality-weighted points based on how well each field is optimized.
|
||||
"""
|
||||
try:
|
||||
score = 0
|
||||
|
||||
# Check core metadata completeness
|
||||
if core_metadata.get('seo_title'):
|
||||
score += 15
|
||||
if core_metadata.get('meta_description'):
|
||||
score += 15
|
||||
if core_metadata.get('url_slug'):
|
||||
score += 10
|
||||
if core_metadata.get('blog_tags'):
|
||||
score += 10
|
||||
if core_metadata.get('blog_categories'):
|
||||
score += 10
|
||||
if core_metadata.get('social_hashtags'):
|
||||
score += 10
|
||||
if core_metadata.get('focus_keyword'):
|
||||
score += 10
|
||||
# Title quality (0-15): Length in 50-60 chars is optimal
|
||||
seo_title = core_metadata.get('seo_title', '')
|
||||
if seo_title:
|
||||
title_len = len(seo_title)
|
||||
if 50 <= title_len <= 60:
|
||||
score += 15
|
||||
elif 40 <= title_len <= 70:
|
||||
score += 10
|
||||
elif title_len > 0:
|
||||
score += 5
|
||||
|
||||
# Check social metadata completeness
|
||||
if social_metadata.get('open_graph'):
|
||||
# Meta description quality (0-15): Length in 150-160 chars is optimal, has CTA
|
||||
meta_desc = core_metadata.get('meta_description', '')
|
||||
if meta_desc:
|
||||
desc_len = len(meta_desc)
|
||||
desc_lower = meta_desc.lower()
|
||||
has_cta = any(phrase in desc_lower for phrase in ['learn', 'discover', 'find', 'get', 'explore', 'how to', 'why', 'tips', 'guide', 'try', 'start'])
|
||||
if 150 <= desc_len <= 160 and has_cta:
|
||||
score += 15
|
||||
elif 120 <= desc_len <= 170:
|
||||
score += 10 if has_cta else 7
|
||||
elif desc_len > 0:
|
||||
score += 4
|
||||
|
||||
# URL slug quality (0-10): Short, keyword-rich, no stop words
|
||||
url_slug = core_metadata.get('url_slug', '')
|
||||
if url_slug:
|
||||
slug_parts = url_slug.strip('/').split('/')
|
||||
slug_words = slug_parts[-1].split('-') if slug_parts else []
|
||||
if 2 <= len(slug_words) <= 5:
|
||||
score += 10
|
||||
elif len(slug_words) > 0:
|
||||
score += 5
|
||||
|
||||
# Tags and categories quality (0-20)
|
||||
blog_tags = core_metadata.get('blog_tags', [])
|
||||
blog_categories = core_metadata.get('blog_categories', [])
|
||||
if blog_tags and len(blog_tags) >= 3:
|
||||
score += 10
|
||||
if social_metadata.get('twitter_card'):
|
||||
elif blog_tags:
|
||||
score += 5
|
||||
if social_metadata.get('json_ld_schema'):
|
||||
if blog_categories and len(blog_categories) >= 1:
|
||||
score += 10
|
||||
elif blog_categories:
|
||||
score += 5
|
||||
|
||||
return min(score, 100) # Cap at 100
|
||||
# Social hashtags (0-10): Relevant and non-spammy
|
||||
social_hashtags = core_metadata.get('social_hashtags', [])
|
||||
if social_hashtags and 3 <= len(social_hashtags) <= 8:
|
||||
score += 10
|
||||
elif social_hashtags:
|
||||
score += 5
|
||||
|
||||
# Focus keyword (0-10): Present and relevant
|
||||
focus_keyword = core_metadata.get('focus_keyword', '')
|
||||
if focus_keyword and seo_title and focus_keyword.lower() in seo_title.lower():
|
||||
score += 10
|
||||
elif focus_keyword:
|
||||
score += 4
|
||||
|
||||
# Open Graph quality (0-10): Has title, description, correct type
|
||||
og = social_metadata.get('open_graph', {})
|
||||
if og:
|
||||
og_score = 0
|
||||
if og.get('title') and len(og.get('title', '')) > 10:
|
||||
og_score += 4
|
||||
if og.get('description') and 100 <= len(og.get('description', '')) <= 200:
|
||||
og_score += 4
|
||||
if og.get('type') == 'article':
|
||||
og_score += 2
|
||||
score += og_score
|
||||
|
||||
# Twitter Card quality (0-5)
|
||||
twitter = social_metadata.get('twitter_card', {})
|
||||
if twitter:
|
||||
tw_score = 0
|
||||
if twitter.get('title') and len(twitter.get('title', '')) > 10:
|
||||
tw_score += 3
|
||||
if twitter.get('card') == 'summary_large_image':
|
||||
tw_score += 2
|
||||
score += tw_score
|
||||
|
||||
# JSON-LD quality (0-5): Has headline, description, datePublished
|
||||
json_ld = social_metadata.get('json_ld_schema', {})
|
||||
if json_ld:
|
||||
jl_score = 0
|
||||
if json_ld.get('headline'):
|
||||
jl_score += 2
|
||||
if json_ld.get('description'):
|
||||
jl_score += 2
|
||||
if json_ld.get('datePublished'):
|
||||
jl_score += 1
|
||||
score += jl_score
|
||||
|
||||
return min(score, 100)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to calculate optimization score: {e}")
|
||||
|
||||
@@ -2,6 +2,13 @@
|
||||
|
||||
Applies actionable SEO recommendations to existing blog content using the
|
||||
provider-agnostic `llm_text_gen` dispatcher. Ensures GPT_PROVIDER parity.
|
||||
|
||||
Key design principles:
|
||||
- Make TARGETED edits, not full rewrites
|
||||
- Preserve existing content structure and factual claims
|
||||
- Only modify sections that have applicable recommendations
|
||||
- Never fabricate statistics, case studies, or citations
|
||||
- Ground changes in research sources when available
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
@@ -15,7 +22,7 @@ logger = get_service_logger("blog_seo_recommendation_applier")
|
||||
|
||||
|
||||
class BlogSEORecommendationApplier:
|
||||
"""Apply actionable SEO recommendations to blog content."""
|
||||
"""Apply actionable SEO recommendations to blog content with targeted edits."""
|
||||
|
||||
def __init__(self):
|
||||
logger.debug("Initialized BlogSEORecommendationApplier")
|
||||
@@ -35,6 +42,7 @@ class BlogSEORecommendationApplier:
|
||||
persona = payload.get("persona", {})
|
||||
tone = payload.get("tone")
|
||||
audience = payload.get("audience")
|
||||
competitive_advantage = payload.get("competitive_advantage", "")
|
||||
|
||||
if not sections:
|
||||
return {"success": False, "error": "No sections provided for recommendation application"}
|
||||
@@ -43,16 +51,21 @@ class BlogSEORecommendationApplier:
|
||||
logger.warning("apply_recommendations called without recommendations")
|
||||
return {"success": True, "title": title, "sections": sections, "applied": []}
|
||||
|
||||
# Determine which sections actually need changes based on recommendations
|
||||
sections_to_edit = self._identify_affected_sections(sections, recommendations)
|
||||
|
||||
prompt = self._build_prompt(
|
||||
title=title,
|
||||
introduction=introduction,
|
||||
sections=sections,
|
||||
sections_to_edit=sections_to_edit,
|
||||
outline=outline,
|
||||
research=research,
|
||||
recommendations=recommendations,
|
||||
persona=persona,
|
||||
tone=tone,
|
||||
audience=audience,
|
||||
competitive_advantage=competitive_advantage,
|
||||
)
|
||||
|
||||
schema = {
|
||||
@@ -87,14 +100,14 @@ class BlogSEORecommendationApplier:
|
||||
"required": ["sections"],
|
||||
}
|
||||
|
||||
logger.info("Applying SEO recommendations via llm_text_gen")
|
||||
logger.info("Applying SEO recommendations via llm_text_gen (targeted edit mode)")
|
||||
|
||||
result = await asyncio.to_thread(
|
||||
llm_text_gen,
|
||||
prompt,
|
||||
None,
|
||||
schema,
|
||||
user_id, # Pass user_id for subscription checking
|
||||
user_id,
|
||||
max_tokens=8192,
|
||||
)
|
||||
|
||||
@@ -106,14 +119,12 @@ class BlogSEORecommendationApplier:
|
||||
raw_sections = result.get("sections", []) or []
|
||||
normalized_sections: List[Dict[str, Any]] = []
|
||||
|
||||
# Warn if LLM returned different number of sections (may miss intro/conclusion added as new sections)
|
||||
if len(raw_sections) != len(sections):
|
||||
logger.warning(
|
||||
f"LLM returned {len(raw_sections)} sections but {len(sections)} were sent. "
|
||||
"Extra sections will be ignored; missing sections fall back to original content."
|
||||
)
|
||||
|
||||
# Build lookup table from updated sections using their identifiers
|
||||
updated_map: Dict[str, Dict[str, Any]] = {}
|
||||
for updated in raw_sections:
|
||||
section_id = str(
|
||||
@@ -156,7 +167,6 @@ class BlogSEORecommendationApplier:
|
||||
mapped = updated_map.get(fallback_id)
|
||||
|
||||
if not mapped and raw_sections:
|
||||
# Fall back to positional match if identifier lookup failed
|
||||
candidate = raw_sections[index] if index < len(raw_sections) else {}
|
||||
heading = (
|
||||
candidate.get("heading")
|
||||
@@ -176,7 +186,6 @@ class BlogSEORecommendationApplier:
|
||||
}
|
||||
|
||||
if not mapped:
|
||||
# Fallback to original content if nothing else available
|
||||
mapped = {
|
||||
"id": fallback_id,
|
||||
"heading": original.get("heading") or original.get("title") or f"Section {index + 1}",
|
||||
@@ -190,12 +199,11 @@ class BlogSEORecommendationApplier:
|
||||
|
||||
logger.info("SEO recommendations applied successfully")
|
||||
|
||||
# Extract updated introduction from LLM response if available
|
||||
updated_introduction = result.get("introduction") or ""
|
||||
if updated_introduction and updated_introduction != introduction:
|
||||
logger.info(f"Introduction updated: {len(updated_introduction)} chars")
|
||||
elif not updated_introduction:
|
||||
updated_introduction = introduction # fall back to original
|
||||
updated_introduction = introduction
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
@@ -205,37 +213,133 @@ class BlogSEORecommendationApplier:
|
||||
"applied": applied,
|
||||
}
|
||||
|
||||
def _identify_affected_sections(self, sections: List[Dict[str, Any]], recommendations: List[Dict[str, Any]]) -> List[str]:
|
||||
"""Identify which section IDs are likely affected by the recommendations.
|
||||
|
||||
Maps recommendation categories to section headings for targeted editing.
|
||||
Returns a list of section IDs that should be edited.
|
||||
"""
|
||||
affected_ids = set()
|
||||
|
||||
for rec in recommendations:
|
||||
category = (rec.get("category") or "").lower()
|
||||
rec_text = (rec.get("recommendation") or "").lower()
|
||||
|
||||
# Structure recommendations affect first/last sections or all sections
|
||||
if category == "structure":
|
||||
if sections:
|
||||
affected_ids.add(str(sections[0].get("id", "section_1")))
|
||||
affected_ids.add(str(sections[-1].get("id", f"section_{len(sections)}")))
|
||||
# "Add more sections" or "too many sections" affects all
|
||||
if "more section" in rec_text or "combine" in rec_text or "flow" in rec_text:
|
||||
for s in sections:
|
||||
affected_ids.add(str(s.get("id", "")))
|
||||
continue
|
||||
|
||||
# Keyword recommendations affect all sections (keywords should be spread)
|
||||
if category == "keywords":
|
||||
for s in sections:
|
||||
affected_ids.add(str(s.get("id", "")))
|
||||
continue
|
||||
|
||||
# Readability affects all sections
|
||||
if category == "readability":
|
||||
for s in sections:
|
||||
affected_ids.add(str(s.get("id", "")))
|
||||
continue
|
||||
|
||||
# Content quality — try to match recommendation to specific section headings
|
||||
if category in ("content quality", "content", "seo"):
|
||||
heading_keywords = {
|
||||
s.get("heading", "").lower(): str(s.get("id", ""))
|
||||
for s in sections
|
||||
}
|
||||
matched = False
|
||||
for heading_lower, section_id in heading_keywords.items():
|
||||
rec_words = rec_text.split()
|
||||
if any(word in heading_lower for word in rec_words if len(word) > 3):
|
||||
affected_ids.add(section_id)
|
||||
matched = True
|
||||
if not matched:
|
||||
# Affect first and last sections (intro/conclusion) as common targets
|
||||
if sections:
|
||||
affected_ids.add(str(sections[0].get("id", "section_1")))
|
||||
affected_ids.add(str(sections[-1].get("id", f"section_{len(sections)}")))
|
||||
|
||||
# Filter out empty IDs and return
|
||||
return [sid for sid in affected_ids if sid]
|
||||
|
||||
def _build_prompt(
|
||||
self,
|
||||
*,
|
||||
title: str,
|
||||
introduction: str,
|
||||
sections: List[Dict[str, Any]],
|
||||
sections_to_edit: List[str],
|
||||
outline: List[Dict[str, Any]],
|
||||
research: Dict[str, Any],
|
||||
recommendations: List[Dict[str, Any]],
|
||||
persona: Dict[str, Any],
|
||||
tone: str | None,
|
||||
audience: str | None,
|
||||
competitive_advantage: str = "",
|
||||
) -> str:
|
||||
"""Construct prompt for applying recommendations."""
|
||||
"""Construct prompt for applying targeted recommendations."""
|
||||
|
||||
sections_str = []
|
||||
# Build research context block
|
||||
research_block = ""
|
||||
keyword_analysis = research.get("keyword_analysis", {}) if research else {}
|
||||
primary_keywords = ", ".join(keyword_analysis.get("primary", [])[:8]) or "None"
|
||||
competitor_analysis = research.get("competitor_analysis", {}) if research else {}
|
||||
search_queries = research.get("search_queries", []) if research else []
|
||||
suggested_angles = research.get("suggested_angles", []) if research else []
|
||||
content_gaps = competitor_analysis.get("content_gaps", []) if competitor_analysis else []
|
||||
competitive_advantages = competitor_analysis.get("competitive_advantages", []) if competitor_analysis else []
|
||||
|
||||
research_block += f"\nPRIMARY KEYWORDS: {primary_keywords}"
|
||||
if content_gaps:
|
||||
research_block += f"\nCONTENT GAPS (address these in your edits): {', '.join(content_gaps[:5])}"
|
||||
if competitive_advantages:
|
||||
research_block += f"\nKEY DIFFERENTIATORS (emphasize these): {', '.join(competitive_advantages[:3])}"
|
||||
if competitive_advantage:
|
||||
research_block += f"\nPRIMARY ADVANTAGE: {competitive_advantage}"
|
||||
if search_queries:
|
||||
research_block += f"\nTARGET SEARCH QUERIES: {', '.join(search_queries[:5])}"
|
||||
if suggested_angles:
|
||||
research_block += f"\nCONTENT ANGLES: {', '.join(suggested_angles[:3])}"
|
||||
|
||||
# Build per-section content with edit markers
|
||||
sections_content = []
|
||||
for section in sections:
|
||||
sections_str.append(
|
||||
f"ID: {section.get('id', 'section')}, Heading: {section.get('heading', 'Untitled')}\n"
|
||||
f"Current Content:\n{section.get('content', '')}\n"
|
||||
)
|
||||
section_id = str(section.get("id", "section"))
|
||||
heading = section.get("heading", "Untitled")
|
||||
content = section.get("content", "")
|
||||
needs_edit = section_id in sections_to_edit
|
||||
|
||||
section_text = f"--- SECTION (ID: {section_id}, Heading: \"{heading}\")"
|
||||
if needs_edit:
|
||||
section_text += " [NEEDS EDITS based on recommendations]"
|
||||
else:
|
||||
section_text += " [KEEP AS-IS - no changes needed]"
|
||||
section_text += f" ---\n{content}\n"
|
||||
sections_content.append(section_text)
|
||||
|
||||
sections_str = "\n\n".join(sections_content)
|
||||
|
||||
outline_str = "\n".join(
|
||||
[
|
||||
f"- {item.get('heading', 'Section')} (Target words: {item.get('target_words', 'N/A')})"
|
||||
for item in outline
|
||||
]
|
||||
)
|
||||
|
||||
research_summary = research.get("keyword_analysis", {}) if research else {}
|
||||
primary_keywords = ", ".join(research_summary.get("primary", [])[:10]) or "None"
|
||||
# Build outline with subheadings and key points
|
||||
outline_parts = []
|
||||
for item in outline:
|
||||
heading = item.get("heading", "Section")
|
||||
target_words = item.get("target_words", "N/A")
|
||||
subheadings = item.get("subheadings", [])
|
||||
key_points = item.get("key_points", [])
|
||||
line = f"- {heading} (Target: {target_words} words)"
|
||||
if subheadings:
|
||||
line += f" | Subheadings: {', '.join(subheadings[:4])}"
|
||||
if key_points:
|
||||
line += f" | Key points: {', '.join(key_points[:4])}"
|
||||
outline_parts.append(line)
|
||||
outline_str = "\n".join(outline_parts) if outline_parts else "No outline supplied"
|
||||
|
||||
recommendations_str = []
|
||||
for rec in recommendations:
|
||||
@@ -248,7 +352,7 @@ class BlogSEORecommendationApplier:
|
||||
persona_str = (
|
||||
f"Persona: {persona}\n"
|
||||
if persona
|
||||
else "Persona: (not provided)\n"
|
||||
else ""
|
||||
)
|
||||
|
||||
style_guidance = []
|
||||
@@ -258,44 +362,47 @@ class BlogSEORecommendationApplier:
|
||||
style_guidance.append(f"Target audience: {audience}")
|
||||
style_str = "\n".join(style_guidance) if style_guidance else "Maintain current tone and audience alignment."
|
||||
|
||||
prompt = f"""
|
||||
You are an expert SEO content strategist. Update the blog content to apply the actionable recommendations.
|
||||
intro_text = introduction if introduction else "(No introduction currently — write one ONLY if a recommendation specifically asks for it)"
|
||||
|
||||
Current Title: {title}
|
||||
prompt = f"""You are a careful SEO content editor making TARGETED edits to an existing blog post. Your job is to apply specific SEO recommendations with PRECISION — not to rewrite the entire post.
|
||||
|
||||
Current Introduction:
|
||||
{introduction if introduction else '(No introduction exists — write a compelling one if the recommendations require it)'}
|
||||
CRITICAL RULES — YOU MUST FOLLOW THESE:
|
||||
1. PRESERVE existing content. Only make MINIMAL, targeted changes to address specific recommendations. Do NOT rewrite sections that are working well.
|
||||
2. NEVER fabricate statistics, case studies, expert quotes, research data, or specific numbers unless they are explicitly stated in the research context below.
|
||||
3. NEVER add content that contradicts or goes beyond what the research sources support.
|
||||
4. KEEP the same emotional tone and writing style as the original content.
|
||||
5. Return EXACTLY the same number of sections with EXACTLY the same IDs. Do NOT add, remove, or rename sections.
|
||||
6. For sections marked [KEEP AS-IS], return the content UNCHANGED — copy it verbatim.
|
||||
7. For sections marked [NEEDS EDITS], make ONLY the specific changes needed to address the applicable recommendations.
|
||||
8. Do NOT add introductions, conclusions, or case studies unless a recommendation EXPLICITLY asks for one.
|
||||
|
||||
Primary Keywords (for context): {primary_keywords}
|
||||
{research_block}
|
||||
|
||||
Outline Overview:
|
||||
{outline_str or 'No outline supplied'}
|
||||
PLANNED OUTLINE STRUCTURE:
|
||||
{outline_str}
|
||||
|
||||
Existing Sections:
|
||||
{''.join(sections_str)}
|
||||
CURRENT TITLE: {title}
|
||||
|
||||
Actionable Recommendations to Apply:
|
||||
CURRENT INTRODUCTION:
|
||||
{intro_text}
|
||||
|
||||
CURRENT SECTIONS:
|
||||
{sections_str}
|
||||
|
||||
RECOMMENDATIONS TO APPLY:
|
||||
{''.join(recommendations_str)}
|
||||
{persona_str}{style_str}
|
||||
|
||||
{persona_str}
|
||||
{style_str}
|
||||
|
||||
Instructions:
|
||||
1. Carefully apply the recommendations while preserving factual accuracy and research alignment.
|
||||
2. You MUST return EXACTLY the same number of sections, with EXACTLY the same IDs as provided above. Do NOT add or remove sections.
|
||||
3. If a recommendation says content is MISSING (e.g. missing introduction or conclusion), incorporate that missing content into the MOST APPROPRIATE existing section:
|
||||
- Missing introduction → PREPEND introductory content to the FIRST section's existing content.
|
||||
- Missing conclusion → APPEND concluding content to the LAST section's existing content.
|
||||
- For other missing content, add it to the section whose heading best matches the recommendation.
|
||||
4. Additionally, if an introduction is missing or weak, write a compelling introduction in the "introduction" field of your response. If the current introduction is adequate, return it unchanged.
|
||||
5. Improve clarity, flow, and SEO optimization per the guidance.
|
||||
6. Return updated sections in the requested JSON format.
|
||||
7. Provide a short summary of which recommendations were addressed.
|
||||
INSTRUCTIONS:
|
||||
- For sections marked [KEEP AS-IS]: Copy the content EXACTLY as provided. Do not change a single word.
|
||||
- For sections marked [NEEDS EDITS]: Make the MINIMUM changes needed to address the recommendations. If a recommendation says "add transition words", add 2-3 transitions — do not rewrite the paragraph. If it says "use more varied vocabulary", replace 2-3 repetitive words — do not rewrite the section.
|
||||
- If a recommendation asks for an introduction and none exists, write a brief 2-3 sentence introduction that naturally leads into the first section. Do NOT fabricate hooks or statistics.
|
||||
- If a recommendation asks for a conclusion, append 2-3 sentences summarizing key takeaways to the LAST section. Do NOT fabricate conclusions that don't follow from the actual content.
|
||||
- Return ALL sections, including the ones you did NOT change.
|
||||
- Provide a summary of which recommendations you addressed and what specific changes you made.
|
||||
"""
|
||||
|
||||
return prompt
|
||||
|
||||
|
||||
__all__ = ["BlogSEORecommendationApplier"]
|
||||
|
||||
|
||||
__all__ = ["BlogSEORecommendationApplier"]
|
||||
Reference in New Issue
Block a user