650 lines
30 KiB
Python
650 lines
30 KiB
Python
from typing import Any, Dict, List
|
|
from loguru import logger
|
|
from services.llm_providers.gemini_provider import gemini_structured_json_response
|
|
|
|
from models.blog_models import (
|
|
BlogResearchRequest,
|
|
BlogResearchResponse,
|
|
BlogOutlineRequest,
|
|
BlogOutlineResponse,
|
|
BlogOutlineRefineRequest,
|
|
BlogSectionRequest,
|
|
BlogSectionResponse,
|
|
BlogOptimizeRequest,
|
|
BlogOptimizeResponse,
|
|
BlogSEOAnalyzeRequest,
|
|
BlogSEOAnalyzeResponse,
|
|
BlogSEOMetadataRequest,
|
|
BlogSEOMetadataResponse,
|
|
BlogPublishRequest,
|
|
BlogPublishResponse,
|
|
ResearchSource,
|
|
BlogOutlineSection,
|
|
)
|
|
|
|
|
|
class BlogWriterService:
|
|
"""Service layer for AI Blog Writer (stub implementations for scaffolding)."""
|
|
|
|
async def research(self, request: BlogResearchRequest) -> BlogResearchResponse:
    """
    Stage 1: Research & Strategy (AI Orchestration).

    Issues ONE grounded Gemini call (native Google Search grounding) and
    derives sources, keyword analysis, competitor analysis and content
    angles from that single response. There is no fallback provider.

    Args:
        request: Research request; ``topic``, ``keywords``, ``industry`` and
            ``persona`` are read.

    Returns:
        BlogResearchResponse with ``success=True``, the parsed analysis, and
        the search widget / search queries for UI display.
    """
    from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider

    gemini = GeminiGroundedProvider()

    # ``keywords`` may be absent; normalise once so the join and the keyword
    # parser never receive None.
    keywords = list(request.keywords or [])
    topic = request.topic or ", ".join(keywords)
    industry = request.industry or (
        request.persona.industry if request.persona and request.persona.industry else "General"
    )
    target_audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'

    # Single comprehensive research prompt - Gemini handles Google Search automatically
    research_prompt = f"""
    Research the topic "{topic}" in the {industry} industry for {target_audience} audience. Provide a comprehensive analysis including:

    1. Current trends and insights (2024-2025)
    2. Key statistics and data points with sources
    3. Industry expert opinions and quotes
    4. Recent developments and news
    5. Market analysis and forecasts
    6. Best practices and case studies
    7. Keyword analysis: primary, secondary, and long-tail opportunities
    8. Competitor analysis: top players and content gaps
    9. Content angle suggestions: 5 compelling angles for blog posts

    Focus on factual, up-to-date information from credible sources.
    Include specific data points, percentages, and recent developments.
    Structure your response with clear sections for each analysis area.
    """

    # Single Gemini call with native Google Search grounding - no fallbacks
    gemini_result = await gemini.generate_grounded_content(
        prompt=research_prompt,
        content_type="research",
        max_tokens=2000,
    )

    # Extract sources from grounding metadata
    sources = self._extract_sources_from_grounding(gemini_result)

    # Search widget and queries are surfaced to the UI; coerce None to empty
    search_widget = gemini_result.get("search_widget", "") or ""
    search_queries = gemini_result.get("search_queries", []) or []

    # The text parsers split on newlines, so an explicit None must become ""
    content = gemini_result.get("content") or ""
    keyword_analysis = self._parse_keyword_analysis(content, keywords)
    competitor_analysis = self._parse_competitor_analysis(content)
    suggested_angles = self._parse_content_angles(content, topic, industry)

    logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")

    return BlogResearchResponse(
        success=True,
        sources=sources,
        keyword_analysis=keyword_analysis,
        competitor_analysis=competitor_analysis,
        suggested_angles=suggested_angles,
        # Both values are always bound above, so no locals() probing is needed
        search_widget=search_widget,
        search_queries=search_queries,
    )
|
|
|
|
def _extract_sources_from_grounding(self, gemini_result: Dict[str, Any]) -> List[ResearchSource]:
|
|
"""Extract sources from Gemini grounding metadata."""
|
|
sources = []
|
|
|
|
# The Gemini grounded provider already extracts sources and puts them in the 'sources' field
|
|
raw_sources = gemini_result.get("sources", [])
|
|
for src in raw_sources:
|
|
source = ResearchSource(
|
|
title=src.get("title", "Untitled"),
|
|
url=src.get("url", ""),
|
|
excerpt=src.get("content", "")[:500] if src.get("content") else f"Source from {src.get('title', 'web')}",
|
|
credibility_score=float(src.get("credibility_score", 0.8)),
|
|
published_at=str(src.get("publication_date", "2024-01-01"))
|
|
)
|
|
sources.append(source)
|
|
|
|
return sources
|
|
|
|
def _parse_keyword_analysis(self, content: str, original_keywords: List[str]) -> Dict[str, Any]:
|
|
"""Parse keyword analysis from the research content."""
|
|
# Extract keywords from content sections
|
|
lines = content.split('\n')
|
|
keyword_section = []
|
|
in_keyword_section = False
|
|
|
|
for line in lines:
|
|
if 'keyword' in line.lower() and ('analysis' in line.lower() or 'primary' in line.lower()):
|
|
in_keyword_section = True
|
|
continue
|
|
if in_keyword_section and line.strip():
|
|
if line.startswith(('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')):
|
|
break
|
|
keyword_section.append(line.strip())
|
|
|
|
return {
|
|
"primary": original_keywords[:1] if original_keywords else [],
|
|
"secondary": original_keywords[1:] if len(original_keywords) > 1 else [],
|
|
"long_tail": [f"{kw} guide" for kw in original_keywords[:2]] if original_keywords else [],
|
|
"search_intent": "informational",
|
|
"difficulty": 6,
|
|
"content_gaps": [f"{kw} best practices" for kw in original_keywords[:2]] if original_keywords else [],
|
|
"analysis_content": "\n".join(keyword_section) if keyword_section else content[:200]
|
|
}
|
|
|
|
def _parse_competitor_analysis(self, content: str) -> Dict[str, Any]:
|
|
"""Parse competitor analysis from the research content."""
|
|
lines = content.split('\n')
|
|
competitor_section = []
|
|
in_competitor_section = False
|
|
|
|
for line in lines:
|
|
if 'competitor' in line.lower() and ('analysis' in line.lower() or 'top' in line.lower()):
|
|
in_competitor_section = True
|
|
continue
|
|
if in_competitor_section and line.strip():
|
|
if line.startswith(('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')):
|
|
break
|
|
competitor_section.append(line.strip())
|
|
|
|
return {
|
|
"top_competitors": [],
|
|
"content_gaps": [],
|
|
"opportunities": [],
|
|
"analysis_notes": "\n".join(competitor_section) if competitor_section else "Competitor analysis from research"
|
|
}
|
|
|
|
def _parse_content_angles(self, content: str, topic: str, industry: str) -> List[str]:
|
|
"""Parse content angles from the research content."""
|
|
lines = content.split('\n')
|
|
angles_section = []
|
|
in_angles_section = False
|
|
|
|
for line in lines:
|
|
if 'angle' in line.lower() and ('suggest' in line.lower() or 'content' in line.lower()):
|
|
in_angles_section = True
|
|
continue
|
|
if in_angles_section and line.strip():
|
|
if line.startswith(('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')):
|
|
break
|
|
if line.strip() and not line.startswith(('•', '-', '*')):
|
|
angles_section.append(line.strip())
|
|
|
|
# If no angles found in content, use fallback
|
|
if not angles_section:
|
|
angles_section = [
|
|
f"How {topic} is Transforming {industry}",
|
|
f"Latest {topic} Trends: What You Need to Know",
|
|
f"{topic} Best Practices for {industry}",
|
|
f"Case Study: {topic} Success Stories",
|
|
f"The Future of {topic} in {industry}"
|
|
]
|
|
|
|
return angles_section[:5] # Return top 5 angles
|
|
|
|
|
|
async def generate_outline(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
    """
    Stage 2: Content Planning with an AI-generated outline built from research results.

    Builds a strategy prompt from the research data, asks Gemini for a
    structured JSON outline, validates it and converts each section into a
    BlogOutlineSection.

    Args:
        request: Outline request; ``research``, ``word_count`` and ``persona``
            are read.

    Returns:
        BlogOutlineResponse with ``success=True``, the title options and the
        outline sections (ids ``s1``, ``s2``, ... without gaps).

    Raises:
        ValueError: If the provider reports an error, the response lacks a
            list under ``outline``, or no title options are returned.
    """
    # Extract research insights; tolerate None-valued fields
    research = request.research
    keyword_analysis = research.keyword_analysis or {}
    primary_keywords = keyword_analysis.get('primary') or []
    secondary_keywords = keyword_analysis.get('secondary') or []
    long_tail_keywords = keyword_analysis.get('long_tail') or []
    content_angles = research.suggested_angles or []
    sources = research.sources or []
    search_intent = keyword_analysis.get('search_intent', 'informational')

    target_word_count = request.word_count or 1500
    industry = getattr(request.persona, 'industry', 'General') if request.persona else 'General'
    audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'
    primary_line = ', '.join(primary_keywords)
    secondary_line = ', '.join(secondary_keywords)
    long_tail_line = ', '.join(long_tail_keywords[:5])
    angle_bullets = "\n".join(f"• {angle}" for angle in content_angles[:6])

    # Build sophisticated outline generation prompt with advanced content strategy
    outline_prompt = f"""
    You are a world-class content strategist and SEO expert with 15+ years of experience creating viral, high-converting blog content. Your outlines have generated millions of views and driven significant business results.

    CONTENT STRATEGY BRIEF:
    Topic: {primary_line}
    Search Intent: {search_intent}
    Target Word Count: {target_word_count} words
    Industry Context: {industry}
    Audience: {audience}

    RESEARCH INTELLIGENCE:
    Primary Keywords: {primary_line}
    Secondary Keywords: {secondary_line}
    Long-tail Opportunities: {long_tail_line}

    Content Angles Discovered:
    {angle_bullets}

    Research Sources Available: {len(sources)} authoritative sources with current data

    STRATEGIC OUTLINE REQUIREMENTS:

    1. CONTENT ARCHITECTURE:
    - Create 5-7 sections that follow a logical progression
    - Each section must have a clear purpose and value proposition
    - Build a narrative arc that keeps readers engaged throughout
    - Include strategic content gaps that competitors miss

    2. SEO OPTIMIZATION:
    - Naturally integrate primary keywords in H2 headings (not forced)
    - Use secondary keywords in subheadings and key points
    - Include long-tail keywords in natural language
    - Optimize for featured snippets and voice search
    - Create semantic keyword clusters

    3. READER ENGAGEMENT:
    - Start with a compelling hook that addresses pain points
    - Use storytelling elements and real-world examples
    - Include actionable insights readers can implement immediately
    - Create sections that encourage social sharing
    - End with a strong call-to-action

    4. CONTENT DEPTH:
    - Each section: 2-4 specific, actionable subheadings
    - Each section: 4-6 key points with research-backed insights
    - Include data points, statistics, and case studies where relevant
    - Address common objections and questions
    - Provide unique angles not covered by competitors

    5. WORD COUNT DISTRIBUTION:
    - Introduction: 10-15% of total words
    - Main sections: 70-80% of total words (distributed strategically)
    - Conclusion: 10-15% of total words
    - Total target: {target_word_count} words

    6. COMPETITIVE ADVANTAGE:
    - Include fresh perspectives from recent research
    - Address emerging trends and future implications
    - Provide deeper insights than surface-level content
    - Include practical tools, frameworks, or templates
    - Reference authoritative sources and data

    TITLE STRATEGY:
    Create 3 distinct title options that:
    - Include primary keywords naturally
    - Promise clear value to readers
    - Create curiosity and urgency
    - Are optimized for click-through rates
    - Work well for social media sharing

    CRITICAL: Respond ONLY with valid JSON. No additional text or explanations.

    JSON FORMAT:
    {{
        "title_options": [
            "Compelling title with primary keyword and benefit",
            "Question-based title that creates curiosity",
            "How-to title with specific outcome promise"
        ],
        "outline": [
            {{
                "heading": "Strategic section title with primary keyword",
                "subheadings": [
                    "Specific, actionable subheading 1",
                    "Data-driven subheading 2",
                    "Case study or example subheading 3"
                ],
                "key_points": [
                    "Research-backed insight with specific data",
                    "Actionable step readers can take immediately",
                    "Common mistake to avoid with explanation",
                    "Advanced tip that provides competitive advantage",
                    "Real-world example or case study"
                ],
                "target_words": 300,
                "keywords": ["primary keyword", "secondary keyword", "long-tail phrase"]
            }}
        ]
    }}
    """

    logger.info("Generating AI-powered outline using research results")

    # Define the schema for structured JSON response
    outline_schema = {
        "type": "object",
        "properties": {
            "title_options": {
                "type": "array",
                "items": {"type": "string"},
                "description": "3 SEO-optimized title options"
            },
            "outline": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "id": {"type": "string"},
                        "heading": {"type": "string"},
                        "subheadings": {
                            "type": "array",
                            "items": {"type": "string"}
                        },
                        "key_points": {
                            "type": "array",
                            "items": {"type": "string"}
                        },
                        "word_count": {"type": "integer"},
                        "keywords": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Keywords to focus on in this section"
                        }
                    },
                    "required": ["id", "heading", "subheadings", "key_points", "word_count", "keywords"]
                }
            }
        },
        "required": ["title_options", "outline"]
    }

    # Generate outline using structured JSON response (no grounding needed).
    # NOTE(review): the helper is called without ``await``, so it is presumably
    # synchronous and will block the event loop for the duration of the call.
    outline_data = gemini_structured_json_response(
        prompt=outline_prompt,
        schema=outline_schema,
        temperature=0.3,
        max_tokens=3000,
    )

    # Check for errors in the response
    if isinstance(outline_data, dict) and 'error' in outline_data:
        logger.error(f"Gemini structured response error: {outline_data['error']}")
        raise ValueError(f"AI outline generation failed: {outline_data['error']}")

    # Validate required fields
    if not isinstance(outline_data, dict) or not isinstance(outline_data.get('outline'), list):
        logger.error(f"Invalid outline structure: {outline_data}")
        raise ValueError("Invalid outline structure in Gemini response")

    # Convert to BlogOutlineSection objects
    outline_sections = []
    for raw_index, section_data in enumerate(outline_data['outline']):
        if not isinstance(section_data, dict) or 'heading' not in section_data:
            logger.warning(f"Skipping invalid section data at index {raw_index}")
            continue

        # Number by accepted sections so a skipped entry leaves no id gap
        position = len(outline_sections)

        # The schema names the budget ``word_count`` while the prompt example
        # names it ``target_words``; accept either before defaulting to 300.
        target_words = section_data.get('target_words') or section_data.get('word_count') or 300

        outline_sections.append(
            BlogOutlineSection(
                id=f"s{position + 1}",
                heading=section_data.get('heading', f'Section {position + 1}'),
                subheadings=section_data.get('subheadings') or [],
                key_points=section_data.get('key_points') or [],
                references=sources[:2] if position < 2 else [],  # Assign sources to first 2 sections
                target_words=target_words,
                keywords=section_data.get('keywords') or [],
            )
        )

    title_options = outline_data.get('title_options', [])
    if not title_options:
        raise ValueError("No title options provided in Gemini response")

    logger.info(f"Generated outline with {len(outline_sections)} sections and {len(title_options)} title options")

    return BlogOutlineResponse(
        success=True,
        title_options=title_options,
        outline=outline_sections,
    )
|
|
|
|
|
|
async def refine_outline(self, request: BlogOutlineRefineRequest) -> BlogOutlineResponse:
    """
    Refine an outline with HITL (Human-in-the-Loop) operations.

    Supported operations: ``add``, ``remove``, ``rename``, ``move``,
    ``merge`` (with the following section) and ``update``. All except
    ``add`` need ``request.section_id``. After the operation, section ids
    are renumbered ``s1``, ``s2``, ... in list order.

    Args:
        request: Refine request; ``outline``, ``operation``, ``section_id``
            and ``payload`` are read. The request's own sections are never
            modified.

    Returns:
        BlogOutlineResponse with ``success=True`` and the refined outline,
        or ``success=False`` with the untouched original outline if the
        operation raised.
    """
    import copy

    # Deep copy: rename/merge/update/id-reassignment mutate section objects,
    # and the failure path must be able to return the original outline intact.
    outline = copy.deepcopy(request.outline)
    operation = request.operation.lower()
    section_id = request.section_id
    payload = request.payload or {}

    try:
        if operation == 'add':
            # Add new section
            new_section = BlogOutlineSection(
                id=f"s{len(outline) + 1}",
                heading=payload.get('heading', 'New Section'),
                subheadings=payload.get('subheadings', []),
                key_points=payload.get('key_points', []),
                references=[],
                target_words=payload.get('target_words', 300),
            )
            outline.append(new_section)
            logger.info(f"Added new section: {new_section.heading}")

        elif operation == 'remove' and section_id:
            # Remove section
            outline = [s for s in outline if s.id != section_id]
            logger.info(f"Removed section: {section_id}")

        elif operation == 'rename' and section_id:
            # Rename section
            for section in outline:
                if section.id == section_id:
                    section.heading = payload.get('heading', section.heading)
                    break
            logger.info(f"Renamed section {section_id} to: {payload.get('heading')}")

        elif operation == 'move' and section_id:
            # Move section (reorder) one position
            direction = payload.get('direction', 'down')  # 'up' or 'down'
            current_index = next((i for i, s in enumerate(outline) if s.id == section_id), -1)

            if current_index != -1:
                if direction == 'up' and current_index > 0:
                    outline[current_index], outline[current_index - 1] = outline[current_index - 1], outline[current_index]
                elif direction == 'down' and current_index < len(outline) - 1:
                    outline[current_index], outline[current_index + 1] = outline[current_index + 1], outline[current_index]
                logger.info(f"Moved section {section_id} {direction}")

        elif operation == 'merge' and section_id:
            # Merge with next section
            current_index = next((i for i, s in enumerate(outline) if s.id == section_id), -1)
            if current_index != -1 and current_index < len(outline) - 1:
                current_section = outline[current_index]
                next_section = outline[current_index + 1]

                # Merge sections
                current_section.heading = f"{current_section.heading} & {next_section.heading}"
                current_section.subheadings.extend(next_section.subheadings)
                current_section.key_points.extend(next_section.key_points)
                current_section.references.extend(next_section.references)
                current_section.target_words = (current_section.target_words or 0) + (next_section.target_words or 0)

                # Remove the next section
                outline.pop(current_index + 1)
                logger.info(f"Merged section {section_id} with next section")

        elif operation == 'update' and section_id:
            # Update section details; only keys present in the payload change
            for section in outline:
                if section.id == section_id:
                    for field in ('heading', 'subheadings', 'key_points', 'target_words'):
                        if field in payload:
                            setattr(section, field, payload[field])
                    break
            logger.info(f"Updated section {section_id}")

        else:
            # Still reported as success (unchanged contract), but no longer silent
            logger.warning(
                f"Unsupported or incomplete outline operation: {operation!r} (section_id={section_id!r})"
            )

        # Reassign IDs to maintain order
        for i, section in enumerate(outline):
            section.id = f"s{i+1}"

        return BlogOutlineResponse(
            success=True,
            title_options=["Refined Outline"],
            outline=outline,
        )

    except Exception as e:
        logger.error(f"Outline refinement failed: {e}")
        return BlogOutlineResponse(
            success=False,
            title_options=["Error"],
            outline=request.outline,
        )
|
|
|
|
async def generate_section(self, request: BlogSectionRequest) -> BlogSectionResponse:
    """Return placeholder markdown for one outline section (stub).

    The markdown is an H2 line with the section heading followed by a fixed
    placeholder sentence; the section's references are echoed as citations.
    """
    # TODO: Generate section markdown incorporating references and persona/tone
    heading_line = f"## {request.section.heading}"
    placeholder_markdown = heading_line + "\n\nThis section content will be generated here.\n"
    return BlogSectionResponse(
        success=True,
        markdown=placeholder_markdown,
        citations=request.section.references,
    )
|
|
|
|
async def optimize_section(self, request: BlogOptimizeRequest) -> BlogOptimizeResponse:
    """Return the submitted content unchanged with no diff preview (stub)."""
    # TODO: Run readability/EEAT optimization and return diff
    unchanged_content = request.content
    return BlogOptimizeResponse(
        success=True,
        optimized=unchanged_content,
        diff_preview=None,
    )
|
|
|
|
async def hallucination_check(self, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Run hallucination detection on ``payload["text"]`` via the detector service.

    Returns a plain dict: on success the overall figures plus one dict per
    claim; on failure ``{"success": False, "error": <message>}``. Blank or
    missing text is reported as an error without calling the detector.
    """
    raw_text = payload.get("text", "") or ""
    text = str(raw_text).strip()
    if text == "":
        return {"success": False, "error": "No text provided"}

    # Prefer direct service use over HTTP proxy
    try:
        from services.hallucination_detector import HallucinationDetector

        result = await HallucinationDetector().detect_hallucinations(text)

        # Flatten each claim object into a plain dict
        serialized_claims = [
            {
                "text": claim.text,
                "confidence": claim.confidence,
                "assessment": claim.assessment,
                "supporting_sources": claim.supporting_sources,
                "refuting_sources": claim.refuting_sources,
                "reasoning": claim.reasoning,
            }
            for claim in result.claims
        ]

        response: Dict[str, Any] = {"success": True}
        response["overall_confidence"] = result.overall_confidence
        response["total_claims"] = result.total_claims
        response["supported_claims"] = result.supported_claims
        response["refuted_claims"] = result.refuted_claims
        response["insufficient_claims"] = result.insufficient_claims
        response["timestamp"] = result.timestamp
        response["claims"] = serialized_claims
        return response
    except Exception as exc:
        return {"success": False, "error": str(exc)}
|
|
|
|
async def seo_analyze(self, request: BlogSEOAnalyzeRequest) -> BlogSEOAnalyzeResponse:
    """Wrap existing SEO tools to produce unified analysis for blog content.

    Combines the on-page SEO service result with statistics parsed directly
    from the markdown: heading counts, links, images, keyword density and
    internal-link suggestions between consecutive H2 sections.

    Args:
        request: Analyze request; ``content`` (markdown) and ``keywords``
            are read.

    Returns:
        BlogSEOAnalyzeResponse with ``success=True``. ``seo_score`` is the
        on-page ``overall_score``, or 75 when that is absent.
    """
    import re

    from services.seo_tools.on_page_seo_service import OnPageSEOService
    from services.seo_tools.content_strategy_service import ContentStrategyService

    content = request.content or ""
    target_keywords = request.keywords or []

    # On-page analysis (treat content as a virtual URL/document for now)
    on_page_result = await OnPageSEOService().analyze_on_page_seo(
        url="about:blank", target_keywords=target_keywords
    )
    if not isinstance(on_page_result, dict):
        on_page_result = {}

    # Strategy hints (keywords/topics) are best-effort
    try:
        strategy_hints = await ContentStrategyService().analyze_content_topics(content=content)
    except Exception as exc:
        logger.warning(f"Content strategy analysis unavailable: {exc}")
        strategy_hints = {"topics": [], "gaps": []}
    if not isinstance(strategy_hints, dict):
        strategy_hints = {"topics": [], "gaps": []}

    # Lightweight markdown parsing for headings/links/images
    lines = content.splitlines()
    total_words = max(len(re.findall(r"[A-Za-z0-9']+", content)), 1)
    h1 = sum(1 for ln in lines if ln.startswith('# '))
    h2 = sum(1 for ln in lines if ln.startswith('## '))
    h3 = sum(1 for ln in lines if ln.startswith('### '))

    # ``![alt](src)`` is an image, not a link: count it separately and keep
    # it out of the link totals via the ``(?<!!)`` lookbehind.
    md_images = re.findall(r"!\[([^\]]*)\]\(([^)]+)\)", content)
    md_links = re.findall(r"(?<!!)\[([^\]]+)\]\(([^)]+)\)", content)
    external_links = [url for (_text, url) in md_links if url.startswith('http')]
    image_alt_status = {
        "total_images": len(md_images),
        "missing_alt": sum(1 for (alt, _src) in md_images if not alt.strip()),
    }

    # Keyword density: whole-word/phrase matches only, so "ai" is not counted
    # inside "maintain". Blank keywords count as zero occurrences.
    density_map: Dict[str, Any] = {"target_keywords": target_keywords}
    for kw in target_keywords:
        term = kw.strip() if isinstance(kw, str) else ""
        if term:
            pattern = rf"(?<!\w){re.escape(term)}(?!\w)"
            occurrences = len(re.findall(pattern, content, flags=re.IGNORECASE))
        else:
            occurrences = 0
        density_map[kw] = {
            "occurrences": occurrences,
            "density": round(occurrences / total_words, 4),
        }

    # Build unified recommendations
    recommendations: List[str] = []
    if isinstance(on_page_result.get("recommendations"), list):
        recommendations.extend(on_page_result["recommendations"])
    gaps = strategy_hints.get("gaps")
    if gaps:
        recommendations.append("Cover missing topics: " + ", ".join(str(gap) for gap in gaps))
    if not external_links:
        recommendations.append("Add at least one credible external link to authoritative sources.")
    if h2 < 2:
        recommendations.append("Increase number of H2 sections for better structure.")

    # Internal link suggestions: generate anchors for H2s and propose cross-links
    def to_anchor(heading: str) -> str:
        """Slugify a heading: lowercase, drop punctuation, hyphenate spaces."""
        slug = re.sub(r"[^a-z0-9\s-]", "", heading.lower())
        return re.sub(r"\s+", "-", slug).strip('-')

    h2_headings = [ln[3:].strip() for ln in lines if ln.startswith('## ')]
    internal_link_suggestions = [
        {
            "from": current,
            "to": following,
            "anchor": f"#{to_anchor(following)}",
            "suggestion": f"Add internal link from '{current}' to '{following}'",
        }
        for current, following in zip(h2_headings, h2_headings[1:])
    ]

    external_link_suggestions = (
        [] if external_links else [{"suggestion": "Add external citation links for key claims."}]
    )

    # A present-but-None score must not reach float()
    raw_score = on_page_result.get("overall_score")
    seo_score = float(raw_score) if raw_score is not None else 75.0

    return BlogSEOAnalyzeResponse(
        success=True,
        seo_score=seo_score,
        density=density_map,
        structure={
            **(on_page_result.get("heading_structure") or {}),
            "markdown_headings": {"h1": h1, "h2": h2, "h3": h3},
            "links": {"total": len(md_links), "external": len(external_links)},
        },
        readability=on_page_result.get("content_analysis") or {},
        link_suggestions=external_link_suggestions + internal_link_suggestions,
        image_alt_status=image_alt_status,
        recommendations=recommendations,
    )
|
|
|
|
async def seo_metadata(self, request: BlogSEOMetadataRequest) -> BlogSEOMetadataResponse:
    """Return placeholder SEO metadata derived from the request title (stub)."""
    # TODO: Generate SEO metadata using existing services
    seo_title = request.title or "Generated SEO Title"
    og_title = request.title or "OG Title"

    return BlogSEOMetadataResponse(
        success=True,
        title_options=[seo_title],
        meta_descriptions=["Compelling meta description..."],
        open_graph={"title": og_title, "image": ""},
        twitter_card={"card": "summary_large_image"},
        schema={"@type": "Article"},
    )
|
|
|
|
async def publish(self, request: BlogPublishRequest) -> BlogPublishResponse:
    """Report a successful publish to the requested platform with a fixed URL (stub)."""
    # TODO: Call Wix/WordPress adapters to publish
    placeholder_url = "https://example.com/post"
    return BlogPublishResponse(
        success=True,
        platform=request.platform,
        url=placeholder_url,
    )
|
|
|
|
|