Updated SEO Analysis Modal

This commit is contained in:
ajaysi
2025-09-22 21:02:32 +05:30
parent f98d49cea7
commit 12119d418b
38 changed files with 5742 additions and 2337 deletions

View File

@@ -0,0 +1,209 @@
"""
Blog Rewriter Service
Handles blog rewriting based on user feedback using structured AI calls.
"""
import time
import uuid
from typing import Dict, Any
from loguru import logger
from services.llm_providers.gemini_provider import gemini_structured_json_response
class BlogRewriter:
    """Service for rewriting blog content based on user feedback.

    A rewrite is validated and scheduled through ``start_blog_rewrite``; the
    scheduled coroutine ``_execute_blog_rewrite`` builds the prompts, calls the
    structured-JSON Gemini helper and reports progress and the final outcome
    through the injected task manager.
    """

    def __init__(self, task_manager):
        """Keep the task manager used to start tasks and publish their status."""
        self.task_manager = task_manager

    def start_blog_rewrite(self, request: Dict[str, Any]) -> str:
        """Validate a rewrite request and start it as a task.

        Args:
            request: Mapping with the required keys ``sections`` and
                ``feedback`` and the optional keys ``title``, ``research``,
                ``outline``, ``tone``, ``audience`` and ``focus``.

        Returns:
            The generated task id (``rewrite_<unix time>_<8 hex chars>``).

        Raises:
            ValueError: If no sections are given, or the feedback is missing,
                not a string, or shorter than 10 characters once stripped.
        """
        try:
            # ``or`` also covers a title key that is present but null, which a
            # plain ``get`` default would let through as ``None``.
            title = request.get("title") or "Untitled Blog"
            sections = request.get("sections", [])
            research = request.get("research", {})
            outline = request.get("outline", [])
            feedback = request.get("feedback", "")
            tone = request.get("tone")
            audience = request.get("audience")
            focus = request.get("focus")

            if not sections:
                raise ValueError("No sections provided for rewrite")

            # The isinstance guard turns a non-string payload into the same
            # validation error instead of an AttributeError on ``.strip()``.
            if not isinstance(feedback, str) or len(feedback.strip()) < 10:
                raise ValueError("Feedback is required and must be at least 10 characters")

            # Timestamp plus random suffix keeps ids unique within a second.
            task_id = f"rewrite_{int(time.time())}_{uuid.uuid4().hex[:8]}"

            self.task_manager.start_task(
                task_id,
                self._execute_blog_rewrite,
                title=title,
                sections=sections,
                research=research,
                outline=outline,
                feedback=feedback,
                tone=tone,
                audience=audience,
                focus=focus
            )

            logger.info(f"Blog rewrite task started: {task_id}")
            return task_id

        except Exception as e:
            logger.error(f"Failed to start blog rewrite: {e}")
            raise

    @staticmethod
    def _build_prompts(title, sections, feedback, tone, audience, focus):
        """Return ``(system_prompt, rewrite_prompt)`` for one rewrite request.

        The optional ``tone``, ``audience`` and ``focus`` lines are rendered
        only when the corresponding value is truthy.
        """
        system_prompt = f"""You are an expert blog writer tasked with rewriting content based on user feedback.
Current Blog Title: {title}
User Feedback: {feedback}
{f"Desired Tone: {tone}" if tone else ""}
{f"Target Audience: {audience}" if audience else ""}
{f"Focus Area: {focus}" if focus else ""}
Your task is to rewrite the blog content to address the user's feedback while maintaining the core structure and research insights."""

        # Flatten the current blog into plain text; join avoids repeated
        # string reallocation for blogs with many sections.
        content_parts = [f"Title: {title}\n\n"]
        for section in sections:
            content_parts.append(f"Section: {section.get('heading', 'Untitled')}\n")
            content_parts.append(f"Content: {section.get('content', '')}\n\n")
        full_content = "".join(content_parts)

        rewrite_prompt = f"""
Based on the user feedback and current blog content, rewrite the blog to address their concerns and preferences.
Current Content:
{full_content}
User Feedback: {feedback}
{f"Desired Tone: {tone}" if tone else ""}
{f"Target Audience: {audience}" if audience else ""}
{f"Focus Area: {focus}" if focus else ""}
Please rewrite the blog content in the following JSON format:
{{
"title": "New or improved blog title",
"sections": [
{{
"id": "section_id",
"heading": "Section heading",
"content": "Rewritten section content"
}}
]
}}
Guidelines:
1. Address the user's feedback directly
2. Maintain the research insights and factual accuracy
3. Improve flow, clarity, and engagement
4. Keep the same section structure unless feedback suggests otherwise
5. Ensure content is well-formatted with proper paragraphs
"""
        return system_prompt, rewrite_prompt

    @staticmethod
    def _rewrite_schema() -> Dict[str, Any]:
        """Return a fresh JSON schema describing the expected rewrite output."""
        return {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "sections": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "id": {"type": "string"},
                            "heading": {"type": "string"},
                            "content": {"type": "string"}
                        }
                    }
                }
            }
        }

    @staticmethod
    def _normalize_rewrite_result(result: Any):
        """Return the response in multi-section form, or ``None`` if unusable.

        A response with ``title`` and ``sections`` is returned unchanged (the
        same object). A flat response with ``content`` plus a ``title`` or
        ``heading`` is wrapped into a one-section document. Anything else,
        including non-dict responses and responses carrying ``error``,
        yields ``None``.
        """
        if not isinstance(result, dict) or not result or result.get("error"):
            return None

        if result.get("title") and result.get("sections"):
            return result

        if (result.get("heading") or result.get("title")) and result.get("content"):
            return {
                # Prefer the title the model actually returned; the section
                # heading is only a fallback when no title was produced.
                "title": result.get("title") or result.get("heading") or "Rewritten Blog",
                "sections": [
                    {
                        "id": result.get("id") or "section_1",
                        "heading": result.get("heading") or "Main Content",
                        "content": result.get("content", "")
                    }
                ]
            }

        return None

    @staticmethod
    def _describe_failure(result: Any) -> str:
        """Return a short reason why ``result`` was rejected."""
        if not result:
            return "No response from AI"
        if not isinstance(result, dict):
            return "AI response has invalid structure"
        if result.get("error"):
            return f"AI error: {result.get('error')}"
        if not (result.get("title") or result.get("heading")):
            return "AI response missing title/heading"
        if not (result.get("sections") or result.get("content")):
            return "AI response missing sections/content"
        return "AI response has invalid structure"

    async def _execute_blog_rewrite(self, task_id: str, **kwargs):
        """Run one rewrite task and publish its outcome via the task manager.

        Args:
            task_id: Identifier of the task whose status is updated.
            **kwargs: The values forwarded by ``start_blog_rewrite``. Only
                ``title``, ``sections``, ``feedback``, ``tone``, ``audience``
                and ``focus`` are used; ``research`` and ``outline`` are
                accepted but not currently part of the prompt.

        Raises:
            Exception: Any error raised while building the prompt, calling the
                model or updating the task is recorded as a failed status and
                then re-raised.
        """
        try:
            title = kwargs.get("title") or "Untitled Blog"
            sections = kwargs.get("sections", [])
            feedback = kwargs.get("feedback", "")
            tone = kwargs.get("tone")
            audience = kwargs.get("audience")
            focus = kwargs.get("focus")

            self.task_manager.update_task_status(task_id, "processing", "Analyzing current content and feedback...")

            system_prompt, rewrite_prompt = self._build_prompts(
                title, sections, feedback, tone, audience, focus
            )

            self.task_manager.update_task_status(task_id, "processing", "Generating rewritten content...")

            # NOTE(review): this helper is called without ``await``, so it
            # runs synchronously inside this coroutine - confirm the task
            # manager executes it where blocking is acceptable.
            result = gemini_structured_json_response(
                prompt=rewrite_prompt,
                schema=self._rewrite_schema(),
                temperature=0.7,
                max_tokens=4096,
                system_prompt=system_prompt
            )

            logger.info(f"Gemini response for rewrite task {task_id}: {result}")

            rewritten = self._normalize_rewrite_result(result)
            if rewritten is None:
                error_msg = self._describe_failure(result)
                self.task_manager.update_task_status(task_id, "failed", f"Rewrite failed: {error_msg}")
                logger.error(f"Blog rewrite failed: {error_msg}")
                return

            # A new object means the flat response was wrapped into sections.
            if rewritten is not result:
                logger.info(f"Converted single section response to multi-section format for task {task_id}")

            self.task_manager.update_task_status(
                task_id,
                "completed",
                "Blog rewrite completed successfully!",
                result=rewritten
            )
            logger.info(f"Blog rewrite completed successfully: {task_id}")

        except Exception as e:
            error_msg = f"Blog rewrite error: {str(e)}"
            self.task_manager.update_task_status(task_id, "failed", error_msg)
            logger.error(f"Blog rewrite task failed: {e}")
            raise

View File

@@ -0,0 +1,237 @@
"""
Medium Blog Generator Service
Handles generation of medium-length blogs (≤1000 words) using structured AI calls.
"""
import time
import json
from typing import Dict, Any, List
from loguru import logger
from models.blog_models import (
MediumBlogGenerateRequest,
MediumBlogGenerateResult,
MediumGeneratedSection,
ResearchSource,
)
from services.llm_providers.gemini_provider import gemini_structured_json_response
from services.cache.persistent_content_cache import persistent_content_cache
class MediumBlogGenerator:
    """Service for generating medium-length blog content using structured AI calls."""

    def __init__(self):
        """Bind the shared persistent content cache."""
        self.cache = persistent_content_cache

    @staticmethod
    def _outline_fields(section) -> Dict[str, Any]:
        """Return the outline attributes of ``section`` under camelCase keys.

        Both snake_case and camelCase attribute spellings are probed, and
        missing attributes fall back to an empty list (or ``None`` for the
        target word count).
        """
        return {
            "keyPoints": getattr(section, "key_points", []) or getattr(section, "keyPoints", []),
            "subheadings": getattr(section, "subheadings", []),
            "keywords": getattr(section, "keywords", []),
            "targetWords": getattr(section, "target_words", None) or getattr(section, "targetWords", None),
        }

    async def generate_medium_blog_with_progress(self, req: MediumBlogGenerateRequest, task_id: str) -> MediumBlogGenerateResult:
        """Use Gemini structured JSON to generate a medium-length blog in one call.

        A cached result is returned when one exists for the same keywords,
        sections, target length, persona, tone and audience; otherwise the
        model is called once and the result is cached on a best-effort basis.

        Args:
            req: Generation request (title, sections, optional persona, tone,
                audience, research keywords and global target word count).
            task_id: Identifier of the surrounding task. It is not used inside
                this method.

        Returns:
            The generated (or cached) blog. Cache hits carry
            ``generation_time_ms=0`` and ``cache_hit=True``.

        Raises:
            Exception: If the model returns nothing or reports an error.
        """
        start = time.time()

        # Values shared by the cache lookup, the prompt and the cache store.
        target_words = req.globalTargetWords or 1000
        keywords = req.researchKeywords or []
        persona_data = req.persona.dict() if req.persona else None
        # Lookup and store must use identical tone/audience values; otherwise
        # an entry written under the defaults is never found again for
        # requests that leave tone or audience unset.
        cache_tone = req.tone or "professional"
        cache_audience = req.audience or "general"

        sections_for_cache = [
            {"id": s.id, "heading": s.heading, **self._outline_fields(s)}
            for s in req.sections
        ]

        # Check cache first. Like the store below, the lookup is best-effort:
        # a broken cache must not prevent a fresh generation.
        try:
            cached_result = self.cache.get_cached_content(
                keywords=keywords,
                sections=sections_for_cache,
                global_target_words=target_words,
                persona_data=persona_data,
                tone=cache_tone,
                audience=cache_audience
            )
            if cached_result:
                logger.info(f"Using cached content for keywords: {req.researchKeywords} (saved expensive generation)")
                # Copy before adding the cache-hit markers so the object
                # handed out by the cache is not modified.
                return MediumBlogGenerateResult(
                    **{**cached_result, "generation_time_ms": 0, "cache_hit": True}
                )
        except Exception as cache_error:
            logger.warning(f"Failed to read cached content: {cache_error}")

        # Cache miss - proceed with AI generation
        logger.info(f"Cache miss - generating new content for keywords: {req.researchKeywords}")

        # Schema expected from the model
        schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "sections": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "id": {"type": "string"},
                            "heading": {"type": "string"},
                            "content": {"type": "string"},
                            "wordCount": {"type": "number"},
                            "sources": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "properties": {"title": {"type": "string"}, "url": {"type": "string"}},
                                },
                            },
                        },
                    },
                },
            },
        }

        def section_block(s):
            """Describe one requested section, including its references, for the prompt."""
            return {
                "id": s.id,
                "heading": s.heading,
                "outline": {
                    **self._outline_fields(s),
                    "references": [
                        {"title": r.title, "url": r.url}
                        # ``or []`` tolerates a references attribute set to None.
                        for r in (getattr(s, "references", None) or [])
                    ],
                },
            }

        payload = {
            "title": req.title,
            "globalTargetWords": target_words,
            "persona": persona_data,
            "tone": req.tone,
            "audience": req.audience,
            "sections": [section_block(s) for s in req.sections],
        }

        # Persona-aware additions to the system and user prompts
        persona_context = ""
        persona_instructions = ""
        if req.persona:
            industry = req.persona.industry or 'General'
            tone = req.persona.tone or 'Professional'
            audience = req.persona.audience or 'General readers'
            persona_context = f"""
PERSONA GUIDELINES:
- Industry: {industry}
- Tone: {tone}
- Audience: {audience}
- Persona ID: {req.persona.persona_id or 'Default'}
Write content that reflects this persona's expertise and communication style.
Use industry-specific terminology and examples where appropriate.
Maintain consistent voice and authority throughout all sections.
"""
            persona_instructions = f"""
PERSONA-DRIVEN CONTENT REQUIREMENTS:
- Write as an expert in {industry} industry
- Use {tone} tone appropriate for {audience}
- Include industry-specific examples and terminology
- Demonstrate authority and expertise in the field
- Use language that resonates with {audience}
- Maintain consistent voice that reflects this persona's expertise
"""

        system = (
            "You are a professional blog writer with deep expertise in your field. "
            "Generate high-quality, persona-driven content for each section based on the provided outline. "
            "Write engaging, informative content that follows the section's key points and target word count. "
            "Ensure the content flows naturally and maintains consistent voice and authority. "
            "Format content with proper paragraph breaks using double line breaks (\\n\\n) between paragraphs. "
            "Structure content with clear paragraphs - aim for 2-4 sentences per paragraph. "
            f"{persona_context}"
            "Return ONLY valid JSON with no markdown formatting or explanations."
        )

        prompt = (
            # The word budget applies to the whole blog, not to each section.
            f"Write blog content for the following sections. The blog should be {target_words} words total, distributed across all sections.\n\n"
            f"Blog Title: {req.title}\n\n"
            "For each section, write engaging content that:\n"
            "- Follows the key points provided\n"
            "- Uses the suggested keywords naturally\n"
            "- Meets the target word count\n"
            "- Maintains professional tone\n"
            "- References the provided sources when relevant\n"
            "- Breaks content into clear paragraphs (2-4 sentences each)\n"
            "- Uses double line breaks (\\n\\n) between paragraphs for proper formatting\n"
            "- Starts with an engaging opening paragraph\n"
            "- Ends with a strong concluding paragraph\n"
            f"{persona_instructions}\n"
            "IMPORTANT: Format the 'content' field with proper paragraph breaks using \\n\\n between paragraphs.\n\n"
            "Return a JSON object with 'title' and 'sections' array. Each section should have 'id', 'heading', 'content', and 'wordCount'.\n\n"
            f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
        )

        # NOTE(review): called without ``await``, so this runs synchronously
        # inside the coroutine - confirm that blocking here is acceptable.
        ai_resp = gemini_structured_json_response(
            prompt=prompt,
            schema=schema,
            temperature=0.2,
            max_tokens=8192,
            system_prompt=system,
        )

        # Check for errors in AI response
        if not ai_resp or ai_resp.get("error"):
            error_msg = ai_resp.get("error", "Empty generation result from model") if ai_resp else "No response from model"
            logger.error(f"AI generation failed: {error_msg}")
            raise Exception(f"AI generation failed: {error_msg}")

        # Normalize output into the project's result models
        title = ai_resp.get("title") or req.title
        out_sections = [
            MediumGeneratedSection(
                id=str(s.get("id")),
                heading=s.get("heading") or "",
                content=s.get("content") or "",
                wordCount=int(s.get("wordCount") or 0),
                # An empty source list is stored as None.
                sources=[
                    ResearchSource(title=src.get("title", ""), url=src.get("url", ""))
                    for src in (s.get("sources") or [])
                ] or None,
            )
            for s in (ai_resp.get("sections", []) or [])
        ]

        duration_ms = int((time.time() - start) * 1000)
        result = MediumBlogGenerateResult(
            success=True,
            title=title,
            sections=out_sections,
            model="gemini-2.5-flash",
            generation_time_ms=duration_ms,
            safety_flags=None,
        )

        # Cache the result for future use; a cache failure must not fail the
        # generation that has already succeeded.
        try:
            self.cache.cache_content(
                keywords=keywords,
                sections=sections_for_cache,
                global_target_words=target_words,
                persona_data=persona_data,
                tone=cache_tone,
                audience=cache_audience,
                result=result.dict()
            )
            logger.info(f"Cached content result for keywords: {req.researchKeywords}")
        except Exception as cache_error:
            logger.warning(f"Failed to cache content result: {cache_error}")

        return result