Files
ALwrity/backend/services/blog_writer/content/medium_blog_generator.py
ajaysi 644e72d289 feat: Brainstorm Topics with GSC + Issue #518 fixes + Blog Editor enhancements
Issue #518 - Subscription not updating after checkout:
- Fix stale closure in SubscriptionContext checkout polling (use subscriptionRef)
- Move checkout success polling from InitialRouteHandler into SubscriptionContext
- Remove redundant polling code from InitialRouteHandler
- Fix plan label: 'Free' instead of 'No Plan', proper capitalization
- Add plan refresh button in UserBadge
- Add 'View Costing Details' to UserBadge dropdown
- Rename 'ALwrity Podcast Maker' to 'Podcast Creator' across UI
- Clean subscription=success URL param after verification

Blog Writer WYSIWYG Editor enhancements:
- Per-section preview toggle (view/edit icons)
- Enhanced hover-based toolbar
- Circular SVG progress stats bar with detailed tooltip
- Research tool chips in stats bar footer
- Per-section TTS with useTextToSpeech hook (browser native)
- Full blog preview modal with print/PDF support
- PlayAllTTSButton: sequential playback with progress bar
- OnThisPageNav: floating sidebar with scroll tracking
- Section data attributes for scroll anchoring

GSC Brainstorm Topics feature:
- Backend: gsc_brainstorm_service.py (rule-based + LLM recommendations)
- Backend: POST /gsc/brainstorm endpoint with 3-word minimum validation
- Frontend: gscBrainstorm.ts API client
- Frontend: useGSCBrainstormConnection hook (popup OAuth, no /onboarding redirect)
- Frontend: useGSCBrainstorm hook (connect check + brainstorm call)
- Frontend: GSCBrainstormModal (3-tab results: Opportunities, Gaps, AI Recs)
- Frontend: BrainstormButton (visible at 3+ words, GSC connect overlay)
- Wire BrainstormButton into ManualResearchForm and ResearchAction
- Add blog_writer to gsc_auth router features for ALWRITY_ENABLED_FEATURES
2026-05-20 22:44:15 +05:30

292 lines
12 KiB
Python

"""
Medium Blog Generator Service
Handles generation of medium-length blogs (≤1000 words) using structured AI calls.
"""
import time
import json
from typing import Dict, Any, List
from loguru import logger
from fastapi import HTTPException
from sqlalchemy.orm import Session
from models.blog_models import (
MediumBlogGenerateRequest,
MediumBlogGenerateResult,
MediumGeneratedSection,
ResearchSource,
)
from services.llm_providers.main_text_generation import llm_text_gen
from services.cache.persistent_content_cache import persistent_content_cache
class MediumBlogGenerator:
    """Service for generating medium-length blog content using structured AI calls.

    A whole blog is produced by one structured-JSON call to ``llm_text_gen``.
    Results are looked up in, and written back to, ``persistent_content_cache``.
    """

    # Fallback values. They are applied identically on cache lookup and on
    # cache write so that both operations describe the same request.
    DEFAULT_TARGET_WORDS = 1000
    DEFAULT_TONE = "professional"
    DEFAULT_AUDIENCE = "general"
    # Model name recorded on freshly generated results.
    MODEL_NAME = "gemini-2.5-flash"

    def __init__(self):
        self.cache = persistent_content_cache

    async def generate_medium_blog_with_progress(
        self,
        req: MediumBlogGenerateRequest,
        task_id: str,
        user_id: str,
        db: Session = None,
    ) -> MediumBlogGenerateResult:
        """Generate a medium-length blog in one structured-JSON model call.

        The cache is consulted first; on a miss the model is called, the
        response is normalised into ``MediumGeneratedSection`` objects, the
        result is cached (best effort) and, when ``db`` is given, saved to the
        user's workspace (best effort).

        Args:
            req: Medium blog generation request
            task_id: Task ID for progress updates (not read by this method)
            user_id: User ID (required for subscription checks and usage tracking)
            db: Optional database session; when omitted the workspace save is skipped

        Returns:
            The generated (or cached) ``MediumBlogGenerateResult``.

        Raises:
            ValueError: If user_id is not provided
            HTTPException: Re-raised unchanged from ``llm_text_gen``
                (e.g., 429 subscription limit)
            Exception: If the model call fails or returns an empty/error response
        """
        if not user_id:
            raise ValueError("user_id is required for medium blog generation (subscription checks and usage tracking)")

        start = time.time()

        # Prepare sections data for cache key generation
        sections_for_cache = [
            {"id": s.id, "heading": s.heading, **self._outline_fields(s)}
            for s in req.sections
        ]
        # One set of arguments for both lookup and write keeps the keys aligned.
        cache_args = self._cache_key_args(req, sections_for_cache)

        # Check cache first
        cached_result = self.cache.get_cached_content(**cache_args)
        if cached_result:
            logger.info(f"Using cached content for keywords: {req.researchKeywords} (saved expensive generation)")
            # Mark the cache hit on a copy so the object handed out by the
            # cache is not modified in place.
            return MediumBlogGenerateResult(
                **{**cached_result, "generation_time_ms": 0, "cache_hit": True}
            )

        # Cache miss - proceed with AI generation
        logger.info(f"Cache miss - generating new content for keywords: {req.researchKeywords}")

        payload = {
            "title": req.title,
            "globalTargetWords": req.globalTargetWords or self.DEFAULT_TARGET_WORDS,
            "persona": cache_args["persona_data"],
            "tone": req.tone,
            "audience": req.audience,
            "sections": [self._section_block(s) for s in req.sections],
        }
        system = self._build_system_prompt(req.persona)
        prompt = self._build_user_prompt(req, payload)

        # NOTE(review): llm_text_gen is called synchronously inside this
        # coroutine, so the event loop waits for the whole generation.
        try:
            ai_resp = llm_text_gen(
                prompt=prompt,
                json_struct=self._response_schema(),
                system_prompt=system,
                user_id=user_id,
            )
        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve error details
            raise
        except Exception as llm_error:
            # Wrap other errors, keeping the original as __cause__ for debugging
            logger.error(f"AI generation failed: {llm_error}")
            raise Exception(f"AI generation failed: {str(llm_error)}") from llm_error

        # Check for errors in AI response
        if not ai_resp or ai_resp.get("error"):
            error_msg = ai_resp.get("error", "Empty generation result from model") if ai_resp else "No response from model"
            logger.error(f"AI generation failed: {error_msg}")
            raise Exception(f"AI generation failed: {error_msg}")

        # Normalize output
        title = ai_resp.get("title") or req.title
        out_sections = []
        for raw in ai_resp.get("sections", []) or []:
            content = raw.get("content") or ""
            # map to ResearchSource shape if possible; keep minimal
            sources = [
                ResearchSource(title=src.get("title", ""), url=src.get("url", ""))
                for src in (raw.get("sources") or [])
            ]
            out_sections.append(
                MediumGeneratedSection(
                    id=str(raw.get("id")),
                    heading=raw.get("heading") or "",
                    content=content,
                    wordCount=self._resolve_word_count(raw, content),
                    sources=sources or None,
                )
            )

        duration_ms = int((time.time() - start) * 1000)
        result = MediumBlogGenerateResult(
            success=True,
            title=title,
            sections=out_sections,
            model=self.MODEL_NAME,
            generation_time_ms=duration_ms,
            safety_flags=None,
        )

        # Cache the result for future use
        try:
            self.cache.cache_content(**cache_args, result=result.dict())
            logger.info(f"Cached content result for keywords: {req.researchKeywords}")
        except Exception as cache_error:
            # Don't fail the entire operation if caching fails
            logger.warning(f"Failed to cache content result: {cache_error}")

        # Save content to user workspace if db session is available
        if user_id and db:
            self._save_to_workspace(req, result, user_id, db)
        elif not db:
            logger.warning("Database session not provided, skipping workspace save for medium blog")

        return result

    @staticmethod
    def _outline_fields(section) -> Dict[str, Any]:
        """Return a section's outline fields, reading snake_case or camelCase attributes."""
        return {
            "keyPoints": getattr(section, "key_points", []) or getattr(section, "keyPoints", []),
            "subheadings": getattr(section, "subheadings", []),
            "keywords": getattr(section, "keywords", []),
            "targetWords": getattr(section, "target_words", None) or getattr(section, "targetWords", None),
        }

    @staticmethod
    def _section_block(section) -> Dict[str, Any]:
        """Return the prompt payload entry (id, heading, outline) for one section."""
        outline = MediumBlogGenerator._outline_fields(section)
        # `or []` guards a section whose references attribute is set to None.
        outline["references"] = [
            {"title": r.title, "url": r.url}
            for r in (getattr(section, "references", []) or [])
        ]
        return {"id": section.id, "heading": section.heading, "outline": outline}

    @staticmethod
    def _cache_key_args(req, sections_for_cache: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Return the keyword arguments identifying ``req`` for cache lookup and write."""
        return {
            "keywords": req.researchKeywords or [],
            "sections": sections_for_cache,
            "global_target_words": req.globalTargetWords or MediumBlogGenerator.DEFAULT_TARGET_WORDS,
            "persona_data": req.persona.dict() if req.persona else None,
            "tone": req.tone or MediumBlogGenerator.DEFAULT_TONE,
            "audience": req.audience or MediumBlogGenerator.DEFAULT_AUDIENCE,
        }

    @staticmethod
    def _response_schema() -> Dict[str, Any]:
        """Return a fresh copy of the JSON schema expected from the model."""
        source_item = {
            "type": "object",
            "properties": {"title": {"type": "string"}, "url": {"type": "string"}},
        }
        section_item = {
            "type": "object",
            "properties": {
                "id": {"type": "string"},
                "heading": {"type": "string"},
                "content": {"type": "string"},
                "wordCount": {"type": "number"},
                "sources": {"type": "array", "items": source_item},
            },
        }
        return {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "sections": {"type": "array", "items": section_item},
            },
        }

    @staticmethod
    def _build_system_prompt(persona) -> str:
        """Return the system prompt, with persona guidelines when a persona is given."""
        persona_context = ""
        if persona:
            persona_context = (
                "\nPERSONA GUIDELINES:\n"
                f"- Industry: {persona.industry or 'General'}\n"
                f"- Tone: {persona.tone or 'Professional'}\n"
                f"- Audience: {persona.audience or 'General readers'}\n"
                f"- Persona ID: {persona.persona_id or 'Default'}\n"
                "Write content that reflects this persona's expertise and communication style.\n"
                "Use industry-specific terminology and examples where appropriate.\n"
                "Maintain consistent voice and authority throughout all sections.\n"
            )
        return (
            "You are a professional blog writer with deep expertise in your field. "
            "Generate high-quality, persona-driven content for each section based on the provided outline. "
            "Write engaging, informative content that follows the section's key points and target word count. "
            "Ensure the content flows naturally and maintains consistent voice and authority. "
            "Format content with proper paragraph breaks using double line breaks (\\n\\n) between paragraphs. "
            "Structure content with clear paragraphs - aim for 2-4 sentences per paragraph. "
            f"{persona_context}"
            "Return ONLY valid JSON with no markdown formatting or explanations."
        )

    @staticmethod
    def _build_user_prompt(req, payload: Dict[str, Any]) -> str:
        """Return the user prompt: writing rules, persona requirements and the JSON payload."""
        persona_instructions = ""
        if req.persona:
            industry = req.persona.industry or 'General'
            tone = req.persona.tone or 'Professional'
            audience = req.persona.audience or 'General readers'
            persona_instructions = (
                "\nPERSONA-DRIVEN CONTENT REQUIREMENTS:\n"
                f"- Write as an expert in {industry} industry\n"
                f"- Use {tone} tone appropriate for {audience}\n"
                "- Include industry-specific examples and terminology\n"
                "- Demonstrate authority and expertise in the field\n"
                f"- Use language that resonates with {audience}\n"
                "- Maintain consistent voice that reflects this persona's expertise\n"
            )
        total_words = req.globalTargetWords or MediumBlogGenerator.DEFAULT_TARGET_WORDS
        return (
            # The budget applies to the whole blog, not to each section.
            f"Write blog content for the following sections. The blog should be {total_words} words total, distributed across all sections.\n\n"
            f"Blog Title: {req.title}\n\n"
            "For each section, write engaging content that:\n"
            "- Follows the key points provided\n"
            "- Uses the suggested keywords naturally\n"
            "- Meets the target word count\n"
            "- Maintains professional tone\n"
            "- References the provided sources when relevant\n"
            "- Breaks content into clear paragraphs (2-4 sentences each)\n"
            "- Uses double line breaks (\\n\\n) between paragraphs for proper formatting\n"
            "- Starts with an engaging opening paragraph\n"
            "- Ends with a strong concluding paragraph\n"
            f"{persona_instructions}\n"
            "IMPORTANT: Format the 'content' field with proper paragraph breaks using \\n\\n between paragraphs.\n\n"
            "Return a JSON object with 'title' and 'sections' array. Each section should have 'id', 'heading', 'content', and 'wordCount'.\n\n"
            f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
        )

    @staticmethod
    def _resolve_word_count(raw_section: Dict[str, Any], content: str) -> int:
        """Return the model-reported word count, or count ``content`` when it is missing/zero."""
        reported = int(raw_section.get("wordCount") or 0)
        return reported or len(content.split())

    def _save_to_workspace(self, req, result, user_id: str, db) -> None:
        """Save the generated blog as Markdown to the user's workspace; failures are logged only."""
        try:
            # Construct full blog content
            parts = [f"# {result.title}\n\n"]
            for section in result.sections:
                parts.append(f"## {section.heading}\n\n")
                parts.append(f"{section.content}\n\n")
            full_content = "".join(parts)

            # NOTE(review): save_and_track_text_content is not imported in the
            # part of this file visible during review. If it is not in scope
            # the call raises NameError, which the handler below logs and
            # swallows - TODO confirm the import exists.
            save_and_track_text_content(
                db=db,
                user_id=user_id,
                content=full_content,
                source_module="blog_writer",
                title=result.title,
                description=f"Blog: {result.title}",
                tags=req.researchKeywords or ["blog", "ai_generated"],
                asset_metadata={
                    "blog_type": "medium",
                    "model": result.model,
                    "generation_time_ms": result.generation_time_ms,
                    "word_count": sum(s.wordCount for s in result.sections),
                    "section_count": len(result.sections),
                },
                subdirectory="blogs"
            )
            logger.info(f"Saved medium blog content to user workspace for user {user_id}")
        except Exception as e:
            logger.error(f"Failed to save medium blog content to workspace: {e}")