- Fix text selection menu not showing: wire contentRef via inputRef on multiline TextField - Fix blog title not truncating: add min-w-0 for flex item overflow - Fix outline generation 500: escape curly braces in f-string prompt template - Fix content generation 'NoneType not callable': replace SessionLocal() with get_session_for_user(), add db param to MediumBlogGenerator, fix signature mismatch in database_task_manager - Fix writing assistant suggest 500: add auth + user_id to API endpoint and service, replace sync requests with httpx.AsyncClient - Fix hallucination detector 404: explicitly include router in main.py and app.py - Fix missing error_data in task failure responses - Hide CopilotKit web inspector button - Remove hardcoded fallback suggestions from SmartTypingAssist - Fix stale closure refs in SmartTypingAssist handleTypingChange - Add two-column editor layout, stats bar, section hover menu - Various subscription, billing, and research module improvements
290 lines
12 KiB
Python
290 lines
12 KiB
Python
"""
|
|
Medium Blog Generator Service
|
|
|
|
Handles generation of medium-length blogs (≤1000 words) using structured AI calls.
|
|
"""
|
|
|
|
import time
|
|
import json
|
|
from typing import Dict, Any, List
|
|
from loguru import logger
|
|
from fastapi import HTTPException
|
|
from sqlalchemy.orm import Session
|
|
|
|
from models.blog_models import (
|
|
MediumBlogGenerateRequest,
|
|
MediumBlogGenerateResult,
|
|
MediumGeneratedSection,
|
|
ResearchSource,
|
|
)
|
|
from services.llm_providers.main_text_generation import llm_text_gen
|
|
from services.cache.persistent_content_cache import persistent_content_cache
|
|
|
|
|
|
class MediumBlogGenerator:
    """Service for generating medium-length blog content using structured AI calls.

    The public entry point is ``generate_medium_blog_with_progress``. The
    remaining methods are private helpers that each build one piece of the
    request (cache key, schema, prompts) or post-process the model output.
    """

    # Fallbacks applied when the request leaves the corresponding field unset.
    DEFAULT_TARGET_WORDS = 1000
    DEFAULT_TONE = "professional"
    DEFAULT_AUDIENCE = "general"
    # Model name recorded on every freshly generated result.
    MODEL_NAME = "gemini-2.5-flash"

    def __init__(self):
        self.cache = persistent_content_cache

    async def generate_medium_blog_with_progress(self, req: MediumBlogGenerateRequest, task_id: str, user_id: str, db: Session = None) -> MediumBlogGenerateResult:
        """Generate a medium-length blog in a single structured-JSON model call.

        The content cache is consulted first; on a miss the model is called,
        the response is normalized into ``MediumGeneratedSection`` objects,
        the result is cached and, when ``db`` is given, saved to the user's
        workspace. Cache-write and workspace-save failures are logged and do
        not fail the request.

        Args:
            req: Medium blog generation request
            task_id: Task ID for progress updates (not referenced in this
                method body at present)
            user_id: User ID (required for subscription checks and usage tracking)
            db: Optional database session; when omitted the workspace save
                is skipped with a warning

        Returns:
            The generated (or cached) ``MediumBlogGenerateResult``. Cache hits
            carry ``generation_time_ms == 0``.

        Raises:
            ValueError: If user_id is not provided
            HTTPException: Re-raised unchanged from the model call
                (e.g. 429 subscription limit)
            Exception: If the model call fails or returns an error/empty
                response
        """
        if not user_id:
            raise ValueError("user_id is required for medium blog generation (subscription checks and usage tracking)")

        start = time.time()

        sections_for_cache = [
            {"id": s.id, "heading": s.heading, **self._section_outline(s)}
            for s in req.sections
        ]
        # The same arguments are used for lookup and store so that an entry
        # written by this method can actually be found again.
        cache_args = self._cache_key_args(req, sections_for_cache)

        cached_result = self.cache.get_cached_content(**cache_args)
        if cached_result:
            logger.info(f"Using cached content for keywords: {req.researchKeywords} (saved expensive generation)")
            # Build a copy so the object owned by the cache is not mutated.
            # generation_time_ms == 0 distinguishes a hit from a fresh run.
            cached_payload = {**cached_result, "generation_time_ms": 0, "cache_hit": True}
            return MediumBlogGenerateResult(**cached_payload)

        logger.info(f"Cache miss - generating new content for keywords: {req.researchKeywords}")

        payload = {
            "title": req.title,
            "globalTargetWords": req.globalTargetWords or self.DEFAULT_TARGET_WORDS,
            "persona": req.persona.dict() if req.persona else None,
            "tone": req.tone,
            "audience": req.audience,
            "sections": [self._section_block(s) for s in req.sections],
        }

        try:
            ai_resp = llm_text_gen(
                prompt=self._build_prompt(req, payload),
                json_struct=self._response_schema(),
                system_prompt=self._build_system_prompt(req),
                user_id=user_id,
            )
        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve error details
            raise
        except Exception as llm_error:
            logger.error(f"AI generation failed: {llm_error}")
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"AI generation failed: {str(llm_error)}") from llm_error

        if not ai_resp:
            error_msg = "No response from model"
        elif not isinstance(ai_resp, dict):
            # Guard against a non-dict payload instead of failing on .get().
            error_msg = f"Unexpected response type from model: {type(ai_resp).__name__}"
        else:
            error_msg = ai_resp.get("error")
        if error_msg:
            logger.error(f"AI generation failed: {error_msg}")
            raise Exception(f"AI generation failed: {error_msg}")

        result = MediumBlogGenerateResult(
            success=True,
            title=ai_resp.get("title") or req.title,
            sections=self._normalize_sections(ai_resp.get("sections")),
            model=self.MODEL_NAME,
            generation_time_ms=int((time.time() - start) * 1000),
            safety_flags=None,
        )

        try:
            self.cache.cache_content(**cache_args, result=result.dict())
            logger.info(f"Cached content result for keywords: {req.researchKeywords}")
        except Exception as cache_error:
            # Don't fail the entire operation if caching fails
            logger.warning(f"Failed to cache content result: {cache_error}")

        # user_id was validated above, so only the db session decides this.
        if db:
            self._save_to_workspace(req, result, user_id, db)
        else:
            logger.warning("Database session not provided, skipping workspace save for medium blog")

        return result

    @staticmethod
    def _section_outline(section) -> Dict[str, Any]:
        """Read the outline fields of a section, accepting snake_case or camelCase attributes."""
        return {
            "keyPoints": getattr(section, "key_points", []) or getattr(section, "keyPoints", []),
            "subheadings": getattr(section, "subheadings", []),
            "keywords": getattr(section, "keywords", []),
            "targetWords": getattr(section, "target_words", None) or getattr(section, "targetWords", None),
        }

    def _section_block(self, section) -> Dict[str, Any]:
        """Build the per-section entry embedded in the prompt payload."""
        outline = self._section_outline(section)
        # ``or []`` tolerates a references attribute that is present but None.
        outline["references"] = [
            {"title": ref.title, "url": ref.url}
            for ref in (getattr(section, "references", []) or [])
        ]
        return {"id": section.id, "heading": section.heading, "outline": outline}

    def _cache_key_args(self, req, sections_for_cache: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Return the keyword arguments shared by the cache lookup and the cache store."""
        return {
            "keywords": req.researchKeywords or [],
            "sections": sections_for_cache,
            "global_target_words": req.globalTargetWords or self.DEFAULT_TARGET_WORDS,
            "persona_data": req.persona.dict() if req.persona else None,
            "tone": req.tone or self.DEFAULT_TONE,
            "audience": req.audience or self.DEFAULT_AUDIENCE,
        }

    @staticmethod
    def _response_schema() -> Dict[str, Any]:
        """Return the JSON schema passed to the model as ``json_struct``."""
        source_schema = {
            "type": "object",
            "properties": {"title": {"type": "string"}, "url": {"type": "string"}},
        }
        section_schema = {
            "type": "object",
            "properties": {
                "id": {"type": "string"},
                "heading": {"type": "string"},
                "content": {"type": "string"},
                "wordCount": {"type": "number"},
                "sources": {"type": "array", "items": source_schema},
            },
        }
        return {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "sections": {"type": "array", "items": section_schema},
            },
        }

    @staticmethod
    def _build_system_prompt(req) -> str:
        """Compose the system prompt, adding persona guidelines when a persona is set."""
        persona_context = ""
        if req.persona:
            persona_context = (
                "\n"
                "PERSONA GUIDELINES:\n"
                f"- Industry: {req.persona.industry or 'General'}\n"
                f"- Tone: {req.persona.tone or 'Professional'}\n"
                f"- Audience: {req.persona.audience or 'General readers'}\n"
                f"- Persona ID: {req.persona.persona_id or 'Default'}\n"
                "\n"
                "Write content that reflects this persona's expertise and communication style.\n"
                "Use industry-specific terminology and examples where appropriate.\n"
                "Maintain consistent voice and authority throughout all sections.\n"
            )

        return (
            "You are a professional blog writer with deep expertise in your field. "
            "Generate high-quality, persona-driven content for each section based on the provided outline. "
            "Write engaging, informative content that follows the section's key points and target word count. "
            "Ensure the content flows naturally and maintains consistent voice and authority. "
            "Format content with proper paragraph breaks using double line breaks (\\n\\n) between paragraphs. "
            "Structure content with clear paragraphs - aim for 2-4 sentences per paragraph. "
            f"{persona_context}"
            "Return ONLY valid JSON with no markdown formatting or explanations."
        )

    def _build_prompt(self, req, payload: Dict[str, Any]) -> str:
        """Compose the user prompt: writing rules, persona requirements and the JSON payload."""
        persona_instructions = ""
        if req.persona:
            industry = req.persona.industry or 'General'
            tone = req.persona.tone or 'Professional'
            audience = req.persona.audience or 'General readers'
            persona_instructions = (
                "\n"
                "PERSONA-DRIVEN CONTENT REQUIREMENTS:\n"
                f"- Write as an expert in {industry} industry\n"
                f"- Use {tone} tone appropriate for {audience}\n"
                "- Include industry-specific examples and terminology\n"
                "- Demonstrate authority and expertise in the field\n"
                f"- Use language that resonates with {audience}\n"
                "- Maintain consistent voice that reflects this persona's expertise\n"
            )

        total_words = req.globalTargetWords or self.DEFAULT_TARGET_WORDS
        return (
            # The word budget applies to the whole blog, not to each section.
            f"Write blog content for the following sections. The blog should be {total_words} words total, distributed across all sections.\n\n"
            f"Blog Title: {req.title}\n\n"
            "For each section, write engaging content that:\n"
            "- Follows the key points provided\n"
            "- Uses the suggested keywords naturally\n"
            "- Meets the target word count\n"
            "- Maintains professional tone\n"
            "- References the provided sources when relevant\n"
            "- Breaks content into clear paragraphs (2-4 sentences each)\n"
            "- Uses double line breaks (\\n\\n) between paragraphs for proper formatting\n"
            "- Starts with an engaging opening paragraph\n"
            "- Ends with a strong concluding paragraph\n"
            f"{persona_instructions}\n"
            "IMPORTANT: Format the 'content' field with proper paragraph breaks using \\n\\n between paragraphs.\n\n"
            "Return a JSON object with 'title' and 'sections' array. Each section should have 'id', 'heading', 'content', and 'wordCount'.\n\n"
            f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
        )

    @staticmethod
    def _resolve_word_count(reported, content: str) -> int:
        """Return the model-reported word count, or count the words in ``content`` when it is missing or invalid."""
        try:
            count = int(reported or 0)
        except (TypeError, ValueError, OverflowError):
            count = 0
        return count if count > 0 else len(content.split())

    def _normalize_sections(self, raw_sections) -> list:
        """Convert the model's raw section dicts into ``MediumGeneratedSection`` objects."""
        normalized = []
        for raw in raw_sections or []:
            content = raw.get("content") or ""
            # map to ResearchSource shape if possible; keep minimal
            sources = [
                ResearchSource(title=src.get("title", ""), url=src.get("url", ""))
                for src in (raw.get("sources") or [])
            ]
            normalized.append(
                MediumGeneratedSection(
                    id=str(raw.get("id")),
                    heading=raw.get("heading") or "",
                    content=content,
                    wordCount=self._resolve_word_count(raw.get("wordCount"), content),
                    # An empty source list is passed as None.
                    sources=sources or None,
                )
            )
        return normalized

    @staticmethod
    def _save_to_workspace(req, result, user_id: str, db) -> None:
        """Save the generated blog as Markdown to the user's workspace; failures are logged, not raised."""
        try:
            parts = [f"# {result.title}\n\n"]
            parts.extend(
                f"## {section.heading}\n\n{section.content}\n\n"
                for section in result.sections
            )

            # NOTE(review): save_and_track_text_content is not among the
            # imports visible at the top of this file. If it is not imported
            # elsewhere, this call raises NameError, which the except below
            # only logs - confirm the import exists.
            save_and_track_text_content(
                db=db,
                user_id=user_id,
                content="".join(parts),
                source_module="medium_blog_writer",
                title=result.title,
                description=f"Generated medium blog: {result.title}",
                tags=req.researchKeywords or ["medium_blog", "ai_generated"],
                asset_metadata={
                    "model": result.model,
                    "generation_time_ms": result.generation_time_ms,
                    "word_count": sum(s.wordCount for s in result.sections),
                },
                subdirectory="medium_blogs",
            )
            logger.info(f"Saved medium blog content to user workspace for user {user_id}")
        except Exception as e:
            logger.error(f"Failed to save medium blog content to workspace: {e}")
|