""" Medium Blog Generator Service Handles generation of medium-length blogs (≤1000 words) using structured AI calls. """ import time import json from typing import Dict, Any, List from loguru import logger from fastapi import HTTPException from models.blog_models import ( MediumBlogGenerateRequest, MediumBlogGenerateResult, MediumGeneratedSection, ResearchSource, ) from services.llm_providers.main_text_generation import llm_text_gen from services.cache.persistent_content_cache import persistent_content_cache class MediumBlogGenerator: """Service for generating medium-length blog content using structured AI calls.""" def __init__(self): self.cache = persistent_content_cache async def generate_medium_blog_with_progress(self, req: MediumBlogGenerateRequest, task_id: str, user_id: str) -> MediumBlogGenerateResult: """Use Gemini structured JSON to generate a medium-length blog in one call. Args: req: Medium blog generation request task_id: Task ID for progress updates user_id: User ID (required for subscription checks and usage tracking) Raises: ValueError: If user_id is not provided """ if not user_id: raise ValueError("user_id is required for medium blog generation (subscription checks and usage tracking)") import time start = time.time() # Prepare sections data for cache key generation sections_for_cache = [] for s in req.sections: sections_for_cache.append({ "id": s.id, "heading": s.heading, "keyPoints": getattr(s, "key_points", []) or getattr(s, "keyPoints", []), "subheadings": getattr(s, "subheadings", []), "keywords": getattr(s, "keywords", []), "targetWords": getattr(s, "target_words", None) or getattr(s, "targetWords", None), }) # Check cache first cached_result = self.cache.get_cached_content( keywords=req.researchKeywords or [], sections=sections_for_cache, global_target_words=req.globalTargetWords or 1000, persona_data=req.persona.dict() if req.persona else None, tone=req.tone, audience=req.audience ) if cached_result: logger.info(f"Using cached content for keywords: {req.researchKeywords} (saved expensive generation)") # Add cache hit marker to distinguish from fresh generation cached_result['generation_time_ms'] = 0 # Mark as cache hit cached_result['cache_hit'] = True return MediumBlogGenerateResult(**cached_result) # Cache miss - proceed with AI generation logger.info(f"Cache miss - generating new content for keywords: {req.researchKeywords}") # Build schema expected from the model schema = { "type": "object", "properties": { "title": {"type": "string"}, "sections": { "type": "array", "items": { "type": "object", "properties": { "id": {"type": "string"}, "heading": {"type": "string"}, "content": {"type": "string"}, "wordCount": {"type": "number"}, "sources": { "type": "array", "items": { "type": "object", "properties": {"title": {"type": "string"}, "url": {"type": "string"}}, }, }, }, }, }, }, } # Compose prompt def section_block(s): return { "id": s.id, "heading": s.heading, "outline": { "keyPoints": getattr(s, "key_points", []) or getattr(s, "keyPoints", []), "subheadings": getattr(s, "subheadings", []), "keywords": getattr(s, "keywords", []), "targetWords": getattr(s, "target_words", None) or getattr(s, "targetWords", None), "references": [ {"title": r.title, "url": r.url} for r in getattr(s, "references", []) ], }, } payload = { "title": req.title, "globalTargetWords": req.globalTargetWords or 1000, "persona": req.persona.dict() if req.persona else None, "tone": req.tone, "audience": req.audience, "sections": [section_block(s) for s in req.sections], } # Build persona-aware system prompt persona_context = "" if req.persona: persona_context = f""" PERSONA GUIDELINES: - Industry: {req.persona.industry or 'General'} - Tone: {req.persona.tone or 'Professional'} - Audience: {req.persona.audience or 'General readers'} - Persona ID: {req.persona.persona_id or 'Default'} Write content that reflects this persona's expertise and communication style. Use industry-specific terminology and examples where appropriate. Maintain consistent voice and authority throughout all sections. """ system = ( "You are a professional blog writer with deep expertise in your field. " "Generate high-quality, persona-driven content for each section based on the provided outline. " "Write engaging, informative content that follows the section's key points and target word count. " "Ensure the content flows naturally and maintains consistent voice and authority. " "Format content with proper paragraph breaks using double line breaks (\\n\\n) between paragraphs. " "Structure content with clear paragraphs - aim for 2-4 sentences per paragraph. " f"{persona_context}" "Return ONLY valid JSON with no markdown formatting or explanations." ) # Build persona-specific content instructions persona_instructions = "" if req.persona: industry = req.persona.industry or 'General' tone = req.persona.tone or 'Professional' audience = req.persona.audience or 'General readers' persona_instructions = f""" PERSONA-DRIVEN CONTENT REQUIREMENTS: - Write as an expert in {industry} industry - Use {tone} tone appropriate for {audience} - Include industry-specific examples and terminology - Demonstrate authority and expertise in the field - Use language that resonates with {audience} - Maintain consistent voice that reflects this persona's expertise """ prompt = ( f"Write blog content for the following sections. Each section should be {req.globalTargetWords or 1000} words total, distributed across all sections.\n\n" f"Blog Title: {req.title}\n\n" "For each section, write engaging content that:\n" "- Follows the key points provided\n" "- Uses the suggested keywords naturally\n" "- Meets the target word count\n" "- Maintains professional tone\n" "- References the provided sources when relevant\n" "- Breaks content into clear paragraphs (2-4 sentences each)\n" "- Uses double line breaks (\\n\\n) between paragraphs for proper formatting\n" "- Starts with an engaging opening paragraph\n" "- Ends with a strong concluding paragraph\n" f"{persona_instructions}\n" "IMPORTANT: Format the 'content' field with proper paragraph breaks using \\n\\n between paragraphs.\n\n" "Return a JSON object with 'title' and 'sections' array. Each section should have 'id', 'heading', 'content', and 'wordCount'.\n\n" f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}" ) try: ai_resp = llm_text_gen( prompt=prompt, json_struct=schema, system_prompt=system, user_id=user_id ) except HTTPException: # Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve error details raise except Exception as llm_error: # Wrap other errors logger.error(f"AI generation failed: {llm_error}") raise Exception(f"AI generation failed: {str(llm_error)}") # Check for errors in AI response if not ai_resp or ai_resp.get("error"): error_msg = ai_resp.get("error", "Empty generation result from model") if ai_resp else "No response from model" logger.error(f"AI generation failed: {error_msg}") raise Exception(f"AI generation failed: {error_msg}") # Normalize output title = ai_resp.get("title") or req.title out_sections = [] for s in ai_resp.get("sections", []) or []: out_sections.append( MediumGeneratedSection( id=str(s.get("id")), heading=s.get("heading") or "", content=s.get("content") or "", wordCount=int(s.get("wordCount") or 0), sources=[ # map to ResearchSource shape if possible; keep minimal ResearchSource(title=src.get("title", ""), url=src.get("url", "")) for src in (s.get("sources") or []) ] or None, ) ) duration_ms = int((time.time() - start) * 1000) result = MediumBlogGenerateResult( success=True, title=title, sections=out_sections, model="gemini-2.5-flash", generation_time_ms=duration_ms, safety_flags=None, ) # Cache the result for future use try: self.cache.cache_content( keywords=req.researchKeywords or [], sections=sections_for_cache, global_target_words=req.globalTargetWords or 1000, persona_data=req.persona.dict() if req.persona else None, tone=req.tone or "professional", audience=req.audience or "general", result=result.dict() ) logger.info(f"Cached content result for keywords: {req.researchKeywords}") except Exception as cache_error: logger.warning(f"Failed to cache content result: {cache_error}") # Don't fail the entire operation if caching fails return result