Updated SEO Analysis Modal

2025-09-22 21:02:32 +05:30
parent f98d49cea7
commit 12119d418b
38 changed files with 5742 additions and 2337 deletions
--- a/backend/services/blog_writer/content/blog_rewriter.py
+++ b/backend/services/blog_writer/content/blog_rewriter.py
@@ -0,0 +1,209 @@
+"""
+Blog Rewriter Service
+
+Handles blog rewriting based on user feedback using structured AI calls.
+"""
+
+import time
+import uuid
+from typing import Dict, Any
+from loguru import logger
+
+from services.llm_providers.gemini_provider import gemini_structured_json_response
+
+
+class BlogRewriter:
+    """Service for rewriting blog content based on user feedback.
+
+    ``start_blog_rewrite`` validates a request and registers
+    ``_execute_blog_rewrite`` with the injected task manager, which is also
+    used to report progress and the final result of the rewrite.
+    """
+
+    def __init__(self, task_manager):
+        # Must provide start_task() and update_task_status(), both used below.
+        self.task_manager = task_manager
+
+    def start_blog_rewrite(self, request: Dict[str, Any]) -> str:
+        """Validate a rewrite request and schedule it as a background task.
+
+        Args:
+            request: Rewrite payload. Reads "title", "sections", "research",
+                "outline", "feedback", "tone", "audience" and "focus".
+
+        Returns:
+            The generated task id ("rewrite_<unix seconds>_<8 hex chars>").
+
+        Raises:
+            ValueError: If "sections" is empty, or "feedback" is not a string
+                with at least 10 characters once surrounding whitespace is
+                removed.
+        """
+        try:
+            sections = request.get("sections", [])
+            feedback = request.get("feedback", "")
+
+            if not sections:
+                raise ValueError("No sections provided for rewrite")
+
+            # Reject non-string feedback here rather than crashing on .strip().
+            if not isinstance(feedback, str) or len(feedback.strip()) < 10:
+                raise ValueError("Feedback is required and must be at least 10 characters")
+
+            task_id = f"rewrite_{int(time.time())}_{uuid.uuid4().hex[:8]}"
+
+            self.task_manager.start_task(
+                task_id,
+                self._execute_blog_rewrite,
+                title=request.get("title", "Untitled Blog"),
+                sections=sections,
+                research=request.get("research", {}),
+                outline=request.get("outline", []),
+                feedback=feedback,
+                tone=request.get("tone"),
+                audience=request.get("audience"),
+                focus=request.get("focus"),
+            )
+
+            logger.info(f"Blog rewrite task started: {task_id}")
+            return task_id
+
+        except Exception as e:
+            logger.error(f"Failed to start blog rewrite: {e}")
+            raise
+
+    async def _execute_blog_rewrite(self, task_id: str, **kwargs):
+        """Run the rewrite for ``task_id`` and publish the outcome.
+
+        Reads "title", "sections", "feedback", "tone", "audience" and "focus"
+        from ``kwargs``; "research" and "outline" are accepted but unused.
+
+        The task is marked "completed" with the rewritten blog as ``result``,
+        or "failed" with a reason. Unexpected exceptions are reported as a
+        failed task and then re-raised.
+        """
+        try:
+            self.task_manager.update_task_status(task_id, "processing", "Analyzing current content and feedback...")
+
+            system_prompt, rewrite_prompt = self._build_prompts(
+                title=kwargs.get("title", "Untitled Blog"),
+                sections=kwargs.get("sections", []),
+                feedback=kwargs.get("feedback", ""),
+                tone=kwargs.get("tone"),
+                audience=kwargs.get("audience"),
+                focus=kwargs.get("focus"),
+            )
+
+            self.task_manager.update_task_status(task_id, "processing", "Generating rewritten content...")
+
+            # NOTE(review): the provider is called without await, so it
+            # presumably blocks the event loop while the model responds - confirm.
+            result = gemini_structured_json_response(
+                prompt=rewrite_prompt,
+                schema=self._rewrite_schema(),
+                temperature=0.7,
+                max_tokens=4096,
+                system_prompt=system_prompt,
+            )
+
+            logger.info(f"Gemini response for rewrite task {task_id}: {result}")
+
+            rewritten, error_msg = self._normalize_result(result)
+            if rewritten is None:
+                self.task_manager.update_task_status(task_id, "failed", f"Rewrite failed: {error_msg}")
+                logger.error(f"Blog rewrite failed: {error_msg}")
+                return
+
+            # A new dict (rather than the response itself) means the
+            # single-section shape was wrapped into the multi-section one.
+            if rewritten is not result:
+                logger.info(f"Converted single section response to multi-section format for task {task_id}")
+
+            self.task_manager.update_task_status(
+                task_id,
+                "completed",
+                "Blog rewrite completed successfully!",
+                result=rewritten,
+            )
+            logger.info(f"Blog rewrite completed successfully: {task_id}")
+
+        except Exception as e:
+            error_msg = f"Blog rewrite error: {str(e)}"
+            self.task_manager.update_task_status(task_id, "failed", error_msg)
+            logger.error(f"Blog rewrite task failed: {e}")
+            raise
+
+    @staticmethod
+    def _rewrite_schema() -> Dict[str, Any]:
+        """Return a fresh JSON schema describing the expected rewrite output."""
+        return {
+            "type": "object",
+            "properties": {
+                "title": {"type": "string"},
+                "sections": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "id": {"type": "string"},
+                            "heading": {"type": "string"},
+                            "content": {"type": "string"},
+                        },
+                    },
+                },
+            },
+        }
+
+    @staticmethod
+    def _build_prompts(title, sections, feedback, tone, audience, focus):
+        """Return ``(system_prompt, rewrite_prompt)`` for the rewrite call."""
+        system_prompt = f"""You are an expert blog writer tasked with rewriting content based on user feedback. 
+            
+            Current Blog Title: {title}
+            User Feedback: {feedback}
+            {f"Desired Tone: {tone}" if tone else ""}
+            {f"Target Audience: {audience}" if audience else ""}
+            {f"Focus Area: {focus}" if focus else ""}
+            
+            Your task is to rewrite the blog content to address the user's feedback while maintaining the core structure and research insights."""
+
+        # Flatten the current blog into plain text for the prompt.
+        full_content = f"Title: {title}\n\n" + "".join(
+            f"Section: {section.get('heading', 'Untitled')}\n"
+            f"Content: {section.get('content', '')}\n\n"
+            for section in sections
+        )
+
+        rewrite_prompt = f"""
+            Based on the user feedback and current blog content, rewrite the blog to address their concerns and preferences.
+            
+            Current Content:
+            {full_content}
+            
+            User Feedback: {feedback}
+            {f"Desired Tone: {tone}" if tone else ""}
+            {f"Target Audience: {audience}" if audience else ""}
+            {f"Focus Area: {focus}" if focus else ""}
+            
+            Please rewrite the blog content in the following JSON format:
+            {{
+                "title": "New or improved blog title",
+                "sections": [
+                    {{
+                        "id": "section_id",
+                        "heading": "Section heading",
+                        "content": "Rewritten section content"
+                    }}
+                ]
+            }}
+            
+            Guidelines:
+            1. Address the user's feedback directly
+            2. Maintain the research insights and factual accuracy
+            3. Improve flow, clarity, and engagement
+            4. Keep the same section structure unless feedback suggests otherwise
+            5. Ensure content is well-formatted with proper paragraphs
+            """
+
+        return system_prompt, rewrite_prompt
+
+    @staticmethod
+    def _normalize_result(result):
+        """Validate the model output and coerce it to the multi-section shape.
+
+        Returns:
+            ``(blog, None)`` on success, where ``blog`` is ``result`` itself
+            for a multi-section response or a new dict wrapping a
+            single-section response; ``(None, reason)`` when the response is
+            unusable.
+        """
+        if not result:
+            return None, "No response from AI"
+        # Anything but a dict cannot be inspected with .get(); report it as a
+        # malformed response instead of raising AttributeError.
+        if not isinstance(result, dict):
+            return None, "AI response has invalid structure"
+        if result.get("error"):
+            return None, f"AI error: {result.get('error')}"
+
+        title = result.get("title")
+        heading = result.get("heading")
+        sections = result.get("sections")
+        content = result.get("content")
+
+        # Preferred shape: a title plus a list of sections.
+        if title and sections:
+            return result, None
+
+        # Fallback shape: a single section returned at the top level.
+        if (heading or title) and content:
+            return {
+                "title": heading or title or "Rewritten Blog",
+                "sections": [
+                    {
+                        "id": result.get("id") or "section_1",
+                        "heading": heading or "Main Content",
+                        "content": content,
+                    }
+                ],
+            }, None
+
+        if not (title or heading):
+            return None, "AI response missing title/heading"
+        if not (sections or content):
+            return None, "AI response missing sections/content"
+        return None, "AI response has invalid structure"
--- a/backend/services/blog_writer/content/medium_blog_generator.py
+++ b/backend/services/blog_writer/content/medium_blog_generator.py
@@ -0,0 +1,237 @@
+"""
+Medium Blog Generator Service
+
+Handles generation of medium-length blogs (≤1000 words) using structured AI calls.
+"""
+
+import time
+import json
+from typing import Dict, Any, List
+from loguru import logger
+
+from models.blog_models import (
+    MediumBlogGenerateRequest,
+    MediumBlogGenerateResult,
+    MediumGeneratedSection,
+    ResearchSource,
+)
+from services.llm_providers.gemini_provider import gemini_structured_json_response
+from services.cache.persistent_content_cache import persistent_content_cache
+
+
+class MediumBlogGenerator:
+    """Service for generating medium-length blog content using structured AI calls."""
+
+    def __init__(self):
+        self.cache = persistent_content_cache
+
+    async def generate_medium_blog_with_progress(self, req: MediumBlogGenerateRequest, task_id: str) -> MediumBlogGenerateResult:
+        """Use Gemini structured JSON to generate a medium-length blog in one call.
+
+        The cache is consulted first, keyed on keywords, sections, target
+        length, persona, tone and audience. On a miss the model is called once
+        for all sections and a non-empty result is stored under the same key.
+
+        Args:
+            req: Generation request (title, sections, optional persona, ...).
+            task_id: Identifier of the calling task; not used by this method.
+
+        Returns:
+            The generated blog. On a cache hit the result is built from the
+            cached data with ``generation_time_ms=0`` and ``cache_hit=True``.
+
+        Raises:
+            RuntimeError: If the model returns nothing usable or reports an
+                error.
+        """
+        start = time.time()
+        target_words = req.globalTargetWords or 1000
+        persona_data = req.persona.dict() if req.persona else None
+
+        # Prepare sections data for cache key generation
+        sections_for_cache = [
+            {"id": s.id, "heading": s.heading, **self._outline_fields(s)}
+            for s in req.sections
+        ]
+
+        # One key for both lookup and store: using the raw tone/audience for
+        # the lookup but defaulted values for the store can make them disagree.
+        cache_key = {
+            "keywords": req.researchKeywords or [],
+            "sections": sections_for_cache,
+            "global_target_words": target_words,
+            "persona_data": persona_data,
+            "tone": req.tone or "professional",
+            "audience": req.audience or "general",
+        }
+
+        # Check cache first
+        cached_result = self.cache.get_cached_content(**cache_key)
+        if cached_result:
+            logger.info(f"Using cached content for keywords: {req.researchKeywords} (saved expensive generation)")
+            # Mark the hit on a copy so the cache's own object is not mutated.
+            return MediumBlogGenerateResult(
+                **{**cached_result, "generation_time_ms": 0, "cache_hit": True}
+            )
+
+        # Cache miss - proceed with AI generation
+        logger.info(f"Cache miss - generating new content for keywords: {req.researchKeywords}")
+
+        # Build schema expected from the model
+        schema = {
+            "type": "object",
+            "properties": {
+                "title": {"type": "string"},
+                "sections": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "id": {"type": "string"},
+                            "heading": {"type": "string"},
+                            "content": {"type": "string"},
+                            "wordCount": {"type": "number"},
+                            "sources": {
+                                "type": "array",
+                                "items": {
+                                    "type": "object",
+                                    "properties": {"title": {"type": "string"}, "url": {"type": "string"}},
+                                },
+                            },
+                        },
+                    },
+                },
+            },
+        }
+
+        # Outline sent to the model, serialized into the prompt below.
+        payload = {
+            "title": req.title,
+            "globalTargetWords": target_words,
+            "persona": persona_data,
+            "tone": req.tone,
+            "audience": req.audience,
+            "sections": [
+                {
+                    "id": s.id,
+                    "heading": s.heading,
+                    "outline": {
+                        **self._outline_fields(s),
+                        "references": [
+                            {"title": r.title, "url": r.url}
+                            for r in (getattr(s, "references", []) or [])
+                        ],
+                    },
+                }
+                for s in req.sections
+            ],
+        }
+
+        # Build persona-aware system prompt
+        persona_context = ""
+        if req.persona:
+            persona_context = f"""
+            PERSONA GUIDELINES:
+            - Industry: {req.persona.industry or 'General'}
+            - Tone: {req.persona.tone or 'Professional'}
+            - Audience: {req.persona.audience or 'General readers'}
+            - Persona ID: {req.persona.persona_id or 'Default'}
+            
+            Write content that reflects this persona's expertise and communication style.
+            Use industry-specific terminology and examples where appropriate.
+            Maintain consistent voice and authority throughout all sections.
+            """
+
+        system = (
+            "You are a professional blog writer with deep expertise in your field. "
+            "Generate high-quality, persona-driven content for each section based on the provided outline. "
+            "Write engaging, informative content that follows the section's key points and target word count. "
+            "Ensure the content flows naturally and maintains consistent voice and authority. "
+            "Format content with proper paragraph breaks using double line breaks (\\n\\n) between paragraphs. "
+            "Structure content with clear paragraphs - aim for 2-4 sentences per paragraph. "
+            f"{persona_context}"
+            "Return ONLY valid JSON with no markdown formatting or explanations."
+        )
+
+        # Build persona-specific content instructions
+        persona_instructions = ""
+        if req.persona:
+            industry = req.persona.industry or 'General'
+            tone = req.persona.tone or 'Professional'
+            audience = req.persona.audience or 'General readers'
+
+            persona_instructions = f"""
+            PERSONA-DRIVEN CONTENT REQUIREMENTS:
+            - Write as an expert in {industry} industry
+            - Use {tone} tone appropriate for {audience}
+            - Include industry-specific examples and terminology
+            - Demonstrate authority and expertise in the field
+            - Use language that resonates with {audience}
+            - Maintain consistent voice that reflects this persona's expertise
+            """
+
+        # The word budget is for the whole blog; the previous wording asked
+        # for that many words per section and across all sections at once.
+        prompt = (
+            f"Write blog content for the following sections. The blog should be about {target_words} words in total, distributed across all sections.\n\n"
+            f"Blog Title: {req.title}\n\n"
+            "For each section, write engaging content that:\n"
+            "- Follows the key points provided\n"
+            "- Uses the suggested keywords naturally\n"
+            "- Meets the target word count\n"
+            "- Maintains professional tone\n"
+            "- References the provided sources when relevant\n"
+            "- Breaks content into clear paragraphs (2-4 sentences each)\n"
+            "- Uses double line breaks (\\n\\n) between paragraphs for proper formatting\n"
+            "- Starts with an engaging opening paragraph\n"
+            "- Ends with a strong concluding paragraph\n"
+            f"{persona_instructions}\n"
+            "IMPORTANT: Format the 'content' field with proper paragraph breaks using \\n\\n between paragraphs.\n\n"
+            "Return a JSON object with 'title' and 'sections' array. Each section should have 'id', 'heading', 'content', and 'wordCount'.\n\n"
+            f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
+        )
+
+        # NOTE(review): the provider is called without await, so it presumably
+        # blocks the event loop while the model responds - confirm.
+        ai_resp = gemini_structured_json_response(
+            prompt=prompt,
+            schema=schema,
+            temperature=0.2,
+            max_tokens=8192,
+            system_prompt=system,
+        )
+
+        # Check for errors in AI response
+        if not ai_resp:
+            error_msg = "No response from model"
+        elif not isinstance(ai_resp, dict):
+            error_msg = f"Unexpected response type from model: {type(ai_resp).__name__}"
+        else:
+            error_msg = ai_resp.get("error")
+        if error_msg:
+            logger.error(f"AI generation failed: {error_msg}")
+            raise RuntimeError(f"AI generation failed: {error_msg}")
+
+        # Normalize output
+        title = ai_resp.get("title") or req.title
+        requested_ids = [s.id for s in req.sections]
+        out_sections = [
+            self._build_section(raw, index, requested_ids)
+            for index, raw in enumerate(ai_resp.get("sections") or [])
+        ]
+
+        duration_ms = int((time.time() - start) * 1000)
+        result = MediumBlogGenerateResult(
+            success=True,
+            title=title,
+            sections=out_sections,
+            model="gemini-2.5-flash",
+            generation_time_ms=duration_ms,
+            safety_flags=None,
+        )
+
+        # Cache the result for future use; an empty generation is not cached
+        # so that later requests retry instead of replaying an empty blog.
+        if not out_sections:
+            logger.warning("Model returned no sections; result was not cached")
+            return result
+
+        try:
+            self.cache.cache_content(**cache_key, result=result.dict())
+            logger.info(f"Cached content result for keywords: {req.researchKeywords}")
+        except Exception as cache_error:
+            # Don't fail the entire operation if caching fails
+            logger.warning(f"Failed to cache content result: {cache_error}")
+
+        return result
+
+    @staticmethod
+    def _outline_fields(section) -> Dict[str, Any]:
+        """Return a request section's outline attributes.
+
+        Key points and target words are read from the snake_case attribute
+        first and the camelCase one as a fallback.
+        """
+        return {
+            "keyPoints": getattr(section, "key_points", []) or getattr(section, "keyPoints", []),
+            "subheadings": getattr(section, "subheadings", []),
+            "keywords": getattr(section, "keywords", []),
+            "targetWords": getattr(section, "target_words", None) or getattr(section, "targetWords", None),
+        }
+
+    @staticmethod
+    def _build_section(raw: Dict[str, Any], index: int, requested_ids: List[Any]) -> MediumGeneratedSection:
+        """Convert one section dict from the model into a MediumGeneratedSection.
+
+        Args:
+            raw: Section as returned by the model.
+            index: Position of ``raw`` in the model's section list.
+            requested_ids: Ids of the requested sections, in request order,
+                used when the model omits an id.
+        """
+        section_id = raw.get("id")
+        if section_id in (None, ""):
+            # str(None) would produce the literal id "None"; fall back to the
+            # requested section at the same position instead.
+            section_id = requested_ids[index] if index < len(requested_ids) else f"section_{index + 1}"
+
+        content = raw.get("content") or ""
+        # Count the words ourselves when the model reports none.
+        word_count = int(raw.get("wordCount") or 0) or len(content.split())
+
+        return MediumGeneratedSection(
+            id=str(section_id),
+            heading=raw.get("heading") or "",
+            content=content,
+            wordCount=word_count,
+            sources=[
+                # map to ResearchSource shape if possible; keep minimal
+                ResearchSource(title=src.get("title", ""), url=src.get("url", ""))
+                for src in (raw.get("sources") or [])
+            ] or None,
+        )