Added blog writer implementation - WIP

2025-09-12 10:26:08 +05:30
parent 1b65a9487b
commit c0a366269d
38 changed files with 4948 additions and 98 deletions
--- a/backend/services/blog_writer/blog_service.py
+++ b/backend/services/blog_writer/blog_service.py
@@ -0,0 +1,649 @@
+from typing import Any, Dict, List
+from loguru import logger
+from services.llm_providers.gemini_provider import gemini_structured_json_response
+
+from models.blog_models import (
+    BlogResearchRequest,
+    BlogResearchResponse,
+    BlogOutlineRequest,
+    BlogOutlineResponse,
+    BlogOutlineRefineRequest,
+    BlogSectionRequest,
+    BlogSectionResponse,
+    BlogOptimizeRequest,
+    BlogOptimizeResponse,
+    BlogSEOAnalyzeRequest,
+    BlogSEOAnalyzeResponse,
+    BlogSEOMetadataRequest,
+    BlogSEOMetadataResponse,
+    BlogPublishRequest,
+    BlogPublishResponse,
+    ResearchSource,
+    BlogOutlineSection,
+)
+
+
+class BlogWriterService:
+    """Service layer for AI Blog Writer (stub implementations for scaffolding)."""
+
+    async def research(self, request: BlogResearchRequest) -> BlogResearchResponse:
+        """
+        Stage 1: Research & Strategy (AI Orchestration)
+        Uses ONLY Gemini's native Google Search grounding - ONE API call for everything.
+        Follows LinkedIn service pattern for efficiency and cost optimization.
+        """
+        from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider
+        
+        gemini = GeminiGroundedProvider()
+
+        topic = request.topic or ", ".join(request.keywords)
+        industry = request.industry or (request.persona.industry if request.persona and request.persona.industry else "General")
+        target_audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'
+
+        # Single comprehensive research prompt - Gemini handles Google Search automatically
+        research_prompt = f"""
+        Research the topic "{topic}" in the {industry} industry for {target_audience} audience. Provide a comprehensive analysis including:
+
+        1. Current trends and insights (2024-2025)
+        2. Key statistics and data points with sources
+        3. Industry expert opinions and quotes
+        4. Recent developments and news
+        5. Market analysis and forecasts
+        6. Best practices and case studies
+        7. Keyword analysis: primary, secondary, and long-tail opportunities
+        8. Competitor analysis: top players and content gaps
+        9. Content angle suggestions: 5 compelling angles for blog posts
+
+        Focus on factual, up-to-date information from credible sources.
+        Include specific data points, percentages, and recent developments.
+        Structure your response with clear sections for each analysis area.
+        """
+        
+        # Single Gemini call with native Google Search grounding - no fallbacks
+        gemini_result = await gemini.generate_grounded_content(
+            prompt=research_prompt,
+            content_type="research",
+            max_tokens=2000
+        )
+        
+        # Extract sources from grounding metadata
+        sources = self._extract_sources_from_grounding(gemini_result)
+        
+        # Extract search widget and queries for UI display
+        search_widget = gemini_result.get("search_widget", "") or ""
+        search_queries = gemini_result.get("search_queries", []) or []
+        
+        # Parse the comprehensive response for different analysis components
+        content = gemini_result.get("content", "")
+        keyword_analysis = self._parse_keyword_analysis(content, request.keywords)
+        competitor_analysis = self._parse_competitor_analysis(content)
+        suggested_angles = self._parse_content_angles(content, topic, industry)
+        
+        logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
+
+        return BlogResearchResponse(
+            success=True,
+            sources=sources,
+            keyword_analysis=keyword_analysis,
+            competitor_analysis=competitor_analysis,
+            suggested_angles=suggested_angles,
+            # Add search widget and queries for UI display
+            search_widget=search_widget if 'search_widget' in locals() else "",
+            search_queries=search_queries if 'search_queries' in locals() else [],
+        )
+
+    def _extract_sources_from_grounding(self, gemini_result: Dict[str, Any]) -> List[ResearchSource]:
+        """Extract sources from Gemini grounding metadata."""
+        sources = []
+        
+        # The Gemini grounded provider already extracts sources and puts them in the 'sources' field
+        raw_sources = gemini_result.get("sources", [])
+        for src in raw_sources:
+            source = ResearchSource(
+                title=src.get("title", "Untitled"),
+                url=src.get("url", ""),
+                excerpt=src.get("content", "")[:500] if src.get("content") else f"Source from {src.get('title', 'web')}",
+                credibility_score=float(src.get("credibility_score", 0.8)),
+                published_at=str(src.get("publication_date", "2024-01-01"))
+            )
+            sources.append(source)
+        
+        return sources
+
+    def _parse_keyword_analysis(self, content: str, original_keywords: List[str]) -> Dict[str, Any]:
+        """Parse keyword analysis from the research content."""
+        # Extract keywords from content sections
+        lines = content.split('\n')
+        keyword_section = []
+        in_keyword_section = False
+        
+        for line in lines:
+            if 'keyword' in line.lower() and ('analysis' in line.lower() or 'primary' in line.lower()):
+                in_keyword_section = True
+                continue
+            if in_keyword_section and line.strip():
+                if line.startswith(('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')):
+                    break
+                keyword_section.append(line.strip())
+        
+        return {
+            "primary": original_keywords[:1] if original_keywords else [],
+            "secondary": original_keywords[1:] if len(original_keywords) > 1 else [],
+            "long_tail": [f"{kw} guide" for kw in original_keywords[:2]] if original_keywords else [],
+            "search_intent": "informational",
+            "difficulty": 6,
+            "content_gaps": [f"{kw} best practices" for kw in original_keywords[:2]] if original_keywords else [],
+            "analysis_content": "\n".join(keyword_section) if keyword_section else content[:200]
+        }
+
+    def _parse_competitor_analysis(self, content: str) -> Dict[str, Any]:
+        """Parse competitor analysis from the research content."""
+        lines = content.split('\n')
+        competitor_section = []
+        in_competitor_section = False
+        
+        for line in lines:
+            if 'competitor' in line.lower() and ('analysis' in line.lower() or 'top' in line.lower()):
+                in_competitor_section = True
+                continue
+            if in_competitor_section and line.strip():
+                if line.startswith(('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')):
+                    break
+                competitor_section.append(line.strip())
+        
+        return {
+            "top_competitors": [],
+            "content_gaps": [],
+            "opportunities": [],
+            "analysis_notes": "\n".join(competitor_section) if competitor_section else "Competitor analysis from research"
+        }
+
+    def _parse_content_angles(self, content: str, topic: str, industry: str) -> List[str]:
+        """Parse content angles from the research content."""
+        lines = content.split('\n')
+        angles_section = []
+        in_angles_section = False
+        
+        for line in lines:
+            if 'angle' in line.lower() and ('suggest' in line.lower() or 'content' in line.lower()):
+                in_angles_section = True
+                continue
+            if in_angles_section and line.strip():
+                if line.startswith(('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')):
+                    break
+                if line.strip() and not line.startswith(('•', '-', '*')):
+                    angles_section.append(line.strip())
+        
+        # If no angles found in content, use fallback
+        if not angles_section:
+            angles_section = [
+                f"How {topic} is Transforming {industry}",
+                f"Latest {topic} Trends: What You Need to Know",
+                f"{topic} Best Practices for {industry}",
+                f"Case Study: {topic} Success Stories",
+                f"The Future of {topic} in {industry}"
+            ]
+        
+        return angles_section[:5]  # Return top 5 angles
+
+
+    async def generate_outline(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
+        """
+        Stage 2: Content Planning with AI-generated outline using research results
+        Uses Gemini with research data to create comprehensive, SEO-optimized outline
+        """
+        # Extract research insights
+        research = request.research
+        primary_keywords = research.keyword_analysis.get('primary', [])
+        secondary_keywords = research.keyword_analysis.get('secondary', [])
+        content_angles = research.suggested_angles
+        sources = research.sources
+        search_intent = research.keyword_analysis.get('search_intent', 'informational')
+        
+        # Build sophisticated outline generation prompt with advanced content strategy
+        outline_prompt = f"""
+        You are a world-class content strategist and SEO expert with 15+ years of experience creating viral, high-converting blog content. Your outlines have generated millions of views and driven significant business results.
+
+        CONTENT STRATEGY BRIEF:
+        Topic: {', '.join(primary_keywords)}
+        Search Intent: {search_intent}
+        Target Word Count: {request.word_count or 1500} words
+        Industry Context: {getattr(request.persona, 'industry', 'General') if request.persona else 'General'}
+        Audience: {getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'}
+
+        RESEARCH INTELLIGENCE:
+        Primary Keywords: {', '.join(primary_keywords)}
+        Secondary Keywords: {', '.join(secondary_keywords)}
+        Long-tail Opportunities: {', '.join(research.keyword_analysis.get('long_tail', [])[:5])}
+        
+        Content Angles Discovered:
+        {chr(10).join([f"• {angle}" for angle in content_angles[:6]])}
+        
+        Research Sources Available: {len(sources)} authoritative sources with current data
+
+        STRATEGIC OUTLINE REQUIREMENTS:
+
+        1. CONTENT ARCHITECTURE:
+           - Create 5-7 sections that follow a logical progression
+           - Each section must have a clear purpose and value proposition
+           - Build a narrative arc that keeps readers engaged throughout
+           - Include strategic content gaps that competitors miss
+
+        2. SEO OPTIMIZATION:
+           - Naturally integrate primary keywords in H2 headings (not forced)
+           - Use secondary keywords in subheadings and key points
+           - Include long-tail keywords in natural language
+           - Optimize for featured snippets and voice search
+           - Create semantic keyword clusters
+
+        3. READER ENGAGEMENT:
+           - Start with a compelling hook that addresses pain points
+           - Use storytelling elements and real-world examples
+           - Include actionable insights readers can implement immediately
+           - Create sections that encourage social sharing
+           - End with a strong call-to-action
+
+        4. CONTENT DEPTH:
+           - Each section: 2-4 specific, actionable subheadings
+           - Each section: 4-6 key points with research-backed insights
+           - Include data points, statistics, and case studies where relevant
+           - Address common objections and questions
+           - Provide unique angles not covered by competitors
+
+        5. WORD COUNT DISTRIBUTION:
+           - Introduction: 10-15% of total words
+           - Main sections: 70-80% of total words (distributed strategically)
+           - Conclusion: 10-15% of total words
+           - Total target: {request.word_count or 1500} words
+
+        6. COMPETITIVE ADVANTAGE:
+           - Include fresh perspectives from recent research
+           - Address emerging trends and future implications
+           - Provide deeper insights than surface-level content
+           - Include practical tools, frameworks, or templates
+           - Reference authoritative sources and data
+
+        TITLE STRATEGY:
+        Create 3 distinct title options that:
+        - Include primary keywords naturally
+        - Promise clear value to readers
+        - Create curiosity and urgency
+        - Are optimized for click-through rates
+        - Work well for social media sharing
+
+        CRITICAL: Respond ONLY with valid JSON. No additional text or explanations.
+
+        JSON FORMAT:
+        {{
+            "title_options": [
+                "Compelling title with primary keyword and benefit",
+                "Question-based title that creates curiosity",
+                "How-to title with specific outcome promise"
+            ],
+            "outline": [
+                {{
+                    "heading": "Strategic section title with primary keyword",
+                    "subheadings": [
+                        "Specific, actionable subheading 1",
+                        "Data-driven subheading 2", 
+                        "Case study or example subheading 3"
+                    ],
+                    "key_points": [
+                        "Research-backed insight with specific data",
+                        "Actionable step readers can take immediately",
+                        "Common mistake to avoid with explanation",
+                        "Advanced tip that provides competitive advantage",
+                        "Real-world example or case study"
+                    ],
+                    "target_words": 300,
+                    "keywords": ["primary keyword", "secondary keyword", "long-tail phrase"]
+                }}
+            ]
+        }}
+        """
+        
+        logger.info("Generating AI-powered outline using research results")
+        
+        # Define the schema for structured JSON response
+        outline_schema = {
+            "type": "object",
+            "properties": {
+                "title_options": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "3 SEO-optimized title options"
+                },
+                "outline": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "id": {"type": "string"},
+                            "heading": {"type": "string"},
+                            "subheadings": {
+                                "type": "array",
+                                "items": {"type": "string"}
+                            },
+                            "key_points": {
+                                "type": "array",
+                                "items": {"type": "string"}
+                            },
+                            "word_count": {"type": "integer"},
+                            "keywords": {
+                                "type": "array",
+                                "items": {"type": "string"},
+                                "description": "Keywords to focus on in this section"
+                            }
+                        },
+                        "required": ["id", "heading", "subheadings", "key_points", "word_count", "keywords"]
+                    }
+                }
+            },
+            "required": ["title_options", "outline"]
+        }
+        
+        # Generate outline using structured JSON response (no grounding needed)
+        outline_data = gemini_structured_json_response(
+            prompt=outline_prompt,
+            schema=outline_schema,
+            temperature=0.3,
+            max_tokens=3000
+        )
+        
+        # Check for errors in the response
+        if isinstance(outline_data, dict) and 'error' in outline_data:
+            logger.error(f"Gemini structured response error: {outline_data['error']}")
+            raise ValueError(f"AI outline generation failed: {outline_data['error']}")
+        
+        # Validate required fields
+        if not isinstance(outline_data, dict) or 'outline' not in outline_data or not isinstance(outline_data['outline'], list):
+            logger.error(f"Invalid outline structure: {outline_data}")
+            raise ValueError("Invalid outline structure in Gemini response")
+        
+        # Convert to BlogOutlineSection objects
+        outline_sections = []
+        for i, section_data in enumerate(outline_data.get('outline', [])):
+            if not isinstance(section_data, dict) or 'heading' not in section_data:
+                logger.warning(f"Skipping invalid section data at index {i}")
+                continue
+                
+            section = BlogOutlineSection(
+                id=f"s{i+1}",
+                heading=section_data.get('heading', f'Section {i+1}'),
+                subheadings=section_data.get('subheadings', []),
+                key_points=section_data.get('key_points', []),
+                references=sources[:2] if i < 2 else [],  # Assign sources to first 2 sections
+                target_words=section_data.get('target_words', 300),
+                keywords=section_data.get('keywords', [])
+            )
+            outline_sections.append(section)
+        
+        title_options = outline_data.get('title_options', [])
+        if not title_options:
+            raise ValueError("No title options provided in Gemini response")
+        
+        logger.info(f"Generated outline with {len(outline_sections)} sections and {len(title_options)} title options")
+        
+        return BlogOutlineResponse(
+            success=True, 
+            title_options=title_options, 
+            outline=outline_sections
+        )
+    
+
+    async def refine_outline(self, request: BlogOutlineRefineRequest) -> BlogOutlineResponse:
+        """
+        Refine outline with HITL (Human-in-the-Loop) operations
+        Supports add, remove, move, merge, rename operations
+        """
+        outline = request.outline.copy()
+        operation = request.operation.lower()
+        section_id = request.section_id
+        payload = request.payload or {}
+        
+        try:
+            if operation == 'add':
+                # Add new section
+                new_section = BlogOutlineSection(
+                    id=f"s{len(outline) + 1}",
+                    heading=payload.get('heading', 'New Section'),
+                    subheadings=payload.get('subheadings', []),
+                    key_points=payload.get('key_points', []),
+                    references=[],
+                    target_words=payload.get('target_words', 300)
+                )
+                outline.append(new_section)
+                logger.info(f"Added new section: {new_section.heading}")
+                
+            elif operation == 'remove' and section_id:
+                # Remove section
+                outline = [s for s in outline if s.id != section_id]
+                logger.info(f"Removed section: {section_id}")
+                
+            elif operation == 'rename' and section_id:
+                # Rename section
+                for section in outline:
+                    if section.id == section_id:
+                        section.heading = payload.get('heading', section.heading)
+                        break
+                logger.info(f"Renamed section {section_id} to: {payload.get('heading')}")
+                
+            elif operation == 'move' and section_id:
+                # Move section (reorder)
+                direction = payload.get('direction', 'down')  # 'up' or 'down'
+                current_index = next((i for i, s in enumerate(outline) if s.id == section_id), -1)
+                
+                if current_index != -1:
+                    if direction == 'up' and current_index > 0:
+                        outline[current_index], outline[current_index - 1] = outline[current_index - 1], outline[current_index]
+                    elif direction == 'down' and current_index < len(outline) - 1:
+                        outline[current_index], outline[current_index + 1] = outline[current_index + 1], outline[current_index]
+                logger.info(f"Moved section {section_id} {direction}")
+                
+            elif operation == 'merge' and section_id:
+                # Merge with next section
+                current_index = next((i for i, s in enumerate(outline) if s.id == section_id), -1)
+                if current_index != -1 and current_index < len(outline) - 1:
+                    current_section = outline[current_index]
+                    next_section = outline[current_index + 1]
+                    
+                    # Merge sections
+                    current_section.heading = f"{current_section.heading} & {next_section.heading}"
+                    current_section.subheadings.extend(next_section.subheadings)
+                    current_section.key_points.extend(next_section.key_points)
+                    current_section.references.extend(next_section.references)
+                    current_section.target_words = (current_section.target_words or 0) + (next_section.target_words or 0)
+                    
+                    # Remove the next section
+                    outline.pop(current_index + 1)
+                logger.info(f"Merged section {section_id} with next section")
+                
+            elif operation == 'update' and section_id:
+                # Update section details
+                for section in outline:
+                    if section.id == section_id:
+                        if 'heading' in payload:
+                            section.heading = payload['heading']
+                        if 'subheadings' in payload:
+                            section.subheadings = payload['subheadings']
+                        if 'key_points' in payload:
+                            section.key_points = payload['key_points']
+                        if 'target_words' in payload:
+                            section.target_words = payload['target_words']
+                        break
+                logger.info(f"Updated section {section_id}")
+            
+            # Reassign IDs to maintain order
+            for i, section in enumerate(outline):
+                section.id = f"s{i+1}"
+            
+            return BlogOutlineResponse(
+                success=True, 
+                title_options=["Refined Outline"], 
+                outline=outline
+            )
+            
+        except Exception as e:
+            logger.error(f"Outline refinement failed: {e}")
+            return BlogOutlineResponse(
+                success=False, 
+                title_options=["Error"], 
+                outline=request.outline
+            )
+
+    async def generate_section(self, request: BlogSectionRequest) -> BlogSectionResponse:
+        # TODO: Generate section markdown incorporating references and persona/tone
+        md = f"## {request.section.heading}\n\nThis section content will be generated here.\n"
+        return BlogSectionResponse(success=True, markdown=md, citations=request.section.references)
+
+    async def optimize_section(self, request: BlogOptimizeRequest) -> BlogOptimizeResponse:
+        # TODO: Run readability/EEAT optimization and return diff
+        return BlogOptimizeResponse(success=True, optimized=request.content, diff_preview=None)
+
+    async def hallucination_check(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Run hallucination detection on provided text using existing detector service."""
+        text = str(payload.get("text", "") or "").strip()
+        if not text:
+            return {"success": False, "error": "No text provided"}
+
+        # Prefer direct service use over HTTP proxy
+        try:
+            from services.hallucination_detector import HallucinationDetector
+            detector = HallucinationDetector()
+            result = await detector.detect_hallucinations(text)
+
+            # Serialize dataclass-like result to dict
+            claims = []
+            for c in result.claims:
+                claims.append({
+                    "text": c.text,
+                    "confidence": c.confidence,
+                    "assessment": c.assessment,
+                    "supporting_sources": c.supporting_sources,
+                    "refuting_sources": c.refuting_sources,
+                    "reasoning": c.reasoning,
+                })
+
+            return {
+                "success": True,
+                "overall_confidence": result.overall_confidence,
+                "total_claims": result.total_claims,
+                "supported_claims": result.supported_claims,
+                "refuted_claims": result.refuted_claims,
+                "insufficient_claims": result.insufficient_claims,
+                "timestamp": result.timestamp,
+                "claims": claims,
+            }
+        except Exception as e:
+            return {"success": False, "error": str(e)}
+
+    async def seo_analyze(self, request: BlogSEOAnalyzeRequest) -> BlogSEOAnalyzeResponse:
+        """Wrap existing SEO tools to produce unified analysis for blog content."""
+        from services.seo_tools.on_page_seo_service import OnPageSEOService
+        from services.seo_tools.image_alt_service import ImageAltService
+        from services.seo_tools.content_strategy_service import ContentStrategyService
+
+        content = request.content or ""
+        target_keywords = request.keywords or []
+
+        # On-page analysis (treat content as a virtual URL/document for now)
+        on_page = OnPageSEOService()
+        on_page_result = await on_page.analyze_on_page_seo(url="about:blank", target_keywords=target_keywords)
+
+        # Image alt coverage (placeholder: no images in raw content yet)
+        try:
+            image_alt_service = ImageAltService()
+            image_alt_status = {"total_images": 0, "missing_alt": 0}
+        except Exception:
+            image_alt_status = {"total_images": 0, "missing_alt": 0}
+
+        # Strategy hints (keywords/topics)
+        try:
+            strategy = ContentStrategyService()
+            strategy_hints = await strategy.analyze_content_topics(content=content)
+        except Exception:
+            strategy_hints = {"topics": [], "gaps": []}
+
+        # Lightweight markdown parsing for headings/links/keywords
+        import re
+        content_text = content or ""
+        words = re.findall(r"[A-Za-z0-9']+", content_text)
+        total_words = max(len(words), 1)
+        heading_lines = content_text.splitlines()
+        h1 = sum(1 for ln in heading_lines if ln.startswith('# '))
+        h2 = sum(1 for ln in heading_lines if ln.startswith('## '))
+        h3 = sum(1 for ln in heading_lines if ln.startswith('### '))
+        md_links = re.findall(r"\[([^\]]+)\]\(([^)]+)\)", content_text)
+        external_links = [u for (_t, u) in md_links if u.startswith('http')]
+
+        # Keyword density
+        density_map: Dict[str, Any] = {"target_keywords": target_keywords}
+        for kw in target_keywords:
+            try:
+                occurrences = len(re.findall(re.escape(kw), content_text, flags=re.IGNORECASE))
+            except re.error:
+                occurrences = 0
+            density_map[kw] = {
+                "occurrences": occurrences,
+                "density": round(occurrences / total_words, 4)
+            }
+
+        # Build unified response
+        recommendations: List[str] = []
+        if isinstance(on_page_result.get("recommendations"), list):
+            recommendations.extend(on_page_result["recommendations"]) 
+        if strategy_hints.get("gaps"):
+            recommendations.append("Cover missing topics: " + ", ".join(strategy_hints["gaps"]))
+        if not external_links:
+            recommendations.append("Add at least one credible external link to authoritative sources.")
+        if h2 < 2:
+            recommendations.append("Increase number of H2 sections for better structure.")
+
+        # Internal link suggestions: generate anchors for H2s and propose cross-links
+        def to_anchor(h: str) -> str:
+            import re
+            a = re.sub(r"[^a-z0-9\s-]", "", h.lower())
+            a = re.sub(r"\s+", "-", a).strip('-')
+            return a
+        h2_headings = [ln[3:].strip() for ln in heading_lines if ln.startswith('## ')]
+        anchors = [to_anchor(h) for h in h2_headings]
+        internal_link_suggestions = []
+        for i in range(len(anchors)-1):
+            internal_link_suggestions.append({
+                "from": h2_headings[i],
+                "to": h2_headings[i+1],
+                "anchor": f"#{anchors[i+1]}",
+                "suggestion": f"Add internal link from '{h2_headings[i]}' to '{h2_headings[i+1]}'"
+            })
+
+        return BlogSEOAnalyzeResponse(
+            success=True,
+            seo_score=float(on_page_result.get("overall_score", 75)),
+            density=density_map,
+            structure={
+                **on_page_result.get("heading_structure", {}),
+                "markdown_headings": {"h1": h1, "h2": h2, "h3": h3},
+                "links": {"total": len(md_links), "external": len(external_links)}
+            },
+            readability=on_page_result.get("content_analysis", {}),
+            link_suggestions=([{"suggestion": "Add external citation links for key claims."}] if not external_links else []) + internal_link_suggestions,
+            image_alt_status=image_alt_status,
+            recommendations=recommendations,
+        )
+
+    async def seo_metadata(self, request: BlogSEOMetadataRequest) -> BlogSEOMetadataResponse:
+        # TODO: Generate SEO metadata using existing services
+        return BlogSEOMetadataResponse(
+            success=True,
+            title_options=[request.title or "Generated SEO Title"],
+            meta_descriptions=["Compelling meta description..."],
+            open_graph={"title": request.title or "OG Title", "image": ""},
+            twitter_card={"card": "summary_large_image"},
+            schema={"@type": "Article"},
+        )
+
+    async def publish(self, request: BlogPublishRequest) -> BlogPublishResponse:
+        # TODO: Call Wix/WordPress adapters to publish
+        return BlogPublishResponse(success=True, platform=request.platform, url="https://example.com/post")
+
+
--- a/backend/services/llm_providers/pycache/gemini_provider.cpython-313.pyc
+++ b/backend/services/llm_providers/pycache/gemini_provider.cpython-313.pyc
--- a/backend/services/llm_providers/gemini_grounded_provider.py
+++ b/backend/services/llm_providers/gemini_grounded_provider.py
@@ -43,7 +43,7 @@ class GeminiGroundedProvider:
        
        # Initialize the Gemini client with timeout configuration
        self.client = genai.Client(api_key=self.api_key)
-        self.timeout = 30  # 30 second timeout for API calls
+        self.timeout = 60  # 60 second timeout for API calls (increased for research)
        logger.info("✅ Gemini Grounded Provider initialized with native Google Search grounding")
    
    async def generate_grounded_content(
@@ -239,8 +239,8 @@ class GeminiGroundedProvider:
                        logger.info(f"Search queries: {grounding_metadata.web_search_queries}")
                    
                    # Extract sources from grounding chunks
+                    sources = []  # Initialize sources list
                    if hasattr(grounding_metadata, 'grounding_chunks') and grounding_metadata.grounding_chunks:
-                        sources = []
                        for i, chunk in enumerate(grounding_metadata.grounding_chunks):
                            logger.info(f"Chunk {i} attributes: {dir(chunk)}")
                            if hasattr(chunk, 'web'):
@@ -251,15 +251,29 @@ class GeminiGroundedProvider:
                                    'type': 'web'
                                }
                                sources.append(source)
-                        result['sources'] = sources
-                        logger.info(f"Extracted {len(sources)} sources")
+                        logger.info(f"Extracted {len(sources)} sources from grounding chunks")
                    else:
-                        logger.error("❌ CRITICAL: No grounding chunks found in response")
-                        logger.error(f"Grounding metadata structure: {dir(grounding_metadata)}")
-                        if hasattr(grounding_metadata, 'grounding_chunks'):
-                            logger.error(f"Grounding chunks type: {type(grounding_metadata.grounding_chunks)}")
-                            logger.error(f"Grounding chunks value: {grounding_metadata.grounding_chunks}")
-                        raise ValueError("No grounding chunks found - grounding is not working properly")
+                        logger.warning("⚠️ No grounding chunks found - this is normal for some queries")
+                        logger.info(f"Grounding metadata available fields: {[attr for attr in dir(grounding_metadata) if not attr.startswith('_')]}")
+                        
+                        # Check if we have search queries - this means Google Search was triggered
+                        if hasattr(grounding_metadata, 'web_search_queries') and grounding_metadata.web_search_queries:
+                            logger.info(f"✅ Google Search was triggered with {len(grounding_metadata.web_search_queries)} queries")
+                            # Create sources based on search queries
+                            for i, query in enumerate(grounding_metadata.web_search_queries[:5]):  # Limit to 5 sources
+                                source = {
+                                    'index': i,
+                                    'title': f"Search: {query}",
+                                    'url': f"https://www.google.com/search?q={query.replace(' ', '+')}",
+                                    'type': 'search_query',
+                                    'query': query
+                                }
+                                sources.append(source)
+                            logger.info(f"Created {len(sources)} sources from search queries")
+                        else:
+                            logger.warning("⚠️ No search queries found either - grounding may not have been triggered")
+                    
+                    result['sources'] = sources
                    
                    # Extract citations from grounding supports
                    if hasattr(grounding_metadata, 'grounding_supports') and grounding_metadata.grounding_supports:
@@ -278,12 +292,37 @@ class GeminiGroundedProvider:
                        result['citations'] = citations
                        logger.info(f"Extracted {len(citations)} citations")
                    else:
-                        logger.error("❌ CRITICAL: No grounding supports found in response")
-                        logger.error(f"Grounding metadata structure: {dir(grounding_metadata)}")
-                        if hasattr(grounding_metadata, 'grounding_supports'):
-                            logger.error(f"Grounding supports type: {type(grounding_metadata.grounding_supports)}")
-                            logger.error(f"Grounding supports value: {grounding_metadata.grounding_supports}")
-                        raise ValueError("No grounding supports found - grounding is not working properly")
+                        logger.warning("⚠️ No grounding supports found - this is normal when no web sources are retrieved")
+                        # Create basic citations from the content if we have sources
+                        if sources:
+                            citations = []
+                            for i, source in enumerate(sources[:3]):  # Limit to 3 citations
+                                citation = {
+                                    'type': 'reference',
+                                    'start_index': 0,
+                                    'end_index': 0,
+                                    'text': f"Source {i+1}",
+                                    'source_indices': [i],
+                                    'reference': f"Source {i+1}",
+                                    'source': source
+                                }
+                                citations.append(citation)
+                            result['citations'] = citations
+                            logger.info(f"Created {len(citations)} basic citations from sources")
+                        else:
+                            result['citations'] = []
+                            logger.info("No citations created - no sources available")
+                    
+                    # Extract search entry point for UI display
+                    if hasattr(grounding_metadata, 'search_entry_point') and grounding_metadata.search_entry_point:
+                        if hasattr(grounding_metadata.search_entry_point, 'rendered_content'):
+                            result['search_widget'] = grounding_metadata.search_entry_point.rendered_content
+                            logger.info("✅ Extracted search widget HTML for UI display")
+                    
+                    # Extract search queries for reference
+                    if hasattr(grounding_metadata, 'web_search_queries') and grounding_metadata.web_search_queries:
+                        result['search_queries'] = grounding_metadata.web_search_queries
+                        logger.info(f"✅ Extracted {len(grounding_metadata.web_search_queries)} search queries")
                    
                    logger.info(f"✅ Successfully extracted {len(result['sources'])} sources and {len(result['citations'])} citations from grounding metadata")
                    logger.info(f"Sources: {result['sources']}")
--- a/backend/services/llm_providers/gemini_provider.py
+++ b/backend/services/llm_providers/gemini_provider.py
@@ -389,43 +389,13 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
            config=generation_config,
        )

-        # Add debugging for response
-        logger.info("Gemini response | type=%s | has_text=%s | has_parsed=%s",
-                     type(response), hasattr(response, 'text'), hasattr(response, 'parsed'))
-        
-        if hasattr(response, 'text'):
-            logger.info(f"Gemini response.text: {repr(response.text)}")
-        if hasattr(response, 'parsed'):
-            logger.info(f"Gemini response.parsed: {repr(response.parsed)}")
-
        # According to the documentation, we should use response.parsed for structured output
        if hasattr(response, 'parsed') and response.parsed is not None:
            logger.info("Using response.parsed for structured output")
            return response.parsed
        
-        # Fallback to text if parsed is not available
-        if hasattr(response, 'text') and response.text:
-            logger.info("Falling back to response.text parsing")
-            text = response.text.strip()
-            
-            # Strip markdown code fences if present
-            if text.startswith('```'):
-                if text.lower().startswith('```json'):
-                    text = text[7:]
-                else:
-                    text = text[3:]
-                if text.endswith('```'):
-                    text = text[:-3]
-                text = text.strip()
-            
-            try:
-                return json.loads(text)
-            except json.JSONDecodeError as e:
-                logger.error(f"Failed to parse response.text as JSON: {e}")
-                return {"error": f"Failed to parse JSON response: {e}", "raw_response": text[:500]}
-        
-        logger.error("No valid response content found")
-        return {"error": "No valid response content found", "raw_response": ""}
+        logger.error("No valid structured response content found")
+        return {"error": "No valid structured response content found"}

    except ValueError as e:
        # API key related errors
--- a/backend/services/research/google_search_service.py
+++ b/backend/services/research/google_search_service.py
@@ -45,8 +45,7 @@ class GoogleSearchService:
        self.base_url = "https://www.googleapis.com/customsearch/v1"
        
        if not self.api_key or not self.search_engine_id:
-            logger.warning("Google Search API credentials not configured. Service will use fallback methods.")
-            self.enabled = False
+            raise ValueError("Google Search API credentials not configured. Please set GOOGLE_SEARCH_API_KEY and GOOGLE_SEARCH_ENGINE_ID environment variables.")
        else:
            self.enabled = True
            logger.info("Google Search Service initialized successfully")
@@ -69,8 +68,7 @@ class GoogleSearchService:
            List of search results with credibility scoring
        """
        if not self.enabled:
-            logger.warning("Google Search Service not enabled, using fallback research")
-            return await self._fallback_research(topic, industry)
+            raise RuntimeError("Google Search Service is not enabled. Please configure API credentials.")
        
        try:
            # Construct industry-specific search query
@@ -99,7 +97,7 @@ class GoogleSearchService:
            
        except Exception as e:
            logger.error(f"Google search failed: {str(e)}")
-            return await self._fallback_research(topic, industry)
+            raise RuntimeError(f"Google search failed: {str(e)}")
    
    def _build_search_query(self, topic: str, industry: str) -> str:
        """
@@ -465,45 +463,6 @@ class GoogleSearchService:
            "statistics": statistics
        }
    
-    async def _fallback_research(self, topic: str, industry: str) -> Dict[str, Any]:
-        """
-        Fallback research method when Google Search is not available.
-        
-        Args:
-            topic: The research topic
-            industry: The industry context
-            
-        Returns:
-            Fallback research data
-        """
-        logger.info(f"Using fallback research for {topic} in {industry}")
-        
-        return {
-            "sources": [
-                {
-                    "title": f"Industry insights on {topic} in {industry}",
-                    "url": f"https://example.com/{topic.lower().replace(' ', '-')}",
-                    "content": f"Professional insights and trends related to {topic} in the {industry} sector...",
-                    "relevance_score": 0.8,
-                    "credibility_score": 0.6,
-                    "domain_authority": 0.5,
-                    "source_type": "general",
-                    "grounding_enabled": False
-                }
-            ],
-            "key_insights": [
-                f"{topic} is transforming {industry} operations",
-                f"Industry leaders are investing in {topic}",
-                f"Expected growth in {topic} adoption within {industry}"
-            ],
-            "statistics": [
-                f"85% of {industry} companies are exploring {topic}",
-                f"Investment in {topic} increased by 40% this year"
-            ],
-            "grounding_enabled": False,
-            "search_query": f"{topic} {industry} trends",
-            "timestamp": datetime.utcnow().isoformat()
-        }
    
    async def test_api_connection(self) -> Dict[str, Any]:
        """
@@ -513,11 +472,7 @@ class GoogleSearchService:
            Test results and status information
        """
        if not self.enabled:
-            return {
-                "status": "disabled",
-                "message": "Google Search API credentials not configured",
-                "enabled": False
-            }
+            raise RuntimeError("Google Search Service is not enabled. Please configure API credentials.")
        
        try:
            # Perform a simple test search