From f98d49cea7859a8ae2b48f5e5c79dc536165cbcb Mon Sep 17 00:00:00 2001 From: ajaysi Date: Sat, 20 Sep 2025 22:15:17 +0530 Subject: [PATCH] Allowing AI to generate suggestions for the blog writer --- backend/api/blog_writer/router.py | 37 + backend/api/blog_writer/task_manager.py | 52 +- backend/models/blog_models.py | 42 + .../blog_writer/core/blog_writer_service.py | 185 ++++ .../cache/persistent_content_cache.py | 363 ++++++++ .../services/llm_providers/gemini_provider.py | 41 +- .../src/components/BlogWriter/BlogWriter.tsx | 289 +++++-- .../BlogWriter/KeywordInputForm.tsx | 7 +- .../BlogWriter/OutlineFeedbackForm.tsx | 498 +++++++++++ .../BlogWriter/OutlineProgressModal.tsx | 290 +++++++ .../BlogWriter/StyledSuggestions.tsx | 108 +++ .../BlogWriter/SuggestionsGenerator.tsx | 94 ++- .../BlogWriter/WYSIWYG/BlogEditor.tsx | 293 +++++++ .../BlogWriter/WYSIWYG/BlogSection.tsx | 373 ++++++++ .../WYSIWYG/BlogTextSelectionHandler.tsx | 793 ++++++++++++++++++ .../BlogWriter/WYSIWYG/EditorSidebar.tsx | 89 ++ .../BlogWriter/WYSIWYG/HoverMenu.tsx | 318 +++++++ .../WYSIWYG/ResearchIntegration.tsx | 361 ++++++++ .../components/BlogWriter/WYSIWYG/index.ts | 3 + frontend/src/hooks/useBlogWriterState.ts | 43 +- frontend/src/hooks/usePolling.ts | 22 +- frontend/src/services/blogWriterApi.ts | 43 +- 22 files changed, 4248 insertions(+), 96 deletions(-) create mode 100644 backend/services/cache/persistent_content_cache.py create mode 100644 frontend/src/components/BlogWriter/OutlineFeedbackForm.tsx create mode 100644 frontend/src/components/BlogWriter/OutlineProgressModal.tsx create mode 100644 frontend/src/components/BlogWriter/StyledSuggestions.tsx create mode 100644 frontend/src/components/BlogWriter/WYSIWYG/BlogEditor.tsx create mode 100644 frontend/src/components/BlogWriter/WYSIWYG/BlogSection.tsx create mode 100644 frontend/src/components/BlogWriter/WYSIWYG/BlogTextSelectionHandler.tsx create mode 100644 frontend/src/components/BlogWriter/WYSIWYG/EditorSidebar.tsx create mode 100644 frontend/src/components/BlogWriter/WYSIWYG/HoverMenu.tsx create mode 100644 frontend/src/components/BlogWriter/WYSIWYG/ResearchIntegration.tsx create mode 100644 frontend/src/components/BlogWriter/WYSIWYG/index.ts diff --git a/backend/api/blog_writer/router.py b/backend/api/blog_writer/router.py index 12bd23ad..34af5326 100644 --- a/backend/api/blog_writer/router.py +++ b/backend/api/blog_writer/router.py @@ -31,6 +31,7 @@ from models.blog_models import ( from services.blog_writer.blog_service import BlogWriterService from .task_manager import task_manager from .cache_manager import cache_manager +from models.blog_models import MediumBlogGenerateRequest router = APIRouter(prefix="/api/blog", tags=["AI Blog Writer"]) @@ -289,4 +290,40 @@ async def get_outline_cache_entries(limit: int = 20): return cache_manager.get_recent_outline_cache_entries(limit) except Exception as e: logger.error(f"Failed to get outline cache entries: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +# --------------------------- +# Medium Blog Generation API +# --------------------------- + +@router.post("/generate/medium/start") +async def start_medium_generation(request: MediumBlogGenerateRequest): + """Start medium-length blog generation (≤1000 words) and return a task id.""" + try: + # Simple server-side guard + if (request.globalTargetWords or 1000) > 1000: + raise HTTPException(status_code=400, detail="Global target words exceed 1000; use per-section generation") + + task_id = task_manager.start_medium_generation_task(request) + return {"task_id": task_id, "status": "started"} + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to start medium generation: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/generate/medium/status/{task_id}") +async def medium_generation_status(task_id: str): + """Poll status for medium blog generation task.""" + try: + status = task_manager.get_task_status(task_id) + if status is None: + raise HTTPException(status_code=404, detail="Task not found") + return status + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to get medium generation status for {task_id}: {e}") raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/backend/api/blog_writer/task_manager.py b/backend/api/blog_writer/task_manager.py index 18157ee8..da4a8a2c 100644 --- a/backend/api/blog_writer/task_manager.py +++ b/backend/api/blog_writer/task_manager.py @@ -11,7 +11,12 @@ from datetime import datetime from typing import Any, Dict, List from loguru import logger -from models.blog_models import BlogResearchRequest, BlogOutlineRequest +from models.blog_models import ( + BlogResearchRequest, + BlogOutlineRequest, + MediumBlogGenerateRequest, + MediumBlogGenerateResult, +) from services.blog_writer.blog_service import BlogWriterService @@ -106,6 +111,12 @@ class TaskManager: asyncio.create_task(self._run_outline_generation_task(task_id, request)) return task_id + + def start_medium_generation_task(self, request: MediumBlogGenerateRequest) -> str: + """Start a medium (≤1000 words) full-blog generation task.""" + task_id = self.create_task("medium_generation") + asyncio.create_task(self._run_medium_generation_task(task_id, request)) + return task_id async def _run_research_task(self, task_id: str, request: BlogResearchRequest): """Background task to run research and update status with progress messages.""" @@ -174,6 +185,45 @@ class TaskManager: self.task_storage[task_id]["status"] = "failed" self.task_storage[task_id]["error"] = str(e) + async def _run_medium_generation_task(self, task_id: str, request: MediumBlogGenerateRequest): + """Background task to generate a medium blog using a single structured JSON call.""" + try: + self.task_storage[task_id]["status"] = "running" + self.task_storage[task_id]["progress_messages"] = [] + + await self.update_progress(task_id, "📦 Packaging outline and metadata...") + + # Basic guard: respect global target words + total_target = int(request.globalTargetWords or 1000) + if total_target > 1000: + raise ValueError("Global target words exceed 1000; medium generation not allowed") + + result: MediumBlogGenerateResult = await self.service.generate_medium_blog_with_progress( + request, + task_id, + ) + + if not result or not getattr(result, "sections", None): + raise ValueError("Empty generation result from model") + + # Check if result came from cache + cache_hit = getattr(result, 'cache_hit', False) + if cache_hit: + await self.update_progress(task_id, "⚡ Found cached content - loading instantly!") + else: + await self.update_progress(task_id, "🤖 Generated fresh content with AI...") + await self.update_progress(task_id, "✨ Post-processing and assembling sections...") + + # Mark completed + self.task_storage[task_id]["status"] = "completed" + self.task_storage[task_id]["result"] = result.dict() + await self.update_progress(task_id, f"✅ Generated {len(result.sections)} sections successfully.") + + except Exception as e: + await self.update_progress(task_id, f"❌ Medium generation failed: {str(e)}") + self.task_storage[task_id]["status"] = "failed" + self.task_storage[task_id]["error"] = str(e) + # Global task manager instance task_manager = TaskManager() diff --git a/backend/models/blog_models.py b/backend/models/blog_models.py index 6f9caf30..704c5047 100644 --- a/backend/models/blog_models.py +++ b/backend/models/blog_models.py @@ -215,3 +215,45 @@ class HallucinationCheckResponse(BaseModel): claims: List[Dict[str, Any]] = [] suggestions: List[Dict[str, Any]] = [] + +# ----------------------- +# Medium Blog Generation +# ----------------------- + +class MediumSectionOutline(BaseModel): + """Lightweight outline payload for medium blog generation.""" + id: str + heading: str + keyPoints: List[str] = [] + subheadings: List[str] = [] + keywords: List[str] = [] + targetWords: Optional[int] = None + references: List[ResearchSource] = [] + + +class MediumBlogGenerateRequest(BaseModel): + """Request to generate an entire medium-length blog in one pass.""" + title: str + sections: List[MediumSectionOutline] + persona: Optional[PersonaInfo] = None + tone: Optional[str] = None + audience: Optional[str] = None + globalTargetWords: Optional[int] = 1000 + researchKeywords: Optional[List[str]] = None # Original research keywords for better caching + + +class MediumGeneratedSection(BaseModel): + id: str + heading: str + content: str + wordCount: int + sources: Optional[List[ResearchSource]] = None + + +class MediumBlogGenerateResult(BaseModel): + success: bool = True + title: str + sections: List[MediumGeneratedSection] + model: Optional[str] = None + generation_time_ms: Optional[int] = None + safety_flags: Optional[Dict[str, Any]] = None \ No newline at end of file diff --git a/backend/services/blog_writer/core/blog_writer_service.py b/backend/services/blog_writer/core/blog_writer_service.py index 94804aa5..1568d239 100644 --- a/backend/services/blog_writer/core/blog_writer_service.py +++ b/backend/services/blog_writer/core/blog_writer_service.py @@ -24,11 +24,19 @@ from models.blog_models import ( BlogPublishRequest, BlogPublishResponse, BlogOutlineSection, + ResearchSource, ) from ..research import ResearchService from ..outline import OutlineService from ..content.enhanced_content_generator import EnhancedContentGenerator +from services.llm_providers.gemini_provider import gemini_structured_json_response +from services.cache.persistent_content_cache import persistent_content_cache +from models.blog_models import ( + MediumBlogGenerateRequest, + MediumBlogGenerateResult, + MediumGeneratedSection, +) class BlogWriterService: @@ -258,3 +266,180 @@ class BlogWriterService: """Publish content to specified platform.""" # TODO: Move to content module return BlogPublishResponse(success=True, platform=request.platform, url="https://example.com/post") + + async def generate_medium_blog_with_progress(self, req: MediumBlogGenerateRequest, task_id: str) -> MediumBlogGenerateResult: + """Use Gemini structured JSON to generate a medium-length blog in one call.""" + import time + start = time.time() + + # Prepare sections data for cache key generation + sections_for_cache = [] + for s in req.sections: + sections_for_cache.append({ + "id": s.id, + "heading": s.heading, + "keyPoints": getattr(s, "key_points", []) or getattr(s, "keyPoints", []), + "subheadings": getattr(s, "subheadings", []), + "keywords": getattr(s, "keywords", []), + "targetWords": getattr(s, "target_words", None) or getattr(s, "targetWords", None), + }) + + # Check cache first + cached_result = persistent_content_cache.get_cached_content( + keywords=req.researchKeywords or [], + sections=sections_for_cache, + global_target_words=req.globalTargetWords or 1000, + persona_data=req.persona.dict() if req.persona else None, + tone=req.tone, + audience=req.audience + ) + + if cached_result: + logger.info(f"Using cached content for keywords: {req.researchKeywords} (saved expensive generation)") + # Add cache hit marker to distinguish from fresh generation + cached_result['generation_time_ms'] = 0 # Mark as cache hit + cached_result['cache_hit'] = True + return MediumBlogGenerateResult(**cached_result) + + # Cache miss - proceed with AI generation + logger.info(f"Cache miss - generating new content for keywords: {req.researchKeywords}") + + # Build schema expected from the model + schema = { + "type": "object", + "properties": { + "title": {"type": "string"}, + "sections": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "heading": {"type": "string"}, + "content": {"type": "string"}, + "wordCount": {"type": "number"}, + "sources": { + "type": "array", + "items": { + "type": "object", + "properties": {"title": {"type": "string"}, "url": {"type": "string"}}, + }, + }, + }, + }, + }, + }, + } + + # Compose prompt + def section_block(s): + return { + "id": s.id, + "heading": s.heading, + "outline": { + "keyPoints": getattr(s, "key_points", []) or getattr(s, "keyPoints", []), + "subheadings": getattr(s, "subheadings", []), + "keywords": getattr(s, "keywords", []), + "targetWords": getattr(s, "target_words", None) or getattr(s, "targetWords", None), + "references": [ + {"title": r.title, "url": r.url} for r in getattr(s, "references", []) + ], + }, + } + + payload = { + "title": req.title, + "globalTargetWords": req.globalTargetWords or 1000, + "persona": req.persona.dict() if req.persona else None, + "tone": req.tone, + "audience": req.audience, + "sections": [section_block(s) for s in req.sections], + } + + system = ( + "You are a professional blog writer. Generate high-quality content for each section based on the provided outline. " + "Write engaging, informative content that follows the section's key points and target word count. " + "Use a professional tone and ensure the content flows naturally. " + "Format content with proper paragraph breaks using double line breaks (\\n\\n) between paragraphs. " + "Structure content with clear paragraphs - aim for 2-4 sentences per paragraph. " + "Return ONLY valid JSON with no markdown formatting or explanations." + ) + + import json + prompt = ( + f"Write blog content for the following sections. Each section should be {req.globalTargetWords or 1000} words total, distributed across all sections.\n\n" + f"Blog Title: {req.title}\n\n" + "For each section, write engaging content that:\n" + "- Follows the key points provided\n" + "- Uses the suggested keywords naturally\n" + "- Meets the target word count\n" + "- Maintains professional tone\n" + "- References the provided sources when relevant\n" + "- Breaks content into clear paragraphs (2-4 sentences each)\n" + "- Uses double line breaks (\\n\\n) between paragraphs for proper formatting\n" + "- Starts with an engaging opening paragraph\n" + "- Ends with a strong concluding paragraph\n\n" + "IMPORTANT: Format the 'content' field with proper paragraph breaks using \\n\\n between paragraphs.\n\n" + "Return a JSON object with 'title' and 'sections' array. Each section should have 'id', 'heading', 'content', and 'wordCount'.\n\n" + f"Sections to write:\n{json.dumps(payload, ensure_ascii=False, indent=2)}" + ) + + ai_resp = gemini_structured_json_response( + prompt=prompt, + schema=schema, + temperature=0.2, + max_tokens=8192, + system_prompt=system, + ) + + # Check for errors in AI response + if not ai_resp or ai_resp.get("error"): + error_msg = ai_resp.get("error", "Empty generation result from model") if ai_resp else "No response from model" + logger.error(f"AI generation failed: {error_msg}") + raise Exception(f"AI generation failed: {error_msg}") + + # Normalize output + title = ai_resp.get("title") or req.title + out_sections = [] + for s in ai_resp.get("sections", []) or []: + out_sections.append( + MediumGeneratedSection( + id=str(s.get("id")), + heading=s.get("heading") or "", + content=s.get("content") or "", + wordCount=int(s.get("wordCount") or 0), + sources=[ + # map to ResearchSource shape if possible; keep minimal + ResearchSource(title=src.get("title", ""), url=src.get("url", "")) + for src in (s.get("sources") or []) + ] or None, + ) + ) + + duration_ms = int((time.time() - start) * 1000) + result = MediumBlogGenerateResult( + success=True, + title=title, + sections=out_sections, + model="gemini-2.5-flash", + generation_time_ms=duration_ms, + safety_flags=None, + ) + + # Cache the result for future use + try: + persistent_content_cache.cache_content( + keywords=req.researchKeywords or [], + sections=sections_for_cache, + global_target_words=req.globalTargetWords or 1000, + persona_data=req.persona.dict() if req.persona else None, + tone=req.tone or "professional", + audience=req.audience or "general", + result=result.dict() + ) + logger.info(f"Cached content result for keywords: {req.researchKeywords}") + except Exception as cache_error: + logger.warning(f"Failed to cache content result: {cache_error}") + # Don't fail the entire operation if caching fails + + return result diff --git a/backend/services/cache/persistent_content_cache.py b/backend/services/cache/persistent_content_cache.py new file mode 100644 index 00000000..ab3094e0 --- /dev/null +++ b/backend/services/cache/persistent_content_cache.py @@ -0,0 +1,363 @@ +""" +Persistent Content Cache Service + +Provides database-backed caching for blog content generation results to survive server restarts +and provide better cache management across multiple instances. +""" + +import hashlib +import json +import sqlite3 +from typing import Dict, Any, Optional, List +from datetime import datetime, timedelta +from pathlib import Path +from loguru import logger + + +class PersistentContentCache: + """Database-backed cache for blog content generation results with exact parameter matching.""" + + def __init__(self, db_path: str = "content_cache.db", max_cache_size: int = 300, cache_ttl_hours: int = 72): + """ + Initialize the persistent content cache. + + Args: + db_path: Path to SQLite database file + max_cache_size: Maximum number of cached entries + cache_ttl_hours: Time-to-live for cache entries in hours (longer than research cache since content is expensive) + """ + self.db_path = db_path + self.max_cache_size = max_cache_size + self.cache_ttl = timedelta(hours=cache_ttl_hours) + + # Ensure database directory exists + Path(db_path).parent.mkdir(parents=True, exist_ok=True) + + # Initialize database + self._init_database() + + def _init_database(self): + """Initialize the SQLite database with required tables.""" + with sqlite3.connect(self.db_path) as conn: + conn.execute(""" + CREATE TABLE IF NOT EXISTS content_cache ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + cache_key TEXT UNIQUE NOT NULL, + title TEXT NOT NULL, + sections_hash TEXT NOT NULL, + global_target_words INTEGER NOT NULL, + persona_data TEXT, + tone TEXT, + audience TEXT, + result_data TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + expires_at TIMESTAMP NOT NULL, + access_count INTEGER DEFAULT 0, + last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """) + + # Create indexes for better performance + conn.execute("CREATE INDEX IF NOT EXISTS idx_content_cache_key ON content_cache(cache_key)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_content_expires_at ON content_cache(expires_at)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_content_created_at ON content_cache(created_at)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_content_title ON content_cache(title)") + + conn.commit() + + def _generate_sections_hash(self, sections: List[Dict[str, Any]]) -> str: + """ + Generate a hash for sections based on their structure and content. + + Args: + sections: List of section dictionaries with outline information + + Returns: + MD5 hash of the normalized sections + """ + # Normalize sections for consistent hashing + normalized_sections = [] + for section in sections: + normalized_section = { + 'id': section.get('id', ''), + 'heading': section.get('heading', '').lower().strip(), + 'keyPoints': sorted([str(kp).lower().strip() for kp in section.get('keyPoints', [])]), + 'keywords': sorted([str(kw).lower().strip() for kw in section.get('keywords', [])]), + 'subheadings': sorted([str(sh).lower().strip() for sh in section.get('subheadings', [])]), + 'targetWords': section.get('targetWords', 0), + # Don't include references in hash as they might vary but content should remain similar + } + normalized_sections.append(normalized_section) + + # Sort sections by id for consistent ordering + normalized_sections.sort(key=lambda x: x['id']) + + # Generate hash + sections_str = json.dumps(normalized_sections, sort_keys=True) + return hashlib.md5(sections_str.encode('utf-8')).hexdigest() + + def _generate_cache_key(self, keywords: List[str], sections: List[Dict[str, Any]], + global_target_words: int, persona_data: Dict = None, + tone: str = None, audience: str = None) -> str: + """ + Generate a cache key based on exact parameter match. + + Args: + keywords: Original research keywords (primary cache key) + sections: List of section dictionaries with outline information + global_target_words: Target word count for entire blog + persona_data: Persona information + tone: Content tone + audience: Target audience + + Returns: + MD5 hash of the normalized parameters + """ + # Normalize parameters + normalized_keywords = sorted([kw.lower().strip() for kw in (keywords or [])]) + sections_hash = self._generate_sections_hash(sections) + normalized_tone = tone.lower().strip() if tone else "professional" + normalized_audience = audience.lower().strip() if audience else "general" + + # Normalize persona data + normalized_persona = "" + if persona_data: + # Sort persona keys and values for consistent hashing + persona_str = json.dumps(persona_data, sort_keys=True, default=str) + normalized_persona = persona_str.lower() + + # Create a consistent string representation + cache_string = f"{normalized_keywords}|{sections_hash}|{global_target_words}|{normalized_tone}|{normalized_audience}|{normalized_persona}" + + # Generate MD5 hash + return hashlib.md5(cache_string.encode('utf-8')).hexdigest() + + def _cleanup_expired_entries(self): + """Remove expired cache entries from database.""" + with sqlite3.connect(self.db_path) as conn: + cursor = conn.execute( + "DELETE FROM content_cache WHERE expires_at < ?", + (datetime.now().isoformat(),) + ) + deleted_count = cursor.rowcount + if deleted_count > 0: + logger.debug(f"Removed {deleted_count} expired content cache entries") + conn.commit() + + def _evict_oldest_entries(self, num_to_evict: int): + """Evict the oldest cache entries when cache is full.""" + with sqlite3.connect(self.db_path) as conn: + # Get oldest entries by creation time + cursor = conn.execute(""" + SELECT id FROM content_cache + ORDER BY created_at ASC + LIMIT ? + """, (num_to_evict,)) + + old_ids = [row[0] for row in cursor.fetchall()] + + if old_ids: + placeholders = ','.join(['?' for _ in old_ids]) + conn.execute(f"DELETE FROM content_cache WHERE id IN ({placeholders})", old_ids) + logger.debug(f"Evicted {len(old_ids)} oldest content cache entries") + + conn.commit() + + def get_cached_content(self, keywords: List[str], sections: List[Dict[str, Any]], + global_target_words: int, persona_data: Dict = None, + tone: str = None, audience: str = None) -> Optional[Dict[str, Any]]: + """ + Get cached content result for exact parameter match. + + Args: + keywords: Original research keywords (primary cache key) + sections: List of section dictionaries with outline information + global_target_words: Target word count for entire blog + persona_data: Persona information + tone: Content tone + audience: Target audience + + Returns: + Cached content result if found and valid, None otherwise + """ + cache_key = self._generate_cache_key(keywords, sections, global_target_words, persona_data, tone, audience) + + with sqlite3.connect(self.db_path) as conn: + cursor = conn.execute(""" + SELECT result_data, expires_at FROM content_cache + WHERE cache_key = ? AND expires_at > ? + """, (cache_key, datetime.now().isoformat())) + + row = cursor.fetchone() + + if row is None: + logger.debug(f"Content cache miss for keywords: {keywords}, sections: {len(sections)}") + return None + + # Update access statistics + conn.execute(""" + UPDATE content_cache + SET access_count = access_count + 1, last_accessed = CURRENT_TIMESTAMP + WHERE cache_key = ? + """, (cache_key,)) + conn.commit() + + try: + result_data = json.loads(row[0]) + logger.info(f"Content cache hit for keywords: {keywords} (saved expensive generation)") + return result_data + except json.JSONDecodeError: + logger.error(f"Invalid JSON in content cache for keywords: {keywords}") + # Remove invalid entry + conn.execute("DELETE FROM content_cache WHERE cache_key = ?", (cache_key,)) + conn.commit() + return None + + def cache_content(self, keywords: List[str], sections: List[Dict[str, Any]], + global_target_words: int, persona_data: Dict, tone: str, + audience: str, result: Dict[str, Any]): + """ + Cache a content generation result. + + Args: + keywords: Original research keywords (primary cache key) + sections: List of section dictionaries with outline information + global_target_words: Target word count for entire blog + persona_data: Persona information + tone: Content tone + audience: Target audience + result: Content result to cache + """ + cache_key = self._generate_cache_key(keywords, sections, global_target_words, persona_data, tone, audience) + sections_hash = self._generate_sections_hash(sections) + + # Cleanup expired entries first + self._cleanup_expired_entries() + + # Check if cache is full and evict if necessary + with sqlite3.connect(self.db_path) as conn: + cursor = conn.execute("SELECT COUNT(*) FROM content_cache") + current_count = cursor.fetchone()[0] + + if current_count >= self.max_cache_size: + num_to_evict = current_count - self.max_cache_size + 1 + self._evict_oldest_entries(num_to_evict) + + # Store the result + expires_at = datetime.now() + self.cache_ttl + + with sqlite3.connect(self.db_path) as conn: + conn.execute(""" + INSERT OR REPLACE INTO content_cache + (cache_key, title, sections_hash, global_target_words, persona_data, tone, audience, result_data, expires_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + """, ( + cache_key, + json.dumps(keywords), # Store keywords as JSON + sections_hash, + global_target_words, + json.dumps(persona_data) if persona_data else "", + tone or "", + audience or "", + json.dumps(result), + expires_at.isoformat() + )) + conn.commit() + + logger.info(f"Cached content result for keywords: {keywords}, {len(sections)} sections") + + def get_cache_stats(self) -> Dict[str, Any]: + """Get cache statistics.""" + self._cleanup_expired_entries() + + with sqlite3.connect(self.db_path) as conn: + # Get basic stats + cursor = conn.execute("SELECT COUNT(*) FROM content_cache") + total_entries = cursor.fetchone()[0] + + cursor = conn.execute("SELECT COUNT(*) FROM content_cache WHERE expires_at > ?", (datetime.now().isoformat(),)) + valid_entries = cursor.fetchone()[0] + + # Get most accessed entries + cursor = conn.execute(""" + SELECT title, global_target_words, access_count, created_at + FROM content_cache + ORDER BY access_count DESC + LIMIT 10 + """) + top_entries = [ + { + 'title': row[0], + 'global_target_words': row[1], + 'access_count': row[2], + 'created_at': row[3] + } + for row in cursor.fetchall() + ] + + # Get database size + cursor = conn.execute("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()") + db_size_bytes = cursor.fetchone()[0] + db_size_mb = db_size_bytes / (1024 * 1024) + + return { + 'total_entries': total_entries, + 'valid_entries': valid_entries, + 'expired_entries': total_entries - valid_entries, + 'max_size': self.max_cache_size, + 'ttl_hours': self.cache_ttl.total_seconds() / 3600, + 'database_size_mb': round(db_size_mb, 2), + 'top_accessed_entries': top_entries + } + + def clear_cache(self): + """Clear all cached entries.""" + with sqlite3.connect(self.db_path) as conn: + conn.execute("DELETE FROM content_cache") + conn.commit() + logger.info("Content cache cleared") + + def get_cache_entries(self, limit: int = 50) -> List[Dict[str, Any]]: + """Get recent cache entries for debugging.""" + with sqlite3.connect(self.db_path) as conn: + cursor = conn.execute(""" + SELECT title, global_target_words, tone, audience, created_at, expires_at, access_count + FROM content_cache + ORDER BY created_at DESC + LIMIT ? + """, (limit,)) + + return [ + { + 'title': row[0], + 'global_target_words': row[1], + 'tone': row[2], + 'audience': row[3], + 'created_at': row[4], + 'expires_at': row[5], + 'access_count': row[6] + } + for row in cursor.fetchall() + ] + + def invalidate_cache_for_title(self, title: str): + """ + Invalidate all cache entries for specific title. + Useful when outline is updated. + + Args: + title: Title to invalidate cache for + """ + normalized_title = title.lower().strip() + + with sqlite3.connect(self.db_path) as conn: + cursor = conn.execute("DELETE FROM content_cache WHERE LOWER(title) = ?", (normalized_title,)) + deleted_count = cursor.rowcount + conn.commit() + + if deleted_count > 0: + logger.info(f"Invalidated {deleted_count} content cache entries for title: {title}") + + +# Global persistent cache instance +persistent_content_cache = PersistentContentCache() diff --git a/backend/services/llm_providers/gemini_provider.py b/backend/services/llm_providers/gemini_provider.py index 53a402df..01617fbd 100644 --- a/backend/services/llm_providers/gemini_provider.py +++ b/backend/services/llm_providers/gemini_provider.py @@ -407,11 +407,50 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9, logger.info("No parsed content, trying to parse text response") try: import json - parsed_text = json.loads(response.text) + import re + + # Clean the text response to fix common JSON issues + cleaned_text = response.text.strip() + + # Remove any markdown code blocks if present + if cleaned_text.startswith('```json'): + cleaned_text = cleaned_text[7:] + if cleaned_text.endswith('```'): + cleaned_text = cleaned_text[:-3] + cleaned_text = cleaned_text.strip() + + # Try to find JSON content between curly braces + json_match = re.search(r'\{.*\}', cleaned_text, re.DOTALL) + if json_match: + cleaned_text = json_match.group(0) + + parsed_text = json.loads(cleaned_text) logger.info("Successfully parsed text as JSON") return parsed_text except json.JSONDecodeError as e: logger.error(f"Failed to parse text as JSON: {e}") + logger.debug(f"Problematic text (first 500 chars): {response.text[:500]}") + + # Try to extract and fix JSON manually + try: + import re + # Look for the main JSON object + json_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}' + matches = re.findall(json_pattern, response.text, re.DOTALL) + if matches: + # Try the largest match (likely the main JSON) + largest_match = max(matches, key=len) + # Basic cleanup of common issues + fixed_json = largest_match.replace('\n', ' ').replace('\r', ' ') + # Remove any trailing commas before closing braces + fixed_json = re.sub(r',\s*}', '}', fixed_json) + fixed_json = re.sub(r',\s*]', ']', fixed_json) + + parsed_text = json.loads(fixed_json) + logger.info("Successfully parsed cleaned JSON") + return parsed_text + except Exception as fix_error: + logger.error(f"Failed to fix JSON manually: {fix_error}") # Check candidates for content (fallback for edge cases) if hasattr(response, 'candidates') and response.candidates: diff --git a/frontend/src/components/BlogWriter/BlogWriter.tsx b/frontend/src/components/BlogWriter/BlogWriter.tsx index a29745ae..f7b9e5d6 100644 --- a/frontend/src/components/BlogWriter/BlogWriter.tsx +++ b/frontend/src/components/BlogWriter/BlogWriter.tsx @@ -1,8 +1,8 @@ -import React from 'react'; +import React, { useState, useEffect } from 'react'; import { CopilotSidebar } from '@copilotkit/react-ui'; import '@copilotkit/react-ui/styles.css'; import { blogWriterApi } from '../../services/blogWriterApi'; -import { useOutlinePolling } from '../../hooks/usePolling'; +import { useOutlinePolling, useMediumGenerationPolling, useResearchPolling } from '../../hooks/usePolling'; import { useClaimFixer } from '../../hooks/useClaimFixer'; import { useMarkdownProcessor } from '../../hooks/useMarkdownProcessor'; import { useBlogWriterState } from '../../hooks/useBlogWriterState'; @@ -20,11 +20,12 @@ import { EnhancedOutlineActions } from './EnhancedOutlineActions'; import HallucinationChecker from './HallucinationChecker'; import Publisher from './Publisher'; import OutlineGenerator from './OutlineGenerator'; -import SectionGenerator from './SectionGenerator'; import OutlineRefiner from './OutlineRefiner'; import SEOProcessor from './SEOProcessor'; import BlogWriterLanding from './BlogWriterLanding'; -import ResearchProgressModal from './ResearchProgressModal'; +import { OutlineProgressModal } from './OutlineProgressModal'; +import OutlineFeedbackForm from './OutlineFeedbackForm'; +import { BlogEditor } from './WYSIWYG'; export const BlogWriter: React.FC = () => { // Use custom hook for all state management @@ -45,6 +46,7 @@ export const BlogWriter: React.FC = () => { researchCoverage, researchTitles, aiGeneratedTitles, + outlineConfirmed, setOutline, setTitleOptions, setSections, @@ -55,10 +57,12 @@ export const BlogWriter: React.FC = () => { handleResearchComplete, handleOutlineComplete, handleOutlineError, - handleSectionGenerated, - handleContinuityRefresh, handleTitleSelect, - handleCustomTitle + handleCustomTitle, + handleOutlineConfirmed, + handleOutlineRefined, + handleContentUpdate, + handleContentSave } = useBlogWriterState(); // Custom hooks for complex functionality @@ -68,13 +72,16 @@ export const BlogWriter: React.FC = () => { setSections ); - const { convertMarkdownToHTML, getTotalWords, getOutlineStats } = useMarkdownProcessor( + const { convertMarkdownToHTML } = useMarkdownProcessor( outline, sections ); - // Get suggestions - const suggestions = useSuggestions(research, outline); + // Research polling hook (for context awareness) + const researchPolling = useResearchPolling({ + onComplete: handleResearchComplete, + onError: (error) => console.error('Research polling error:', error) + }); // Outline polling hook const outlinePolling = useOutlinePolling({ @@ -82,22 +89,90 @@ export const BlogWriter: React.FC = () => { onError: handleOutlineError }); + // Medium generation polling (used after confirm if short blog) + const mediumPolling = useMediumGenerationPolling({ + onComplete: (result: any) => { + try { + if (result && result.sections) { + const newSections: Record = {}; + result.sections.forEach((s: any) => { + newSections[String(s.id)] = s.content || ''; + }); + setSections(newSections); + } + } catch (e) { + console.error('Failed to apply medium generation result:', e); + } + }, + onError: (err) => console.error('Medium generation failed:', err) + }); + + // Get context-aware suggestions based on current task status + const suggestions = useSuggestions( + research, + outline, + outlineConfirmed, + { isPolling: researchPolling.isPolling, currentStatus: researchPolling.currentStatus }, + { isPolling: outlinePolling.isPolling, currentStatus: outlinePolling.currentStatus }, + { isPolling: mediumPolling.isPolling, currentStatus: mediumPolling.currentStatus } + ); + + // Add minimum display time for modal + const [showModal, setShowModal] = useState(false); + const [modalStartTime, setModalStartTime] = useState(null); + const [isMediumGenerationStarting, setIsMediumGenerationStarting] = useState(false); + + useEffect(() => { + if ((mediumPolling.isPolling || isMediumGenerationStarting) && !showModal) { + setShowModal(true); + setModalStartTime(Date.now()); + } else if (!mediumPolling.isPolling && !isMediumGenerationStarting && showModal) { + const elapsed = Date.now() - (modalStartTime || 0); + const minDisplayTime = 2000; // 2 seconds minimum + + if (elapsed < minDisplayTime) { + setTimeout(() => { + setShowModal(false); + setModalStartTime(null); + }, minDisplayTime - elapsed); + } else { + setShowModal(false); + setModalStartTime(null); + } + } + }, [mediumPolling.isPolling, isMediumGenerationStarting, showModal, modalStartTime]); + + // Handle medium generation start from OutlineFeedbackForm + const handleMediumGenerationStarted = (taskId: string) => { + console.log('Starting medium generation polling for task:', taskId); + setIsMediumGenerationStarting(false); // Clear the starting state + mediumPolling.startPolling(taskId); + }; + + // Show modal immediately when copilot action is triggered + const handleMediumGenerationTriggered = () => { + console.log('Medium generation triggered - showing modal immediately'); + setIsMediumGenerationStarting(true); + }; + + // Debug medium polling state + console.log('Medium polling state:', { + isPolling: mediumPolling.isPolling, + status: mediumPolling.currentStatus, + progressCount: mediumPolling.progressMessages.length + }); + + return (
- {/* Outline Progress Modal */} - { /* informational while processing */ }} - /> {/* Extracted Components */} - + researchPolling.startPolling(taskId)} + /> { outline={outline} onOutlineUpdated={setOutline} /> + {/* New extracted functionality components */} { onTaskStart={(taskId) => setOutlineTaskId(taskId)} onPollingStart={(taskId) => outlinePolling.startPolling(taskId)} /> - { {research && outline.length === 0 && } {outline.length > 0 && (
- {/* Enhanced Title Selection */} - - + ) : ( + /* Outline Editor - Show when outline is not confirmed */ + <> + {/* Enhanced Title Selection */} + + - {/* Enhanced Outline Editor */} - blogWriterApi.refineOutline({ outline, operation: op, section_id: id, payload }).then((res: any) => setOutline(res.outline))} - /> + {/* Enhanced Outline Editor */} + blogWriterApi.refineOutline({ outline, operation: op, section_id: id, payload }).then((res: any) => setOutline(res.outline))} + /> - {/* Draft/Polished Mode Toggle */} -
- - -
- - {outline.map(s => ( -
-
-

{s.heading}

- {/* Continuity badge */} - {sections[s.id] && ( - - )} + {/* Draft/Polished Mode Toggle */} +
+ +
- {sections[s.id] ? ( - <> -
{sections[s.id]}
- - - ) : ( -
Ask the copilot to generate this section.
- )} -
- ))} + + {outline.map(s => ( +
+
+

{s.heading}

+ {/* Continuity badge */} + {sections[s.id] && ( + + )} +
+ {sections[s.id] ? ( + <> +
{sections[s.id]}
+ + + ) : ( +
Ask the copilot to generate this section.
+ )} +
+ ))} + + )}
)}
@@ -231,6 +325,7 @@ export const BlogWriter: React.FC = () => { // Get current state information const hasResearch = research !== null; const hasOutline = outline.length > 0; + const isOutlineConfirmed = outlineConfirmed; const researchInfo = hasResearch ? { sources: research.sources?.length || 0, queries: research.search_queries?.length || 0, @@ -239,6 +334,14 @@ export const BlogWriter: React.FC = () => { searchIntent: research.keyword_analysis?.search_intent || 'informational' } : null; + const outlineContext = hasOutline ? ` +OUTLINE DETAILS: +- Total sections: ${outline.length} +- Section headings: ${outline.map(s => s.heading).join(', ')} +- Total target words: ${outline.reduce((sum, s) => sum + (s.target_words || 0), 0)} +- Section breakdown: ${outline.map(s => `${s.heading} (${s.target_words || 0} words, ${s.subheadings?.length || 0} subheadings, ${s.key_points?.length || 0} key points)`).join('; ')} +` : ''; + const toolGuide = ` You are the ALwrity Blog Writing Assistant. You MUST call the appropriate frontend actions (tools) to fulfill user requests. @@ -252,7 +355,8 @@ ${hasResearch && researchInfo ? ` - Search intent: ${researchInfo.searchIntent} ` : '❌ No research completed yet'} -${hasOutline ? `✅ OUTLINE GENERATED: ${outline.length} sections created` : '❌ No outline generated yet'} +${hasOutline ? `✅ OUTLINE GENERATED: ${outline.length} sections created${isOutlineConfirmed ? ' (CONFIRMED)' : ' (PENDING CONFIRMATION)'}` : '❌ No outline generated yet'} +${outlineContext} Available tools: - getResearchKeywords(prompt?: string) - Get keywords from user for research @@ -261,9 +365,12 @@ Available tools: - chatWithResearchData(question: string) - Chat with research data to explore insights and get recommendations - generateOutline() - createOutlineWithCustomInputs(customInstructions: string) - Create outline with user's custom instructions +- refineOutline(prompt?: string) - Refine outline based on user feedback +- chatWithOutline(question?: string) - Chat with outline to get insights and ask questions about content structure +- confirmOutlineAndGenerateContent() - Confirm outline and mark as ready for content generation (does NOT auto-generate content) - generateSection(sectionId: string) - generateAllSections() -- refineOutline(operation: add|remove|move|merge|rename, sectionId?: string, payload?: object) +- refineOutlineStructure(operation: add|remove|move|merge|rename, sectionId?: string, payload?: object) - enhanceSection(sectionId: string, focus?: string) - Enhance a specific section with AI improvements - optimizeOutline(focus?: string) - Optimize entire outline for better flow, SEO, and engagement - rebalanceOutline(targetWords?: number) - Rebalance word count distribution across sections @@ -282,20 +389,48 @@ Available tools: - If user wants to explore research data, use chatWithResearchData() but then guide them to outline creation - If user has specific outline requirements, use createOutlineWithCustomInputs() with their instructions - When user asks for outline, call generateOutline() or createOutlineWithCustomInputs() based on their needs - - When user asks to generate content, call generateSection or generateAllSections + - After outline generation, ALWAYS guide user to review and confirm the outline + - If user wants to discuss the outline, use chatWithOutline() to provide insights and answer questions + - If user wants to refine the outline, use refineOutline() to collect their feedback and refine + - When user clicks "Confirm & Generate Content", ONLY call confirmOutlineAndGenerateContent() - DO NOT automatically generate content + - Only after outline confirmation, show content generation suggestions and wait for user to explicitly request content generation + - When user asks to generate content before outline confirmation, remind them to confirm the outline first + - Content generation should ONLY happen when user explicitly clicks "Generate all sections" or "Generate [specific section]" ENGAGEMENT TACTICS: - DO NOT ask for clarification - take action immediately with the information provided - Always call the appropriate tool instead of just talking about what you could do - Be aware of the current state and reference research results when relevant - - Guide users through the process: Research → Outline → Content → SEO → Publish + - Guide users through the process: Research → Outline → Outline Review & Confirmation → Content → SEO → Publish - Use encouraging language and highlight progress made - If user seems lost, remind them of the current stage and suggest the next step - When research is complete, emphasize the value of the data found and guide to outline creation + - When outline is generated, emphasize the importance of reviewing and confirming before content generation + - Encourage users to make small manual edits to the outline UI before using AI for major changes `; return [toolGuide, additional].filter(Boolean).join('\n\n'); }} /> + + {/* Outline Progress Modal */} + {/* Outline modal */} + m.message)} + latestMessage={outlinePolling.progressMessages.length > 0 ? outlinePolling.progressMessages[outlinePolling.progressMessages.length - 1].message : ''} + error={outlinePolling.error} + /> + + {/* Medium generation modal */} + m.message)} + latestMessage={mediumPolling.progressMessages.length > 0 ? mediumPolling.progressMessages[mediumPolling.progressMessages.length - 1].message : ''} + error={mediumPolling.error} + titleOverride={'📝 Generating Your Blog Content'} + />
); }; diff --git a/frontend/src/components/BlogWriter/KeywordInputForm.tsx b/frontend/src/components/BlogWriter/KeywordInputForm.tsx index 31632a64..5b1a6a88 100644 --- a/frontend/src/components/BlogWriter/KeywordInputForm.tsx +++ b/frontend/src/components/BlogWriter/KeywordInputForm.tsx @@ -9,6 +9,7 @@ const useCopilotActionTyped = useCopilotAction as any; interface KeywordInputFormProps { onKeywordsReceived?: (data: { keywords: string; blogLength: string }) => void; onResearchComplete?: (researchData: BlogResearchResponse) => void; + onTaskStart?: (taskId: string) => void; } // Separate component to manage form state @@ -140,7 +141,7 @@ const ResearchForm: React.FC<{ ); }; -export const KeywordInputForm: React.FC = ({ onKeywordsReceived, onResearchComplete }) => { +export const KeywordInputForm: React.FC = ({ onKeywordsReceived, onResearchComplete, onTaskStart }) => { const [currentTaskId, setCurrentTaskId] = useState(null); // Keyword input action with Human-in-the-Loop @@ -214,9 +215,13 @@ export const KeywordInputForm: React.FC = ({ onKeywordsRe word_count_target: parseInt(blogLength) }; + // Store the blog length in localStorage for later use + localStorage.setItem('blog_length_target', blogLength); + // Start async research const { task_id } = await blogWriterApi.startResearch(payload); setCurrentTaskId(task_id); + onTaskStart?.(task_id); // Notify parent component to start polling return { success: true, diff --git a/frontend/src/components/BlogWriter/OutlineFeedbackForm.tsx b/frontend/src/components/BlogWriter/OutlineFeedbackForm.tsx new file mode 100644 index 00000000..d3b74df5 --- /dev/null +++ b/frontend/src/components/BlogWriter/OutlineFeedbackForm.tsx @@ -0,0 +1,498 @@ +import React, { useState } from 'react'; +import { useCopilotAction } from '@copilotkit/react-core'; +import { BlogOutlineSection, BlogResearchResponse, blogWriterApi, mediumBlogApi } from '../../services/blogWriterApi'; +import { useMediumGenerationPolling } from '../../hooks/usePolling'; + +// Simple toast notification function +const showToast = (message: string, type: 'success' | 'error' = 'success') => { + const toast = document.createElement('div'); + toast.style.cssText = ` + position: fixed; + top: 20px; + right: 20px; + padding: 16px 24px; + border-radius: 8px; + color: white; + font-weight: 500; + z-index: 10000; + max-width: 400px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15); + transform: translateX(100%); + transition: transform 0.3s ease; + background-color: ${type === 'success' ? '#4caf50' : '#f44336'}; + `; + toast.textContent = message; + document.body.appendChild(toast); + + // Animate in + setTimeout(() => { + toast.style.transform = 'translateX(0)'; + }, 100); + + // Remove after 4 seconds + setTimeout(() => { + toast.style.transform = 'translateX(100%)'; + setTimeout(() => { + document.body.removeChild(toast); + }, 300); + }, 4000); +}; + +const useCopilotActionTyped = useCopilotAction as any; + +interface OutlineFeedbackFormProps { + outline: BlogOutlineSection[]; + research: BlogResearchResponse; + onOutlineConfirmed: () => void; + onOutlineRefined: (feedback: string) => void; + onMediumGenerationStarted?: (taskId: string) => void; + onMediumGenerationTriggered?: () => void; +} + + +// Separate component to manage feedback form state +const FeedbackForm: React.FC<{ + prompt?: string; + onSubmit: (data: { feedback: string; action: 'refine' | 'confirm' }) => void; + onCancel: () => void; +}> = ({ prompt, onSubmit, onCancel }) => { + const [feedback, setFeedback] = useState(''); + const [action, setAction] = useState<'refine' | 'confirm'>('refine'); + const hasValidInput = feedback.trim().length > 0 || action === 'confirm'; + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault(); + if (hasValidInput) { + onSubmit({ feedback: feedback.trim(), action }); + } else { + window.alert('Please provide feedback or confirm the outline.'); + } + }; + + return ( +
+

+ 📝 Outline Review & Feedback +

+

+ {prompt || 'Please review the generated outline and provide your feedback:'} +

+ +
+
+ +
+ + +
+
+ + {action === 'refine' && ( +
+ +