Base code

Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions

backend/services/cache/__init__.py vendored Normal file

@@ -0,0 +1 @@
# Cache services for AI Blog Writer
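All of the cache services in this commit derive their keys the same way: normalize the request parameters (lower-case, strip, and sort the keywords), join them into a single string, and take its MD5 hex digest. A standalone sketch of that shared pattern, not tied to any particular module below:

import hashlib

def make_cache_key(keywords, industry=None, target_audience=None):
    # Normalize the same way the cache classes below do, so that keyword
    # order and casing do not change the resulting key.
    normalized_keywords = sorted(kw.lower().strip() for kw in keywords)
    normalized_industry = (industry or "general").lower().strip()
    normalized_audience = (target_audience or "general").lower().strip()
    cache_string = f"{normalized_keywords}|{normalized_industry}|{normalized_audience}"
    return hashlib.md5(cache_string.encode("utf-8")).hexdigest()

# Keyword order and casing are normalized away:
assert make_cache_key(["SEO", "AI blog"], "Software") == make_cache_key(["ai blog", "seo"], "software")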


@@ -0,0 +1,363 @@
"""
Persistent Content Cache Service
Provides database-backed caching for blog content generation results so that cached results
survive server restarts and can be managed consistently across multiple instances.
"""
import hashlib
import json
import sqlite3
from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
from pathlib import Path
from loguru import logger
class PersistentContentCache:
"""Database-backed cache for blog content generation results with exact parameter matching."""
def __init__(self, db_path: str = "content_cache.db", max_cache_size: int = 300, cache_ttl_hours: int = 72):
"""
Initialize the persistent content cache.
Args:
db_path: Path to SQLite database file
max_cache_size: Maximum number of cached entries
cache_ttl_hours: Time-to-live for cache entries in hours (longer than the research cache TTL, since content generation is expensive)
"""
self.db_path = db_path
self.max_cache_size = max_cache_size
self.cache_ttl = timedelta(hours=cache_ttl_hours)
# Ensure database directory exists
Path(db_path).parent.mkdir(parents=True, exist_ok=True)
# Initialize database
self._init_database()
def _init_database(self):
"""Initialize the SQLite database with required tables."""
with sqlite3.connect(self.db_path) as conn:
conn.execute("""
CREATE TABLE IF NOT EXISTS content_cache (
id INTEGER PRIMARY KEY AUTOINCREMENT,
cache_key TEXT UNIQUE NOT NULL,
title TEXT NOT NULL,
sections_hash TEXT NOT NULL,
global_target_words INTEGER NOT NULL,
persona_data TEXT,
tone TEXT,
audience TEXT,
result_data TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
expires_at TIMESTAMP NOT NULL,
access_count INTEGER DEFAULT 0,
last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# Create indexes for better performance
conn.execute("CREATE INDEX IF NOT EXISTS idx_content_cache_key ON content_cache(cache_key)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_content_expires_at ON content_cache(expires_at)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_content_created_at ON content_cache(created_at)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_content_title ON content_cache(title)")
conn.commit()
def _generate_sections_hash(self, sections: List[Dict[str, Any]]) -> str:
"""
Generate a hash for sections based on their structure and content.
Args:
sections: List of section dictionaries with outline information
Returns:
MD5 hash of the normalized sections
"""
# Normalize sections for consistent hashing
normalized_sections = []
for section in sections:
normalized_section = {
'id': section.get('id', ''),
'heading': section.get('heading', '').lower().strip(),
'keyPoints': sorted([str(kp).lower().strip() for kp in section.get('keyPoints', [])]),
'keywords': sorted([str(kw).lower().strip() for kw in section.get('keywords', [])]),
'subheadings': sorted([str(sh).lower().strip() for sh in section.get('subheadings', [])]),
'targetWords': section.get('targetWords', 0),
# References are intentionally excluded from the hash: they may vary even when the content is effectively the same
}
normalized_sections.append(normalized_section)
# Sort sections by id for consistent ordering
normalized_sections.sort(key=lambda x: x['id'])
# Generate hash
sections_str = json.dumps(normalized_sections, sort_keys=True)
return hashlib.md5(sections_str.encode('utf-8')).hexdigest()
def _generate_cache_key(self, keywords: List[str], sections: List[Dict[str, Any]],
global_target_words: int, persona_data: Dict = None,
tone: str = None, audience: str = None) -> str:
"""
Generate a cache key based on exact parameter match.
Args:
keywords: Original research keywords (primary cache key)
sections: List of section dictionaries with outline information
global_target_words: Target word count for entire blog
persona_data: Persona information
tone: Content tone
audience: Target audience
Returns:
MD5 hash of the normalized parameters
"""
# Normalize parameters
normalized_keywords = sorted([kw.lower().strip() for kw in (keywords or [])])
sections_hash = self._generate_sections_hash(sections)
normalized_tone = tone.lower().strip() if tone else "professional"
normalized_audience = audience.lower().strip() if audience else "general"
# Normalize persona data
normalized_persona = ""
if persona_data:
# Sort persona keys and values for consistent hashing
persona_str = json.dumps(persona_data, sort_keys=True, default=str)
normalized_persona = persona_str.lower()
# Create a consistent string representation
cache_string = f"{normalized_keywords}|{sections_hash}|{global_target_words}|{normalized_tone}|{normalized_audience}|{normalized_persona}"
# Generate MD5 hash
return hashlib.md5(cache_string.encode('utf-8')).hexdigest()
def _cleanup_expired_entries(self):
"""Remove expired cache entries from database."""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute(
"DELETE FROM content_cache WHERE expires_at < ?",
(datetime.now().isoformat(),)
)
deleted_count = cursor.rowcount
if deleted_count > 0:
logger.debug(f"Removed {deleted_count} expired content cache entries")
conn.commit()
def _evict_oldest_entries(self, num_to_evict: int):
"""Evict the oldest cache entries when cache is full."""
with sqlite3.connect(self.db_path) as conn:
# Get oldest entries by creation time
cursor = conn.execute("""
SELECT id FROM content_cache
ORDER BY created_at ASC
LIMIT ?
""", (num_to_evict,))
old_ids = [row[0] for row in cursor.fetchall()]
if old_ids:
placeholders = ','.join(['?' for _ in old_ids])
conn.execute(f"DELETE FROM content_cache WHERE id IN ({placeholders})", old_ids)
logger.debug(f"Evicted {len(old_ids)} oldest content cache entries")
conn.commit()
def get_cached_content(self, keywords: List[str], sections: List[Dict[str, Any]],
global_target_words: int, persona_data: Dict = None,
tone: str = None, audience: str = None) -> Optional[Dict[str, Any]]:
"""
Get cached content result for exact parameter match.
Args:
keywords: Original research keywords (primary cache key)
sections: List of section dictionaries with outline information
global_target_words: Target word count for entire blog
persona_data: Persona information
tone: Content tone
audience: Target audience
Returns:
Cached content result if found and valid, None otherwise
"""
cache_key = self._generate_cache_key(keywords, sections, global_target_words, persona_data, tone, audience)
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("""
SELECT result_data, expires_at FROM content_cache
WHERE cache_key = ? AND expires_at > ?
""", (cache_key, datetime.now().isoformat()))
row = cursor.fetchone()
if row is None:
logger.debug(f"Content cache miss for keywords: {keywords}, sections: {len(sections)}")
return None
# Update access statistics
conn.execute("""
UPDATE content_cache
SET access_count = access_count + 1, last_accessed = CURRENT_TIMESTAMP
WHERE cache_key = ?
""", (cache_key,))
conn.commit()
try:
result_data = json.loads(row[0])
logger.info(f"Content cache hit for keywords: {keywords} (saved expensive generation)")
return result_data
except json.JSONDecodeError:
logger.error(f"Invalid JSON in content cache for keywords: {keywords}")
# Remove invalid entry
conn.execute("DELETE FROM content_cache WHERE cache_key = ?", (cache_key,))
conn.commit()
return None
def cache_content(self, keywords: List[str], sections: List[Dict[str, Any]],
global_target_words: int, persona_data: Dict, tone: str,
audience: str, result: Dict[str, Any]):
"""
Cache a content generation result.
Args:
keywords: Original research keywords (primary cache key)
sections: List of section dictionaries with outline information
global_target_words: Target word count for entire blog
persona_data: Persona information
tone: Content tone
audience: Target audience
result: Content result to cache
"""
cache_key = self._generate_cache_key(keywords, sections, global_target_words, persona_data, tone, audience)
sections_hash = self._generate_sections_hash(sections)
# Cleanup expired entries first
self._cleanup_expired_entries()
# Check if cache is full and evict if necessary
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("SELECT COUNT(*) FROM content_cache")
current_count = cursor.fetchone()[0]
if current_count >= self.max_cache_size:
num_to_evict = current_count - self.max_cache_size + 1
self._evict_oldest_entries(num_to_evict)
# Store the result
expires_at = datetime.now() + self.cache_ttl
with sqlite3.connect(self.db_path) as conn:
conn.execute("""
INSERT OR REPLACE INTO content_cache
(cache_key, title, sections_hash, global_target_words, persona_data, tone, audience, result_data, expires_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
cache_key,
json.dumps(keywords),  # Stored in the title column: no separate title is passed in, so this holds the JSON keyword list
sections_hash,
global_target_words,
json.dumps(persona_data) if persona_data else "",
tone or "",
audience or "",
json.dumps(result),
expires_at.isoformat()
))
conn.commit()
logger.info(f"Cached content result for keywords: {keywords}, {len(sections)} sections")
def get_cache_stats(self) -> Dict[str, Any]:
"""Get cache statistics."""
self._cleanup_expired_entries()
with sqlite3.connect(self.db_path) as conn:
# Get basic stats
cursor = conn.execute("SELECT COUNT(*) FROM content_cache")
total_entries = cursor.fetchone()[0]
cursor = conn.execute("SELECT COUNT(*) FROM content_cache WHERE expires_at > ?", (datetime.now().isoformat(),))
valid_entries = cursor.fetchone()[0]
# Get most accessed entries
cursor = conn.execute("""
SELECT title, global_target_words, access_count, created_at
FROM content_cache
ORDER BY access_count DESC
LIMIT 10
""")
top_entries = [
{
'title': row[0],
'global_target_words': row[1],
'access_count': row[2],
'created_at': row[3]
}
for row in cursor.fetchall()
]
# Get database size
cursor = conn.execute("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()")
db_size_bytes = cursor.fetchone()[0]
db_size_mb = db_size_bytes / (1024 * 1024)
return {
'total_entries': total_entries,
'valid_entries': valid_entries,
'expired_entries': total_entries - valid_entries,
'max_size': self.max_cache_size,
'ttl_hours': self.cache_ttl.total_seconds() / 3600,
'database_size_mb': round(db_size_mb, 2),
'top_accessed_entries': top_entries
}
def clear_cache(self):
"""Clear all cached entries."""
with sqlite3.connect(self.db_path) as conn:
conn.execute("DELETE FROM content_cache")
conn.commit()
logger.info("Content cache cleared")
def get_cache_entries(self, limit: int = 50) -> List[Dict[str, Any]]:
"""Get recent cache entries for debugging."""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("""
SELECT title, global_target_words, tone, audience, created_at, expires_at, access_count
FROM content_cache
ORDER BY created_at DESC
LIMIT ?
""", (limit,))
return [
{
'title': row[0],
'global_target_words': row[1],
'tone': row[2],
'audience': row[3],
'created_at': row[4],
'expires_at': row[5],
'access_count': row[6]
}
for row in cursor.fetchall()
]
def invalidate_cache_for_title(self, title: str):
"""
Invalidate all cache entries for a specific title.
Useful when the outline is updated. Note that cache_content() writes the
JSON-encoded keyword list into the title column, so entries created through
it will not match a plain title here.
Args:
title: Title to invalidate cache for
"""
normalized_title = title.lower().strip()
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("DELETE FROM content_cache WHERE LOWER(title) = ?", (normalized_title,))
deleted_count = cursor.rowcount
conn.commit()
if deleted_count > 0:
logger.info(f"Invalidated {deleted_count} content cache entries for title: {title}")
# Global persistent cache instance
persistent_content_cache = PersistentContentCache()
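A minimal, hypothetical usage sketch for the module-level singleton above; the section dictionaries and the result payload are illustrative placeholders, not a schema required by the class beyond the fields _generate_sections_hash reads:

# Hypothetical caller-side flow: try the cache first, generate only on a miss.
sections = [
    {"id": "s1", "heading": "Introduction", "keyPoints": ["why it matters"],
     "keywords": ["ai writing"], "subheadings": [], "targetWords": 300},
]
cached = persistent_content_cache.get_cached_content(
    keywords=["ai blog writer"], sections=sections, global_target_words=1500,
    tone="professional", audience="marketers",
)
if cached is None:
    result = {"sections": [{"id": "s1", "content": "..."}]}  # placeholder generation result
    persistent_content_cache.cache_content(
        ["ai blog writer"], sections, 1500, None, "professional", "marketers", result,
    )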


@@ -0,0 +1,332 @@
"""
Persistent Outline Cache Service
Provides database-backed caching for outline generation results so that cached results
survive server restarts and can be managed consistently across multiple instances.
"""
import hashlib
import json
import sqlite3
from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
from pathlib import Path
from loguru import logger
class PersistentOutlineCache:
"""Database-backed cache for outline generation results with exact parameter matching."""
def __init__(self, db_path: str = "outline_cache.db", max_cache_size: int = 500, cache_ttl_hours: int = 48):
"""
Initialize the persistent outline cache.
Args:
db_path: Path to SQLite database file
max_cache_size: Maximum number of cached entries
cache_ttl_hours: Time-to-live for cache entries in hours (longer than the research cache TTL)
"""
self.db_path = db_path
self.max_cache_size = max_cache_size
self.cache_ttl = timedelta(hours=cache_ttl_hours)
# Ensure database directory exists
Path(db_path).parent.mkdir(parents=True, exist_ok=True)
# Initialize database
self._init_database()
def _init_database(self):
"""Initialize the SQLite database with required tables."""
with sqlite3.connect(self.db_path) as conn:
conn.execute("""
CREATE TABLE IF NOT EXISTS outline_cache (
id INTEGER PRIMARY KEY AUTOINCREMENT,
cache_key TEXT UNIQUE NOT NULL,
keywords TEXT NOT NULL,
industry TEXT NOT NULL,
target_audience TEXT NOT NULL,
word_count INTEGER NOT NULL,
custom_instructions TEXT,
persona_data TEXT,
result_data TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
expires_at TIMESTAMP NOT NULL,
access_count INTEGER DEFAULT 0,
last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# Create indexes for better performance
conn.execute("CREATE INDEX IF NOT EXISTS idx_outline_cache_key ON outline_cache(cache_key)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_outline_expires_at ON outline_cache(expires_at)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_outline_created_at ON outline_cache(created_at)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_outline_keywords ON outline_cache(keywords)")
conn.commit()
def _generate_cache_key(self, keywords: List[str], industry: str, target_audience: str,
word_count: int, custom_instructions: str = None, persona_data: Dict = None) -> str:
"""
Generate a cache key based on exact parameter match.
Args:
keywords: List of research keywords
industry: Industry context
target_audience: Target audience context
word_count: Target word count for outline
custom_instructions: Custom instructions for outline generation
persona_data: Persona information
Returns:
MD5 hash of the normalized parameters
"""
# Normalize and sort keywords for consistent hashing
normalized_keywords = sorted([kw.lower().strip() for kw in keywords])
normalized_industry = industry.lower().strip() if industry else "general"
normalized_audience = target_audience.lower().strip() if target_audience else "general"
normalized_instructions = custom_instructions.lower().strip() if custom_instructions else ""
# Normalize persona data
normalized_persona = ""
if persona_data:
# Sort persona keys and values for consistent hashing
persona_str = json.dumps(persona_data, sort_keys=True, default=str)
normalized_persona = persona_str.lower()
# Create a consistent string representation
cache_string = f"{normalized_keywords}|{normalized_industry}|{normalized_audience}|{word_count}|{normalized_instructions}|{normalized_persona}"
# Generate MD5 hash
return hashlib.md5(cache_string.encode('utf-8')).hexdigest()
def _cleanup_expired_entries(self):
"""Remove expired cache entries from database."""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute(
"DELETE FROM outline_cache WHERE expires_at < ?",
(datetime.now().isoformat(),)
)
deleted_count = cursor.rowcount
if deleted_count > 0:
logger.debug(f"Removed {deleted_count} expired outline cache entries")
conn.commit()
def _evict_oldest_entries(self, num_to_evict: int):
"""Evict the oldest cache entries when cache is full."""
with sqlite3.connect(self.db_path) as conn:
# Get oldest entries by creation time
cursor = conn.execute("""
SELECT id FROM outline_cache
ORDER BY created_at ASC
LIMIT ?
""", (num_to_evict,))
old_ids = [row[0] for row in cursor.fetchall()]
if old_ids:
placeholders = ','.join(['?' for _ in old_ids])
conn.execute(f"DELETE FROM outline_cache WHERE id IN ({placeholders})", old_ids)
logger.debug(f"Evicted {len(old_ids)} oldest outline cache entries")
conn.commit()
def get_cached_outline(self, keywords: List[str], industry: str, target_audience: str,
word_count: int, custom_instructions: str = None, persona_data: Dict = None) -> Optional[Dict[str, Any]]:
"""
Get cached outline result for exact parameter match.
Args:
keywords: List of research keywords
industry: Industry context
target_audience: Target audience context
word_count: Target word count for outline
custom_instructions: Custom instructions for outline generation
persona_data: Persona information
Returns:
Cached outline result if found and valid, None otherwise
"""
cache_key = self._generate_cache_key(keywords, industry, target_audience, word_count, custom_instructions, persona_data)
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("""
SELECT result_data, expires_at FROM outline_cache
WHERE cache_key = ? AND expires_at > ?
""", (cache_key, datetime.now().isoformat()))
row = cursor.fetchone()
if row is None:
logger.debug(f"Outline cache miss for keywords: {keywords}, word_count: {word_count}")
return None
# Update access statistics
conn.execute("""
UPDATE outline_cache
SET access_count = access_count + 1, last_accessed = CURRENT_TIMESTAMP
WHERE cache_key = ?
""", (cache_key,))
conn.commit()
try:
result_data = json.loads(row[0])
logger.info(f"Outline cache hit for keywords: {keywords}, word_count: {word_count} (saved expensive generation)")
return result_data
except json.JSONDecodeError:
logger.error(f"Invalid JSON in outline cache for keywords: {keywords}")
# Remove invalid entry
conn.execute("DELETE FROM outline_cache WHERE cache_key = ?", (cache_key,))
conn.commit()
return None
def cache_outline(self, keywords: List[str], industry: str, target_audience: str,
word_count: int, custom_instructions: str, persona_data: Dict, result: Dict[str, Any]):
"""
Cache an outline generation result.
Args:
keywords: List of research keywords
industry: Industry context
target_audience: Target audience context
word_count: Target word count for outline
custom_instructions: Custom instructions for outline generation
persona_data: Persona information
result: Outline result to cache
"""
cache_key = self._generate_cache_key(keywords, industry, target_audience, word_count, custom_instructions, persona_data)
# Cleanup expired entries first
self._cleanup_expired_entries()
# Check if cache is full and evict if necessary
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("SELECT COUNT(*) FROM outline_cache")
current_count = cursor.fetchone()[0]
if current_count >= self.max_cache_size:
num_to_evict = current_count - self.max_cache_size + 1
self._evict_oldest_entries(num_to_evict)
# Store the result
expires_at = datetime.now() + self.cache_ttl
with sqlite3.connect(self.db_path) as conn:
conn.execute("""
INSERT OR REPLACE INTO outline_cache
(cache_key, keywords, industry, target_audience, word_count, custom_instructions, persona_data, result_data, expires_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
cache_key,
json.dumps(keywords),
industry,
target_audience,
word_count,
custom_instructions or "",
json.dumps(persona_data) if persona_data else "",
json.dumps(result),
expires_at.isoformat()
))
conn.commit()
logger.info(f"Cached outline result for keywords: {keywords}, word_count: {word_count}")
def get_cache_stats(self) -> Dict[str, Any]:
"""Get cache statistics."""
self._cleanup_expired_entries()
with sqlite3.connect(self.db_path) as conn:
# Get basic stats
cursor = conn.execute("SELECT COUNT(*) FROM outline_cache")
total_entries = cursor.fetchone()[0]
cursor = conn.execute("SELECT COUNT(*) FROM outline_cache WHERE expires_at > ?", (datetime.now().isoformat(),))
valid_entries = cursor.fetchone()[0]
# Get most accessed entries
cursor = conn.execute("""
SELECT keywords, industry, target_audience, word_count, access_count, created_at
FROM outline_cache
ORDER BY access_count DESC
LIMIT 10
""")
top_entries = [
{
'keywords': json.loads(row[0]),
'industry': row[1],
'target_audience': row[2],
'word_count': row[3],
'access_count': row[4],
'created_at': row[5]
}
for row in cursor.fetchall()
]
# Get database size
cursor = conn.execute("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()")
db_size_bytes = cursor.fetchone()[0]
db_size_mb = db_size_bytes / (1024 * 1024)
return {
'total_entries': total_entries,
'valid_entries': valid_entries,
'expired_entries': total_entries - valid_entries,
'max_size': self.max_cache_size,
'ttl_hours': self.cache_ttl.total_seconds() / 3600,
'database_size_mb': round(db_size_mb, 2),
'top_accessed_entries': top_entries
}
def clear_cache(self):
"""Clear all cached entries."""
with sqlite3.connect(self.db_path) as conn:
conn.execute("DELETE FROM outline_cache")
conn.commit()
logger.info("Outline cache cleared")
def get_cache_entries(self, limit: int = 50) -> List[Dict[str, Any]]:
"""Get recent cache entries for debugging."""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("""
SELECT keywords, industry, target_audience, word_count, custom_instructions, created_at, expires_at, access_count
FROM outline_cache
ORDER BY created_at DESC
LIMIT ?
""", (limit,))
return [
{
'keywords': json.loads(row[0]),
'industry': row[1],
'target_audience': row[2],
'word_count': row[3],
'custom_instructions': row[4],
'created_at': row[5],
'expires_at': row[6],
'access_count': row[7]
}
for row in cursor.fetchall()
]
def invalidate_cache_for_keywords(self, keywords: List[str]):
"""
Invalidate all cache entries for a specific keyword set.
Useful when the underlying research data is updated. Note that the match uses
the normalized (lower-cased, sorted) keyword list, while cache_outline() stores
keywords exactly as passed, so only entries cached with an already-normalized
list will be matched.
Args:
keywords: Keywords to invalidate cache for
"""
normalized_keywords = sorted([kw.lower().strip() for kw in keywords])
keywords_json = json.dumps(normalized_keywords)
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("DELETE FROM outline_cache WHERE keywords = ?", (keywords_json,))
deleted_count = cursor.rowcount
conn.commit()
if deleted_count > 0:
logger.info(f"Invalidated {deleted_count} outline cache entries for keywords: {keywords}")
# Global persistent cache instance
persistent_outline_cache = PersistentOutlineCache()
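The same pattern applies to the outline cache; a hypothetical sketch, with the outline dictionary standing in for whatever the generator actually returns:

# Hypothetical caller-side flow for outlines.
outline = persistent_outline_cache.get_cached_outline(
    keywords=["ai blog writer", "seo"], industry="software",
    target_audience="marketers", word_count=1500,
)
if outline is None:
    outline = {"title": "AI Blog Writing", "sections": []}  # placeholder outline result
    persistent_outline_cache.cache_outline(
        ["ai blog writer", "seo"], "software", "marketers", 1500,
        custom_instructions=None, persona_data=None, result=outline,
    )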


@@ -0,0 +1,283 @@
"""
Persistent Research Cache Service
Provides database-backed caching for research results so that cached results
survive server restarts and can be managed consistently across multiple instances.
"""
import hashlib
import json
import sqlite3
from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
from pathlib import Path
from loguru import logger
class PersistentResearchCache:
"""Database-backed cache for research results with exact keyword matching."""
def __init__(self, db_path: str = "research_cache.db", max_cache_size: int = 1000, cache_ttl_hours: int = 24):
"""
Initialize the persistent research cache.
Args:
db_path: Path to SQLite database file
max_cache_size: Maximum number of cached entries
cache_ttl_hours: Time-to-live for cache entries in hours
"""
self.db_path = db_path
self.max_cache_size = max_cache_size
self.cache_ttl = timedelta(hours=cache_ttl_hours)
# Ensure database directory exists
Path(db_path).parent.mkdir(parents=True, exist_ok=True)
# Initialize database
self._init_database()
def _init_database(self):
"""Initialize the SQLite database with required tables."""
with sqlite3.connect(self.db_path) as conn:
conn.execute("""
CREATE TABLE IF NOT EXISTS research_cache (
id INTEGER PRIMARY KEY AUTOINCREMENT,
cache_key TEXT UNIQUE NOT NULL,
keywords TEXT NOT NULL,
industry TEXT NOT NULL,
target_audience TEXT NOT NULL,
result_data TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
expires_at TIMESTAMP NOT NULL,
access_count INTEGER DEFAULT 0,
last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# Create indexes for better performance
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_key ON research_cache(cache_key)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_expires_at ON research_cache(expires_at)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_created_at ON research_cache(created_at)")
conn.commit()
def _generate_cache_key(self, keywords: List[str], industry: str, target_audience: str) -> str:
"""
Generate a cache key based on exact keyword match.
Args:
keywords: List of research keywords
industry: Industry context
target_audience: Target audience context
Returns:
MD5 hash of the normalized parameters
"""
# Normalize and sort keywords for consistent hashing
normalized_keywords = sorted([kw.lower().strip() for kw in keywords])
normalized_industry = industry.lower().strip() if industry else "general"
normalized_audience = target_audience.lower().strip() if target_audience else "general"
# Create a consistent string representation
cache_string = f"{normalized_keywords}|{normalized_industry}|{normalized_audience}"
# Generate MD5 hash
return hashlib.md5(cache_string.encode('utf-8')).hexdigest()
def _cleanup_expired_entries(self):
"""Remove expired cache entries from database."""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute(
"DELETE FROM research_cache WHERE expires_at < ?",
(datetime.now().isoformat(),)
)
deleted_count = cursor.rowcount
if deleted_count > 0:
logger.debug(f"Removed {deleted_count} expired cache entries")
conn.commit()
def _evict_oldest_entries(self, num_to_evict: int):
"""Evict the oldest cache entries when cache is full."""
with sqlite3.connect(self.db_path) as conn:
# Get oldest entries by creation time
cursor = conn.execute("""
SELECT id FROM research_cache
ORDER BY created_at ASC
LIMIT ?
""", (num_to_evict,))
old_ids = [row[0] for row in cursor.fetchall()]
if old_ids:
placeholders = ','.join(['?' for _ in old_ids])
conn.execute(f"DELETE FROM research_cache WHERE id IN ({placeholders})", old_ids)
logger.debug(f"Evicted {len(old_ids)} oldest cache entries")
conn.commit()
def get_cached_result(self, keywords: List[str], industry: str, target_audience: str) -> Optional[Dict[str, Any]]:
"""
Get cached research result for exact keyword match.
Args:
keywords: List of research keywords
industry: Industry context
target_audience: Target audience context
Returns:
Cached research result if found and valid, None otherwise
"""
cache_key = self._generate_cache_key(keywords, industry, target_audience)
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("""
SELECT result_data, expires_at FROM research_cache
WHERE cache_key = ? AND expires_at > ?
""", (cache_key, datetime.now().isoformat()))
row = cursor.fetchone()
if row is None:
logger.debug(f"Cache miss for keywords: {keywords}")
return None
# Update access statistics
conn.execute("""
UPDATE research_cache
SET access_count = access_count + 1, last_accessed = CURRENT_TIMESTAMP
WHERE cache_key = ?
""", (cache_key,))
conn.commit()
try:
result_data = json.loads(row[0])
logger.info(f"Cache hit for keywords: {keywords} (saved API call)")
return result_data
except json.JSONDecodeError:
logger.error(f"Invalid JSON in cache for keywords: {keywords}")
# Remove invalid entry
conn.execute("DELETE FROM research_cache WHERE cache_key = ?", (cache_key,))
conn.commit()
return None
def cache_result(self, keywords: List[str], industry: str, target_audience: str, result: Dict[str, Any]):
"""
Cache a research result.
Args:
keywords: List of research keywords
industry: Industry context
target_audience: Target audience context
result: Research result to cache
"""
cache_key = self._generate_cache_key(keywords, industry, target_audience)
# Cleanup expired entries first
self._cleanup_expired_entries()
# Check if cache is full and evict if necessary
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("SELECT COUNT(*) FROM research_cache")
current_count = cursor.fetchone()[0]
if current_count >= self.max_cache_size:
num_to_evict = current_count - self.max_cache_size + 1
self._evict_oldest_entries(num_to_evict)
# Store the result
expires_at = datetime.now() + self.cache_ttl
with sqlite3.connect(self.db_path) as conn:
conn.execute("""
INSERT OR REPLACE INTO research_cache
(cache_key, keywords, industry, target_audience, result_data, expires_at)
VALUES (?, ?, ?, ?, ?, ?)
""", (
cache_key,
json.dumps(keywords),
industry,
target_audience,
json.dumps(result),
expires_at.isoformat()
))
conn.commit()
logger.info(f"Cached research result for keywords: {keywords}")
def get_cache_stats(self) -> Dict[str, Any]:
"""Get cache statistics."""
self._cleanup_expired_entries()
with sqlite3.connect(self.db_path) as conn:
# Get basic stats
cursor = conn.execute("SELECT COUNT(*) FROM research_cache")
total_entries = cursor.fetchone()[0]
cursor = conn.execute("SELECT COUNT(*) FROM research_cache WHERE expires_at > ?", (datetime.now().isoformat(),))
valid_entries = cursor.fetchone()[0]
# Get most accessed entries
cursor = conn.execute("""
SELECT keywords, industry, target_audience, access_count, created_at
FROM research_cache
ORDER BY access_count DESC
LIMIT 10
""")
top_entries = [
{
'keywords': json.loads(row[0]),
'industry': row[1],
'target_audience': row[2],
'access_count': row[3],
'created_at': row[4]
}
for row in cursor.fetchall()
]
# Get database size
cursor = conn.execute("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()")
db_size_bytes = cursor.fetchone()[0]
db_size_mb = db_size_bytes / (1024 * 1024)
return {
'total_entries': total_entries,
'valid_entries': valid_entries,
'expired_entries': total_entries - valid_entries,
'max_size': self.max_cache_size,
'ttl_hours': self.cache_ttl.total_seconds() / 3600,
'database_size_mb': round(db_size_mb, 2),
'top_accessed_entries': top_entries
}
def clear_cache(self):
"""Clear all cached entries."""
with sqlite3.connect(self.db_path) as conn:
conn.execute("DELETE FROM research_cache")
conn.commit()
logger.info("Research cache cleared")
def get_cache_entries(self, limit: int = 50) -> List[Dict[str, Any]]:
"""Get recent cache entries for debugging."""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("""
SELECT keywords, industry, target_audience, created_at, expires_at, access_count
FROM research_cache
ORDER BY created_at DESC
LIMIT ?
""", (limit,))
return [
{
'keywords': json.loads(row[0]),
'industry': row[1],
'target_audience': row[2],
'created_at': row[3],
'expires_at': row[4],
'access_count': row[5]
}
for row in cursor.fetchall()
]
# Global persistent cache instance
persistent_research_cache = PersistentResearchCache()
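A sketch of the persistent research cache in use. Because the key is built from normalized, sorted keywords, a re-ordered or differently cased request resolves to the same entry:

# Hypothetical caller-side flow plus a demonstration of exact-match normalization.
hit = persistent_research_cache.get_cached_result(["AI blog writer", "SEO"], "software", "marketers")
if hit is None:
    research = {"summary": "...", "sources": []}  # placeholder for a real grounded-research call
    persistent_research_cache.cache_result(["AI blog writer", "SEO"], "software", "marketers", research)

# Keyword order and casing are normalized away, so this is a cache hit:
assert persistent_research_cache.get_cached_result(["seo", "ai blog writer"], "Software", "Marketers") is not None

print(persistent_research_cache.get_cache_stats()["total_entries"])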

backend/services/cache/research_cache.py vendored Normal file

@@ -0,0 +1,172 @@
"""
Research Cache Service
Provides intelligent caching for Google grounded research results to reduce API costs.
Only returns cached results for exact keyword matches to ensure accuracy.
"""
import hashlib
import json
from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
from loguru import logger
class ResearchCache:
"""Cache for research results with exact keyword matching."""
def __init__(self, max_cache_size: int = 100, cache_ttl_hours: int = 24):
"""
Initialize the research cache.
Args:
max_cache_size: Maximum number of cached entries
cache_ttl_hours: Time-to-live for cache entries in hours
"""
self.cache: Dict[str, Dict[str, Any]] = {}
self.max_cache_size = max_cache_size
self.cache_ttl = timedelta(hours=cache_ttl_hours)
def _generate_cache_key(self, keywords: List[str], industry: str, target_audience: str) -> str:
"""
Generate a cache key based on exact keyword match.
Args:
keywords: List of research keywords
industry: Industry context
target_audience: Target audience context
Returns:
MD5 hash of the normalized parameters
"""
# Normalize and sort keywords for consistent hashing
normalized_keywords = sorted([kw.lower().strip() for kw in keywords])
normalized_industry = industry.lower().strip() if industry else "general"
normalized_audience = target_audience.lower().strip() if target_audience else "general"
# Create a consistent string representation
cache_string = f"{normalized_keywords}|{normalized_industry}|{normalized_audience}"
# Generate MD5 hash
return hashlib.md5(cache_string.encode('utf-8')).hexdigest()
def _is_cache_entry_valid(self, entry: Dict[str, Any]) -> bool:
"""Check if a cache entry is still valid (not expired)."""
if 'created_at' not in entry:
return False
created_at = datetime.fromisoformat(entry['created_at'])
return datetime.now() - created_at < self.cache_ttl
def _cleanup_expired_entries(self):
"""Remove expired cache entries."""
expired_keys = []
for key, entry in self.cache.items():
if not self._is_cache_entry_valid(entry):
expired_keys.append(key)
for key in expired_keys:
del self.cache[key]
logger.debug(f"Removed expired cache entry: {key}")
def _evict_oldest_entries(self, num_to_evict: int):
"""Evict the oldest cache entries when cache is full."""
# Sort by creation time and remove oldest entries
sorted_entries = sorted(
self.cache.items(),
key=lambda x: x[1].get('created_at', ''),
reverse=False
)
for i in range(min(num_to_evict, len(sorted_entries))):
key = sorted_entries[i][0]
del self.cache[key]
logger.debug(f"Evicted oldest cache entry: {key}")
def get_cached_result(self, keywords: List[str], industry: str, target_audience: str) -> Optional[Dict[str, Any]]:
"""
Get cached research result for exact keyword match.
Args:
keywords: List of research keywords
industry: Industry context
target_audience: Target audience context
Returns:
Cached research result if found and valid, None otherwise
"""
cache_key = self._generate_cache_key(keywords, industry, target_audience)
if cache_key not in self.cache:
logger.debug(f"Cache miss for keywords: {keywords}")
return None
entry = self.cache[cache_key]
# Check if entry is still valid
if not self._is_cache_entry_valid(entry):
del self.cache[cache_key]
logger.debug(f"Cache entry expired for keywords: {keywords}")
return None
logger.info(f"Cache hit for keywords: {keywords} (saved API call)")
return entry.get('result')
def cache_result(self, keywords: List[str], industry: str, target_audience: str, result: Dict[str, Any]):
"""
Cache a research result.
Args:
keywords: List of research keywords
industry: Industry context
target_audience: Target audience context
result: Research result to cache
"""
cache_key = self._generate_cache_key(keywords, industry, target_audience)
# Cleanup expired entries first
self._cleanup_expired_entries()
# Check if cache is full and evict if necessary
if len(self.cache) >= self.max_cache_size:
num_to_evict = len(self.cache) - self.max_cache_size + 1
self._evict_oldest_entries(num_to_evict)
# Store the result
self.cache[cache_key] = {
'result': result,
'created_at': datetime.now().isoformat(),
'keywords': keywords,
'industry': industry,
'target_audience': target_audience
}
logger.info(f"Cached research result for keywords: {keywords}")
def get_cache_stats(self) -> Dict[str, Any]:
"""Get cache statistics."""
self._cleanup_expired_entries()
return {
'total_entries': len(self.cache),
'max_size': self.max_cache_size,
'ttl_hours': self.cache_ttl.total_seconds() / 3600,
'entries': [
{
'keywords': entry['keywords'],
'industry': entry['industry'],
'target_audience': entry['target_audience'],
'created_at': entry['created_at']
}
for entry in self.cache.values()
]
}
def clear_cache(self):
"""Clear all cached entries."""
self.cache.clear()
logger.info("Research cache cleared")
# Global cache instance
research_cache = ResearchCache()
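Finally, a hypothetical sketch of the in-memory ResearchCache singleton. It exposes the same get/cache interface as the persistent variants but keeps entries only for the lifetime of the process; the import path follows the backend/services/cache/research_cache.py header above and may need adjusting to the actual package layout.

# In-memory cache: state is lost on restart, unlike the SQLite-backed caches above.
from backend.services.cache.research_cache import research_cache

keywords = ["ai blog writer", "content marketing"]
if research_cache.get_cached_result(keywords, "software", "marketers") is None:
    research_cache.cache_result(keywords, "software", "marketers", {"summary": "..."})

stats = research_cache.get_cache_stats()
print(f"{stats['total_entries']} entries, TTL {stats['ttl_hours']} hours")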