Base code
This commit is contained in:
172
backend/services/cache/research_cache.py
vendored
Normal file
172
backend/services/cache/research_cache.py
vendored
Normal file
@@ -0,0 +1,172 @@
|
||||
"""
|
||||
Research Cache Service
|
||||
|
||||
Provides intelligent caching for Google grounded research results to reduce API costs.
|
||||
Only returns cached results for exact keyword matches to ensure accuracy.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from typing import Dict, Any, Optional, List
|
||||
from datetime import datetime, timedelta
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class ResearchCache:
|
||||
"""Cache for research results with exact keyword matching."""
|
||||
|
||||
def __init__(self, max_cache_size: int = 100, cache_ttl_hours: int = 24):
|
||||
"""
|
||||
Initialize the research cache.
|
||||
|
||||
Args:
|
||||
max_cache_size: Maximum number of cached entries
|
||||
cache_ttl_hours: Time-to-live for cache entries in hours
|
||||
"""
|
||||
self.cache: Dict[str, Dict[str, Any]] = {}
|
||||
self.max_cache_size = max_cache_size
|
||||
self.cache_ttl = timedelta(hours=cache_ttl_hours)
|
||||
|
||||
def _generate_cache_key(self, keywords: List[str], industry: str, target_audience: str) -> str:
|
||||
"""
|
||||
Generate a cache key based on exact keyword match.
|
||||
|
||||
Args:
|
||||
keywords: List of research keywords
|
||||
industry: Industry context
|
||||
target_audience: Target audience context
|
||||
|
||||
Returns:
|
||||
MD5 hash of the normalized parameters
|
||||
"""
|
||||
# Normalize and sort keywords for consistent hashing
|
||||
normalized_keywords = sorted([kw.lower().strip() for kw in keywords])
|
||||
normalized_industry = industry.lower().strip() if industry else "general"
|
||||
normalized_audience = target_audience.lower().strip() if target_audience else "general"
|
||||
|
||||
# Create a consistent string representation
|
||||
cache_string = f"{normalized_keywords}|{normalized_industry}|{normalized_audience}"
|
||||
|
||||
# Generate MD5 hash
|
||||
return hashlib.md5(cache_string.encode('utf-8')).hexdigest()
|
||||
|
||||
def _is_cache_entry_valid(self, entry: Dict[str, Any]) -> bool:
|
||||
"""Check if a cache entry is still valid (not expired)."""
|
||||
if 'created_at' not in entry:
|
||||
return False
|
||||
|
||||
created_at = datetime.fromisoformat(entry['created_at'])
|
||||
return datetime.now() - created_at < self.cache_ttl
|
||||
|
||||
def _cleanup_expired_entries(self):
|
||||
"""Remove expired cache entries."""
|
||||
expired_keys = []
|
||||
for key, entry in self.cache.items():
|
||||
if not self._is_cache_entry_valid(entry):
|
||||
expired_keys.append(key)
|
||||
|
||||
for key in expired_keys:
|
||||
del self.cache[key]
|
||||
logger.debug(f"Removed expired cache entry: {key}")
|
||||
|
||||
def _evict_oldest_entries(self, num_to_evict: int):
|
||||
"""Evict the oldest cache entries when cache is full."""
|
||||
# Sort by creation time and remove oldest entries
|
||||
sorted_entries = sorted(
|
||||
self.cache.items(),
|
||||
key=lambda x: x[1].get('created_at', ''),
|
||||
reverse=False
|
||||
)
|
||||
|
||||
for i in range(min(num_to_evict, len(sorted_entries))):
|
||||
key = sorted_entries[i][0]
|
||||
del self.cache[key]
|
||||
logger.debug(f"Evicted oldest cache entry: {key}")
|
||||
|
||||
def get_cached_result(self, keywords: List[str], industry: str, target_audience: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Get cached research result for exact keyword match.
|
||||
|
||||
Args:
|
||||
keywords: List of research keywords
|
||||
industry: Industry context
|
||||
target_audience: Target audience context
|
||||
|
||||
Returns:
|
||||
Cached research result if found and valid, None otherwise
|
||||
"""
|
||||
cache_key = self._generate_cache_key(keywords, industry, target_audience)
|
||||
|
||||
if cache_key not in self.cache:
|
||||
logger.debug(f"Cache miss for keywords: {keywords}")
|
||||
return None
|
||||
|
||||
entry = self.cache[cache_key]
|
||||
|
||||
# Check if entry is still valid
|
||||
if not self._is_cache_entry_valid(entry):
|
||||
del self.cache[cache_key]
|
||||
logger.debug(f"Cache entry expired for keywords: {keywords}")
|
||||
return None
|
||||
|
||||
logger.info(f"Cache hit for keywords: {keywords} (saved API call)")
|
||||
return entry.get('result')
|
||||
|
||||
def cache_result(self, keywords: List[str], industry: str, target_audience: str, result: Dict[str, Any]):
|
||||
"""
|
||||
Cache a research result.
|
||||
|
||||
Args:
|
||||
keywords: List of research keywords
|
||||
industry: Industry context
|
||||
target_audience: Target audience context
|
||||
result: Research result to cache
|
||||
"""
|
||||
cache_key = self._generate_cache_key(keywords, industry, target_audience)
|
||||
|
||||
# Cleanup expired entries first
|
||||
self._cleanup_expired_entries()
|
||||
|
||||
# Check if cache is full and evict if necessary
|
||||
if len(self.cache) >= self.max_cache_size:
|
||||
num_to_evict = len(self.cache) - self.max_cache_size + 1
|
||||
self._evict_oldest_entries(num_to_evict)
|
||||
|
||||
# Store the result
|
||||
self.cache[cache_key] = {
|
||||
'result': result,
|
||||
'created_at': datetime.now().isoformat(),
|
||||
'keywords': keywords,
|
||||
'industry': industry,
|
||||
'target_audience': target_audience
|
||||
}
|
||||
|
||||
logger.info(f"Cached research result for keywords: {keywords}")
|
||||
|
||||
def get_cache_stats(self) -> Dict[str, Any]:
|
||||
"""Get cache statistics."""
|
||||
self._cleanup_expired_entries()
|
||||
|
||||
return {
|
||||
'total_entries': len(self.cache),
|
||||
'max_size': self.max_cache_size,
|
||||
'ttl_hours': self.cache_ttl.total_seconds() / 3600,
|
||||
'entries': [
|
||||
{
|
||||
'keywords': entry['keywords'],
|
||||
'industry': entry['industry'],
|
||||
'target_audience': entry['target_audience'],
|
||||
'created_at': entry['created_at']
|
||||
}
|
||||
for entry in self.cache.values()
|
||||
]
|
||||
}
|
||||
|
||||
def clear_cache(self):
|
||||
"""Clear all cached entries."""
|
||||
self.cache.clear()
|
||||
logger.info("Research cache cleared")
|
||||
|
||||
|
||||
# Global cache instance
|
||||
research_cache = ResearchCache()
|
||||
Reference in New Issue
Block a user