ALwrity Version 0.5.0 (Fastapi + React )
This commit is contained in:
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
Performance Module
|
||||
Caching, optimization, and health monitoring services.
|
||||
"""
|
||||
|
||||
from .caching import CachingService
|
||||
from .optimization import PerformanceOptimizationService
|
||||
from .health_monitoring import HealthMonitoringService
|
||||
|
||||
__all__ = ['CachingService', 'PerformanceOptimizationService', 'HealthMonitoringService']
|
||||
@@ -0,0 +1,469 @@
|
||||
"""
|
||||
Caching Service
|
||||
Cache management and optimization.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import hashlib
|
||||
from typing import Dict, Any, Optional, List
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Try to import Redis, fallback to in-memory if not available
|
||||
try:
|
||||
import redis
|
||||
REDIS_AVAILABLE = True
|
||||
except ImportError:
|
||||
REDIS_AVAILABLE = False
|
||||
logger.warning("Redis not available, using in-memory caching")
|
||||
|
||||
class CachingService:
|
||||
"""Service for intelligent caching of content strategy data."""
|
||||
|
||||
def __init__(self):
|
||||
# Cache configuration
|
||||
self.cache_config = {
|
||||
'ai_analysis': {
|
||||
'ttl': 3600, # 1 hour
|
||||
'max_size': 1000,
|
||||
'priority': 'high'
|
||||
},
|
||||
'onboarding_data': {
|
||||
'ttl': 1800, # 30 minutes
|
||||
'max_size': 500,
|
||||
'priority': 'medium'
|
||||
},
|
||||
'strategy_cache': {
|
||||
'ttl': 7200, # 2 hours
|
||||
'max_size': 200,
|
||||
'priority': 'high'
|
||||
},
|
||||
'field_transformations': {
|
||||
'ttl': 900, # 15 minutes
|
||||
'max_size': 1000,
|
||||
'priority': 'low'
|
||||
}
|
||||
}
|
||||
|
||||
# Initialize Redis connection if available
|
||||
self.redis_available = False
|
||||
if REDIS_AVAILABLE:
|
||||
try:
|
||||
self.redis_client = redis.Redis(
|
||||
host='localhost',
|
||||
port=6379,
|
||||
db=0,
|
||||
decode_responses=True,
|
||||
socket_connect_timeout=5,
|
||||
socket_timeout=5
|
||||
)
|
||||
# Test connection
|
||||
self.redis_client.ping()
|
||||
self.redis_available = True
|
||||
logger.info("Redis connection established successfully")
|
||||
except Exception as e:
|
||||
logger.warning(f"Redis connection failed: {str(e)}. Using in-memory cache.")
|
||||
self.redis_available = False
|
||||
self.memory_cache = {}
|
||||
else:
|
||||
logger.info("Using in-memory cache (Redis not available)")
|
||||
self.memory_cache = {}
|
||||
|
||||
def get_cache_key(self, cache_type: str, identifier: str, **kwargs) -> str:
|
||||
"""Generate a unique cache key."""
|
||||
try:
|
||||
# Create a hash of the identifier and additional parameters
|
||||
key_data = f"{cache_type}:{identifier}"
|
||||
if kwargs:
|
||||
key_data += ":" + json.dumps(kwargs, sort_keys=True)
|
||||
|
||||
# Create hash for consistent key length
|
||||
key_hash = hashlib.md5(key_data.encode()).hexdigest()
|
||||
return f"content_strategy:{cache_type}:{key_hash}"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating cache key: {str(e)}")
|
||||
return f"content_strategy:{cache_type}:{identifier}"
|
||||
|
||||
async def get_cached_data(self, cache_type: str, identifier: str, **kwargs) -> Optional[Dict[str, Any]]:
|
||||
"""Retrieve cached data."""
|
||||
try:
|
||||
if not self.redis_available:
|
||||
return self._get_from_memory_cache(cache_type, identifier, **kwargs)
|
||||
|
||||
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
|
||||
cached_data = self.redis_client.get(cache_key)
|
||||
|
||||
if cached_data:
|
||||
data = json.loads(cached_data)
|
||||
logger.info(f"Cache hit for {cache_type}:{identifier}")
|
||||
return data
|
||||
else:
|
||||
logger.info(f"Cache miss for {cache_type}:{identifier}")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error retrieving cached data: {str(e)}")
|
||||
return None
|
||||
|
||||
async def set_cached_data(self, cache_type: str, identifier: str, data: Dict[str, Any], **kwargs) -> bool:
|
||||
"""Store data in cache."""
|
||||
try:
|
||||
if not self.redis_available:
|
||||
return self._set_in_memory_cache(cache_type, identifier, data, **kwargs)
|
||||
|
||||
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
|
||||
ttl = self.cache_config.get(cache_type, {}).get('ttl', 3600)
|
||||
|
||||
# Add metadata to cached data
|
||||
cached_data = {
|
||||
'data': data,
|
||||
'metadata': {
|
||||
'cached_at': datetime.utcnow().isoformat(),
|
||||
'cache_type': cache_type,
|
||||
'identifier': identifier,
|
||||
'ttl': ttl
|
||||
}
|
||||
}
|
||||
|
||||
# Store in Redis with TTL
|
||||
result = self.redis_client.setex(
|
||||
cache_key,
|
||||
ttl,
|
||||
json.dumps(cached_data, default=str)
|
||||
)
|
||||
|
||||
if result:
|
||||
logger.info(f"Data cached successfully for {cache_type}:{identifier}")
|
||||
await self._update_cache_stats(cache_type, 'set')
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"Failed to cache data for {cache_type}:{identifier}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error setting cached data: {str(e)}")
|
||||
return False
|
||||
|
||||
async def invalidate_cache(self, cache_type: str, identifier: str, **kwargs) -> bool:
|
||||
"""Invalidate specific cached data."""
|
||||
try:
|
||||
if not self.redis_available:
|
||||
return self._invalidate_memory_cache(cache_type, identifier, **kwargs)
|
||||
|
||||
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
|
||||
result = self.redis_client.delete(cache_key)
|
||||
|
||||
if result:
|
||||
logger.info(f"Cache invalidated for {cache_type}:{identifier}")
|
||||
await self._update_cache_stats(cache_type, 'invalidate')
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"No cache entry found to invalidate for {cache_type}:{identifier}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error invalidating cache: {str(e)}")
|
||||
return False
|
||||
|
||||
async def clear_cache_type(self, cache_type: str) -> bool:
|
||||
"""Clear all cached data of a specific type."""
|
||||
try:
|
||||
if not self.redis_available:
|
||||
return self._clear_memory_cache_type(cache_type)
|
||||
|
||||
pattern = f"content_strategy:{cache_type}:*"
|
||||
keys = self.redis_client.keys(pattern)
|
||||
|
||||
if keys:
|
||||
result = self.redis_client.delete(*keys)
|
||||
logger.info(f"Cleared {result} cache entries for {cache_type}")
|
||||
await self._update_cache_stats(cache_type, 'clear')
|
||||
return True
|
||||
else:
|
||||
logger.info(f"No cache entries found for {cache_type}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error clearing cache type {cache_type}: {str(e)}")
|
||||
return False
|
||||
|
||||
async def get_cache_stats(self, cache_type: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Get cache statistics."""
|
||||
try:
|
||||
if not self.redis_available:
|
||||
return self._get_memory_cache_stats(cache_type)
|
||||
|
||||
stats = {}
|
||||
|
||||
if cache_type:
|
||||
pattern = f"content_strategy:{cache_type}:*"
|
||||
keys = self.redis_client.keys(pattern)
|
||||
stats[cache_type] = {
|
||||
'entries': len(keys),
|
||||
'size_bytes': sum(len(self.redis_client.get(key) or '') for key in keys),
|
||||
'config': self.cache_config.get(cache_type, {})
|
||||
}
|
||||
else:
|
||||
for cache_type_name in self.cache_config.keys():
|
||||
pattern = f"content_strategy:{cache_type_name}:*"
|
||||
keys = self.redis_client.keys(pattern)
|
||||
stats[cache_type_name] = {
|
||||
'entries': len(keys),
|
||||
'size_bytes': sum(len(self.redis_client.get(key) or '') for key in keys),
|
||||
'config': self.cache_config.get(cache_type_name, {})
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting cache stats: {str(e)}")
|
||||
return {}
|
||||
|
||||
async def optimize_cache(self) -> Dict[str, Any]:
|
||||
"""Optimize cache by removing expired entries and managing memory."""
|
||||
try:
|
||||
if not self.redis_available:
|
||||
return self._optimize_memory_cache()
|
||||
|
||||
optimization_results = {}
|
||||
|
||||
for cache_type, config in self.cache_config.items():
|
||||
pattern = f"content_strategy:{cache_type}:*"
|
||||
keys = self.redis_client.keys(pattern)
|
||||
|
||||
if len(keys) > config.get('max_size', 1000):
|
||||
# Remove oldest entries to maintain max size
|
||||
keys_with_times = []
|
||||
for key in keys:
|
||||
ttl = self.redis_client.ttl(key)
|
||||
if ttl > 0: # Key still has TTL
|
||||
keys_with_times.append((key, ttl))
|
||||
|
||||
# Sort by TTL (oldest first)
|
||||
keys_with_times.sort(key=lambda x: x[1])
|
||||
|
||||
# Remove excess entries
|
||||
excess_count = len(keys) - config.get('max_size', 1000)
|
||||
keys_to_remove = [key for key, _ in keys_with_times[:excess_count]]
|
||||
|
||||
if keys_to_remove:
|
||||
removed_count = self.redis_client.delete(*keys_to_remove)
|
||||
optimization_results[cache_type] = {
|
||||
'entries_removed': removed_count,
|
||||
'reason': 'max_size_exceeded'
|
||||
}
|
||||
logger.info(f"Optimized {cache_type} cache: removed {removed_count} entries")
|
||||
|
||||
return optimization_results
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error optimizing cache: {str(e)}")
|
||||
return {}
|
||||
|
||||
async def _update_cache_stats(self, cache_type: str, operation: str) -> None:
|
||||
"""Update cache statistics."""
|
||||
try:
|
||||
if not self.redis_available:
|
||||
return
|
||||
|
||||
stats_key = f"cache_stats:{cache_type}"
|
||||
current_stats = self.redis_client.hgetall(stats_key)
|
||||
|
||||
# Update operation counts
|
||||
current_stats[f"{operation}_count"] = str(int(current_stats.get(f"{operation}_count", 0)) + 1)
|
||||
current_stats['last_updated'] = datetime.utcnow().isoformat()
|
||||
|
||||
# Store updated stats
|
||||
self.redis_client.hset(stats_key, mapping=current_stats)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating cache stats: {str(e)}")
|
||||
|
||||
# Memory cache fallback methods
|
||||
def _get_from_memory_cache(self, cache_type: str, identifier: str, **kwargs) -> Optional[Dict[str, Any]]:
|
||||
"""Get data from memory cache."""
|
||||
try:
|
||||
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
|
||||
cached_data = self.memory_cache.get(cache_key)
|
||||
|
||||
if cached_data:
|
||||
# Check if data is still valid
|
||||
cached_at = datetime.fromisoformat(cached_data['metadata']['cached_at'])
|
||||
ttl = cached_data['metadata']['ttl']
|
||||
|
||||
if datetime.utcnow() - cached_at < timedelta(seconds=ttl):
|
||||
logger.info(f"Memory cache hit for {cache_type}:{identifier}")
|
||||
return cached_data['data']
|
||||
else:
|
||||
# Remove expired entry
|
||||
del self.memory_cache[cache_key]
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting from memory cache: {str(e)}")
|
||||
return None
|
||||
|
||||
def _set_in_memory_cache(self, cache_type: str, identifier: str, data: Dict[str, Any], **kwargs) -> bool:
|
||||
"""Set data in memory cache."""
|
||||
try:
|
||||
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
|
||||
ttl = self.cache_config.get(cache_type, {}).get('ttl', 3600)
|
||||
|
||||
cached_data = {
|
||||
'data': data,
|
||||
'metadata': {
|
||||
'cached_at': datetime.utcnow().isoformat(),
|
||||
'cache_type': cache_type,
|
||||
'identifier': identifier,
|
||||
'ttl': ttl
|
||||
}
|
||||
}
|
||||
|
||||
# Check max size and remove oldest if needed
|
||||
max_size = self.cache_config.get(cache_type, {}).get('max_size', 1000)
|
||||
if len(self.memory_cache) >= max_size:
|
||||
# Remove oldest entry
|
||||
oldest_key = min(self.memory_cache.keys(),
|
||||
key=lambda k: self.memory_cache[k]['metadata']['cached_at'])
|
||||
del self.memory_cache[oldest_key]
|
||||
|
||||
self.memory_cache[cache_key] = cached_data
|
||||
logger.info(f"Data cached in memory for {cache_type}:{identifier}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error setting in memory cache: {str(e)}")
|
||||
return False
|
||||
|
||||
def _invalidate_memory_cache(self, cache_type: str, identifier: str, **kwargs) -> bool:
|
||||
"""Invalidate memory cache entry."""
|
||||
try:
|
||||
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
|
||||
if cache_key in self.memory_cache:
|
||||
del self.memory_cache[cache_key]
|
||||
logger.info(f"Memory cache invalidated for {cache_type}:{identifier}")
|
||||
return True
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error invalidating memory cache: {str(e)}")
|
||||
return False
|
||||
|
||||
def _clear_memory_cache_type(self, cache_type: str) -> bool:
|
||||
"""Clear memory cache by type."""
|
||||
try:
|
||||
keys_to_remove = [key for key in self.memory_cache.keys()
|
||||
if key.startswith(f"content_strategy:{cache_type}:")]
|
||||
|
||||
for key in keys_to_remove:
|
||||
del self.memory_cache[key]
|
||||
|
||||
logger.info(f"Cleared {len(keys_to_remove)} memory cache entries for {cache_type}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error clearing memory cache type: {str(e)}")
|
||||
return False
|
||||
|
||||
def _get_memory_cache_stats(self, cache_type: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Get memory cache statistics."""
|
||||
try:
|
||||
stats = {}
|
||||
|
||||
if cache_type:
|
||||
keys = [key for key in self.memory_cache.keys()
|
||||
if key.startswith(f"content_strategy:{cache_type}:")]
|
||||
stats[cache_type] = {
|
||||
'entries': len(keys),
|
||||
'size_bytes': sum(len(str(value)) for value in [self.memory_cache[key] for key in keys]),
|
||||
'config': self.cache_config.get(cache_type, {})
|
||||
}
|
||||
else:
|
||||
for cache_type_name in self.cache_config.keys():
|
||||
keys = [key for key in self.memory_cache.keys()
|
||||
if key.startswith(f"content_strategy:{cache_type_name}:")]
|
||||
stats[cache_type_name] = {
|
||||
'entries': len(keys),
|
||||
'size_bytes': sum(len(str(value)) for value in [self.memory_cache[key] for key in keys]),
|
||||
'config': self.cache_config.get(cache_type_name, {})
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting memory cache stats: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _optimize_memory_cache(self) -> Dict[str, Any]:
|
||||
"""Optimize memory cache."""
|
||||
try:
|
||||
optimization_results = {}
|
||||
|
||||
for cache_type, config in self.cache_config.items():
|
||||
keys = [key for key in self.memory_cache.keys()
|
||||
if key.startswith(f"content_strategy:{cache_type}:")]
|
||||
|
||||
if len(keys) > config.get('max_size', 1000):
|
||||
# Remove oldest entries
|
||||
keys_with_times = []
|
||||
for key in keys:
|
||||
cached_at = datetime.fromisoformat(self.memory_cache[key]['metadata']['cached_at'])
|
||||
keys_with_times.append((key, cached_at))
|
||||
|
||||
# Sort by cached time (oldest first)
|
||||
keys_with_times.sort(key=lambda x: x[1])
|
||||
|
||||
# Remove excess entries
|
||||
excess_count = len(keys) - config.get('max_size', 1000)
|
||||
keys_to_remove = [key for key, _ in keys_with_times[:excess_count]]
|
||||
|
||||
for key in keys_to_remove:
|
||||
del self.memory_cache[key]
|
||||
|
||||
optimization_results[cache_type] = {
|
||||
'entries_removed': len(keys_to_remove),
|
||||
'reason': 'max_size_exceeded'
|
||||
}
|
||||
|
||||
return optimization_results
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error optimizing memory cache: {str(e)}")
|
||||
return {}
|
||||
|
||||
# Cache-specific methods for different data types
|
||||
async def cache_ai_analysis(self, user_id: int, analysis_type: str, analysis_data: Dict[str, Any]) -> bool:
|
||||
"""Cache AI analysis results."""
|
||||
return await self.set_cached_data('ai_analysis', f"{user_id}:{analysis_type}", analysis_data)
|
||||
|
||||
async def get_cached_ai_analysis(self, user_id: int, analysis_type: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get cached AI analysis results."""
|
||||
return await self.get_cached_data('ai_analysis', f"{user_id}:{analysis_type}")
|
||||
|
||||
async def cache_onboarding_data(self, user_id: int, onboarding_data: Dict[str, Any]) -> bool:
|
||||
"""Cache onboarding data."""
|
||||
return await self.set_cached_data('onboarding_data', str(user_id), onboarding_data)
|
||||
|
||||
async def get_cached_onboarding_data(self, user_id: int) -> Optional[Dict[str, Any]]:
|
||||
"""Get cached onboarding data."""
|
||||
return await self.get_cached_data('onboarding_data', str(user_id))
|
||||
|
||||
async def cache_strategy(self, strategy_id: int, strategy_data: Dict[str, Any]) -> bool:
|
||||
"""Cache strategy data."""
|
||||
return await self.set_cached_data('strategy_cache', str(strategy_id), strategy_data)
|
||||
|
||||
async def get_cached_strategy(self, strategy_id: int) -> Optional[Dict[str, Any]]:
|
||||
"""Get cached strategy data."""
|
||||
return await self.get_cached_data('strategy_cache', str(strategy_id))
|
||||
|
||||
async def cache_field_transformations(self, user_id: int, transformations: Dict[str, Any]) -> bool:
|
||||
"""Cache field transformations."""
|
||||
return await self.set_cached_data('field_transformations', str(user_id), transformations)
|
||||
|
||||
async def get_cached_field_transformations(self, user_id: int) -> Optional[Dict[str, Any]]:
|
||||
"""Get cached field transformations."""
|
||||
return await self.get_cached_data('field_transformations', str(user_id))
|
||||
@@ -0,0 +1,503 @@
|
||||
"""
|
||||
Health Monitoring Service
|
||||
System health monitoring and performance tracking.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import asyncio
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class HealthMonitoringService:
|
||||
"""Service for system health monitoring and assessment."""
|
||||
|
||||
def __init__(self):
|
||||
self.health_thresholds = {
|
||||
'database_response_time': 1.0, # seconds
|
||||
'cache_response_time': 0.1, # seconds
|
||||
'ai_service_response_time': 5.0, # seconds
|
||||
'memory_usage_threshold': 80, # percentage
|
||||
'cpu_usage_threshold': 80, # percentage
|
||||
'disk_usage_threshold': 90, # percentage
|
||||
'error_rate_threshold': 0.05 # 5%
|
||||
}
|
||||
|
||||
self.health_status = {
|
||||
'timestamp': None,
|
||||
'overall_status': 'healthy',
|
||||
'components': {},
|
||||
'alerts': [],
|
||||
'recommendations': []
|
||||
}
|
||||
|
||||
async def check_system_health(self, db: Session, cache_service=None, ai_service=None) -> Dict[str, Any]:
|
||||
"""Perform comprehensive system health check."""
|
||||
try:
|
||||
logger.info("Starting comprehensive system health check")
|
||||
|
||||
health_report = {
|
||||
'timestamp': datetime.utcnow().isoformat(),
|
||||
'overall_status': 'healthy',
|
||||
'components': {},
|
||||
'alerts': [],
|
||||
'recommendations': []
|
||||
}
|
||||
|
||||
# Check database health
|
||||
db_health = await self._check_database_health(db)
|
||||
health_report['components']['database'] = db_health
|
||||
|
||||
# Check cache health
|
||||
if cache_service:
|
||||
cache_health = await self._check_cache_health(cache_service)
|
||||
health_report['components']['cache'] = cache_health
|
||||
else:
|
||||
health_report['components']['cache'] = {'status': 'not_available', 'message': 'Cache service not provided'}
|
||||
|
||||
# Check AI service health
|
||||
if ai_service:
|
||||
ai_health = await self._check_ai_service_health(ai_service)
|
||||
health_report['components']['ai_service'] = ai_health
|
||||
else:
|
||||
health_report['components']['ai_service'] = {'status': 'not_available', 'message': 'AI service not provided'}
|
||||
|
||||
# Check system resources
|
||||
system_health = await self._check_system_resources()
|
||||
health_report['components']['system'] = system_health
|
||||
|
||||
# Determine overall status
|
||||
health_report['overall_status'] = self._determine_overall_health(health_report['components'])
|
||||
|
||||
# Generate alerts and recommendations
|
||||
health_report['alerts'] = self._generate_health_alerts(health_report['components'])
|
||||
health_report['recommendations'] = await self._generate_health_recommendations(health_report['components'])
|
||||
|
||||
# Update health status
|
||||
self.health_status = health_report
|
||||
|
||||
logger.info(f"System health check completed. Overall status: {health_report['overall_status']}")
|
||||
return health_report
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during system health check: {str(e)}")
|
||||
return {
|
||||
'timestamp': datetime.utcnow().isoformat(),
|
||||
'overall_status': 'error',
|
||||
'components': {},
|
||||
'alerts': [f'Health check failed: {str(e)}'],
|
||||
'recommendations': ['Investigate health check system']
|
||||
}
|
||||
|
||||
async def _check_database_health(self, db: Session) -> Dict[str, Any]:
|
||||
"""Check database health and performance."""
|
||||
try:
|
||||
start_time = time.time()
|
||||
|
||||
# Test database connection
|
||||
try:
|
||||
result = db.execute(text("SELECT 1"))
|
||||
result.fetchone()
|
||||
connection_status = 'healthy'
|
||||
except Exception as e:
|
||||
connection_status = 'unhealthy'
|
||||
logger.error(f"Database connection test failed: {str(e)}")
|
||||
|
||||
# Test query performance
|
||||
try:
|
||||
query_start = time.time()
|
||||
result = db.execute(text("SELECT COUNT(*) FROM information_schema.tables"))
|
||||
result.fetchone()
|
||||
query_time = time.time() - query_start
|
||||
query_status = 'healthy' if query_time <= self.health_thresholds['database_response_time'] else 'degraded'
|
||||
except Exception as e:
|
||||
query_time = 0
|
||||
query_status = 'unhealthy'
|
||||
logger.error(f"Database query test failed: {str(e)}")
|
||||
|
||||
# Check database size and performance
|
||||
try:
|
||||
# Get database statistics
|
||||
db_stats = await self._get_database_statistics(db)
|
||||
except Exception as e:
|
||||
db_stats = {'error': str(e)}
|
||||
|
||||
total_time = time.time() - start_time
|
||||
|
||||
return {
|
||||
'status': 'healthy' if connection_status == 'healthy' and query_status == 'healthy' else 'degraded',
|
||||
'connection_status': connection_status,
|
||||
'query_status': query_status,
|
||||
'response_time': query_time,
|
||||
'total_check_time': total_time,
|
||||
'statistics': db_stats,
|
||||
'last_checked': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking database health: {str(e)}")
|
||||
return {
|
||||
'status': 'unhealthy',
|
||||
'error': str(e),
|
||||
'last_checked': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
async def _check_cache_health(self, cache_service) -> Dict[str, Any]:
|
||||
"""Check cache health and performance."""
|
||||
try:
|
||||
start_time = time.time()
|
||||
|
||||
# Test cache connectivity
|
||||
try:
|
||||
cache_stats = await cache_service.get_cache_stats()
|
||||
connectivity_status = 'healthy'
|
||||
except Exception as e:
|
||||
cache_stats = {}
|
||||
connectivity_status = 'unhealthy'
|
||||
logger.error(f"Cache connectivity test failed: {str(e)}")
|
||||
|
||||
# Test cache performance
|
||||
try:
|
||||
test_key = f"health_check_{int(time.time())}"
|
||||
test_data = {'test': 'data', 'timestamp': datetime.utcnow().isoformat()}
|
||||
|
||||
# Test write
|
||||
write_start = time.time()
|
||||
write_success = await cache_service.set_cached_data('health_check', test_key, test_data)
|
||||
write_time = time.time() - write_start
|
||||
|
||||
# Test read
|
||||
read_start = time.time()
|
||||
read_data = await cache_service.get_cached_data('health_check', test_key)
|
||||
read_time = time.time() - read_start
|
||||
|
||||
# Clean up
|
||||
await cache_service.invalidate_cache('health_check', test_key)
|
||||
|
||||
performance_status = 'healthy' if write_success and read_data and (write_time + read_time) <= self.health_thresholds['cache_response_time'] else 'degraded'
|
||||
|
||||
except Exception as e:
|
||||
write_time = 0
|
||||
read_time = 0
|
||||
performance_status = 'unhealthy'
|
||||
logger.error(f"Cache performance test failed: {str(e)}")
|
||||
|
||||
total_time = time.time() - start_time
|
||||
|
||||
return {
|
||||
'status': 'healthy' if connectivity_status == 'healthy' and performance_status == 'healthy' else 'degraded',
|
||||
'connectivity_status': connectivity_status,
|
||||
'performance_status': performance_status,
|
||||
'write_time': write_time,
|
||||
'read_time': read_time,
|
||||
'total_check_time': total_time,
|
||||
'statistics': cache_stats,
|
||||
'last_checked': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking cache health: {str(e)}")
|
||||
return {
|
||||
'status': 'unhealthy',
|
||||
'error': str(e),
|
||||
'last_checked': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
async def _check_ai_service_health(self, ai_service) -> Dict[str, Any]:
|
||||
"""Check AI service health and performance."""
|
||||
try:
|
||||
start_time = time.time()
|
||||
|
||||
# Test AI service connectivity
|
||||
try:
|
||||
# Simple test call to AI service
|
||||
test_prompt = "Test health check"
|
||||
ai_start = time.time()
|
||||
ai_response = await ai_service._call_ai_service(test_prompt, 'health_check')
|
||||
ai_time = time.time() - ai_start
|
||||
|
||||
connectivity_status = 'healthy' if ai_response else 'unhealthy'
|
||||
performance_status = 'healthy' if ai_time <= self.health_thresholds['ai_service_response_time'] else 'degraded'
|
||||
|
||||
except Exception as e:
|
||||
ai_time = 0
|
||||
connectivity_status = 'unhealthy'
|
||||
performance_status = 'unhealthy'
|
||||
logger.error(f"AI service health check failed: {str(e)}")
|
||||
|
||||
total_time = time.time() - start_time
|
||||
|
||||
return {
|
||||
'status': 'healthy' if connectivity_status == 'healthy' and performance_status == 'healthy' else 'degraded',
|
||||
'connectivity_status': connectivity_status,
|
||||
'performance_status': performance_status,
|
||||
'response_time': ai_time,
|
||||
'total_check_time': total_time,
|
||||
'last_checked': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking AI service health: {str(e)}")
|
||||
return {
|
||||
'status': 'unhealthy',
|
||||
'error': str(e),
|
||||
'last_checked': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
async def _check_system_resources(self) -> Dict[str, Any]:
|
||||
"""Check system resource usage."""
|
||||
try:
|
||||
import psutil
|
||||
|
||||
# CPU usage
|
||||
cpu_percent = psutil.cpu_percent(interval=1)
|
||||
cpu_status = 'healthy' if cpu_percent <= self.health_thresholds['cpu_usage_threshold'] else 'degraded'
|
||||
|
||||
# Memory usage
|
||||
memory = psutil.virtual_memory()
|
||||
memory_percent = memory.percent
|
||||
memory_status = 'healthy' if memory_percent <= self.health_thresholds['memory_usage_threshold'] else 'degraded'
|
||||
|
||||
# Disk usage
|
||||
disk = psutil.disk_usage('/')
|
||||
disk_percent = disk.percent
|
||||
disk_status = 'healthy' if disk_percent <= self.health_thresholds['disk_usage_threshold'] else 'degraded'
|
||||
|
||||
# Network status
|
||||
try:
|
||||
network = psutil.net_io_counters()
|
||||
network_status = 'healthy'
|
||||
except Exception:
|
||||
network_status = 'degraded'
|
||||
|
||||
return {
|
||||
'status': 'healthy' if all(s == 'healthy' for s in [cpu_status, memory_status, disk_status, network_status]) else 'degraded',
|
||||
'cpu': {
|
||||
'usage_percent': cpu_percent,
|
||||
'status': cpu_status
|
||||
},
|
||||
'memory': {
|
||||
'usage_percent': memory_percent,
|
||||
'available_gb': memory.available / (1024**3),
|
||||
'total_gb': memory.total / (1024**3),
|
||||
'status': memory_status
|
||||
},
|
||||
'disk': {
|
||||
'usage_percent': disk_percent,
|
||||
'free_gb': disk.free / (1024**3),
|
||||
'total_gb': disk.total / (1024**3),
|
||||
'status': disk_status
|
||||
},
|
||||
'network': {
|
||||
'status': network_status
|
||||
},
|
||||
'last_checked': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking system resources: {str(e)}")
|
||||
return {
|
||||
'status': 'unhealthy',
|
||||
'error': str(e),
|
||||
'last_checked': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
async def _get_database_statistics(self, db: Session) -> Dict[str, Any]:
|
||||
"""Get database statistics."""
|
||||
try:
|
||||
stats = {}
|
||||
|
||||
# Get table counts (simplified)
|
||||
try:
|
||||
result = db.execute(text("SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public'"))
|
||||
stats['table_count'] = result.fetchone()[0]
|
||||
except Exception:
|
||||
stats['table_count'] = 'unknown'
|
||||
|
||||
# Get database size (simplified)
|
||||
try:
|
||||
result = db.execute(text("SELECT pg_size_pretty(pg_database_size(current_database()))"))
|
||||
stats['database_size'] = result.fetchone()[0]
|
||||
except Exception:
|
||||
stats['database_size'] = 'unknown'
|
||||
|
||||
return stats
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting database statistics: {str(e)}")
|
||||
return {'error': str(e)}
|
||||
|
||||
def _determine_overall_health(self, components: Dict[str, Any]) -> str:
|
||||
"""Determine overall system health based on component status."""
|
||||
try:
|
||||
statuses = []
|
||||
for component_name, component_data in components.items():
|
||||
if isinstance(component_data, dict) and 'status' in component_data:
|
||||
statuses.append(component_data['status'])
|
||||
|
||||
if not statuses:
|
||||
return 'unknown'
|
||||
|
||||
if 'unhealthy' in statuses:
|
||||
return 'unhealthy'
|
||||
elif 'degraded' in statuses:
|
||||
return 'degraded'
|
||||
elif all(status == 'healthy' for status in statuses):
|
||||
return 'healthy'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error determining overall health: {str(e)}")
|
||||
return 'unknown'
|
||||
|
||||
def _generate_health_alerts(self, components: Dict[str, Any]) -> List[str]:
|
||||
"""Generate health alerts based on component status."""
|
||||
try:
|
||||
alerts = []
|
||||
|
||||
for component_name, component_data in components.items():
|
||||
if isinstance(component_data, dict) and 'status' in component_data:
|
||||
status = component_data['status']
|
||||
|
||||
if status == 'unhealthy':
|
||||
alerts.append(f"CRITICAL: {component_name} is unhealthy")
|
||||
elif status == 'degraded':
|
||||
alerts.append(f"WARNING: {component_name} performance is degraded")
|
||||
|
||||
# Component-specific alerts
|
||||
if component_name == 'database' and component_data.get('response_time', 0) > self.health_thresholds['database_response_time']:
|
||||
alerts.append(f"WARNING: Database response time is slow: {component_data['response_time']:.2f}s")
|
||||
|
||||
elif component_name == 'cache' and component_data.get('write_time', 0) + component_data.get('read_time', 0) > self.health_thresholds['cache_response_time']:
|
||||
alerts.append(f"WARNING: Cache response time is slow: {component_data.get('write_time', 0) + component_data.get('read_time', 0):.2f}s")
|
||||
|
||||
elif component_name == 'ai_service' and component_data.get('response_time', 0) > self.health_thresholds['ai_service_response_time']:
|
||||
alerts.append(f"WARNING: AI service response time is slow: {component_data['response_time']:.2f}s")
|
||||
|
||||
elif component_name == 'system':
|
||||
cpu_data = component_data.get('cpu', {})
|
||||
memory_data = component_data.get('memory', {})
|
||||
disk_data = component_data.get('disk', {})
|
||||
|
||||
if cpu_data.get('usage_percent', 0) > self.health_thresholds['cpu_usage_threshold']:
|
||||
alerts.append(f"WARNING: High CPU usage: {cpu_data['usage_percent']:.1f}%")
|
||||
|
||||
if memory_data.get('usage_percent', 0) > self.health_thresholds['memory_usage_threshold']:
|
||||
alerts.append(f"WARNING: High memory usage: {memory_data['usage_percent']:.1f}%")
|
||||
|
||||
if disk_data.get('usage_percent', 0) > self.health_thresholds['disk_usage_threshold']:
|
||||
alerts.append(f"WARNING: High disk usage: {disk_data['usage_percent']:.1f}%")
|
||||
|
||||
return alerts
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating health alerts: {str(e)}")
|
||||
return ['Error generating health alerts']
|
||||
|
||||
async def _generate_health_recommendations(self, components: Dict[str, Any]) -> List[str]:
|
||||
"""Generate health recommendations based on component status."""
|
||||
try:
|
||||
recommendations = []
|
||||
|
||||
for component_name, component_data in components.items():
|
||||
if isinstance(component_data, dict) and 'status' in component_data:
|
||||
status = component_data['status']
|
||||
|
||||
if status == 'unhealthy':
|
||||
if component_name == 'database':
|
||||
recommendations.append("Investigate database connectivity and configuration")
|
||||
elif component_name == 'cache':
|
||||
recommendations.append("Check cache service configuration and connectivity")
|
||||
elif component_name == 'ai_service':
|
||||
recommendations.append("Verify AI service configuration and API keys")
|
||||
elif component_name == 'system':
|
||||
recommendations.append("Check system resources and restart if necessary")
|
||||
|
||||
elif status == 'degraded':
|
||||
if component_name == 'database':
|
||||
recommendations.append("Optimize database queries and add indexes")
|
||||
elif component_name == 'cache':
|
||||
recommendations.append("Consider cache optimization and memory allocation")
|
||||
elif component_name == 'ai_service':
|
||||
recommendations.append("Review AI service performance and rate limits")
|
||||
elif component_name == 'system':
|
||||
recommendations.append("Monitor system resources and consider scaling")
|
||||
|
||||
# Specific recommendations based on metrics
|
||||
if component_name == 'database' and component_data.get('response_time', 0) > self.health_thresholds['database_response_time']:
|
||||
recommendations.append("Add database indexes for frequently queried columns")
|
||||
recommendations.append("Consider database connection pooling")
|
||||
|
||||
elif component_name == 'system':
|
||||
cpu_data = component_data.get('cpu', {})
|
||||
memory_data = component_data.get('memory', {})
|
||||
disk_data = component_data.get('disk', {})
|
||||
|
||||
if cpu_data.get('usage_percent', 0) > self.health_thresholds['cpu_usage_threshold']:
|
||||
recommendations.append("Consider scaling CPU resources or optimizing CPU-intensive operations")
|
||||
|
||||
if memory_data.get('usage_percent', 0) > self.health_thresholds['memory_usage_threshold']:
|
||||
recommendations.append("Increase memory allocation or optimize memory usage")
|
||||
|
||||
if disk_data.get('usage_percent', 0) > self.health_thresholds['disk_usage_threshold']:
|
||||
recommendations.append("Clean up disk space or increase storage capacity")
|
||||
|
||||
return recommendations
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating health recommendations: {str(e)}")
|
||||
return ['Unable to generate health recommendations']
|
||||
|
||||
async def get_health_history(self, hours: int = 24) -> List[Dict[str, Any]]:
|
||||
"""Get health check history."""
|
||||
try:
|
||||
# This would typically query a database for historical health data
|
||||
# For now, return the current health status
|
||||
return [self.health_status] if self.health_status.get('timestamp') else []
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting health history: {str(e)}")
|
||||
return []
|
||||
|
||||
async def set_health_thresholds(self, thresholds: Dict[str, float]) -> bool:
|
||||
"""Update health monitoring thresholds."""
|
||||
try:
|
||||
for key, value in thresholds.items():
|
||||
if key in self.health_thresholds:
|
||||
self.health_thresholds[key] = value
|
||||
logger.info(f"Updated health threshold {key}: {value}")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error setting health thresholds: {str(e)}")
|
||||
return False
|
||||
|
||||
async def get_health_thresholds(self) -> Dict[str, float]:
|
||||
"""Get current health monitoring thresholds."""
|
||||
return self.health_thresholds.copy()
|
||||
|
||||
async def start_continuous_monitoring(self, interval_seconds: int = 300) -> None:
|
||||
"""Start continuous health monitoring."""
|
||||
try:
|
||||
logger.info(f"Starting continuous health monitoring with {interval_seconds}s interval")
|
||||
|
||||
while True:
|
||||
try:
|
||||
# This would typically use the database session and services
|
||||
# For now, just log that monitoring is active
|
||||
logger.info("Continuous health monitoring check")
|
||||
|
||||
await asyncio.sleep(interval_seconds)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in continuous health monitoring: {str(e)}")
|
||||
await asyncio.sleep(60) # Wait 1 minute before retrying
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error starting continuous monitoring: {str(e)}")
|
||||
@@ -0,0 +1,507 @@
|
||||
"""
|
||||
Optimization Service
|
||||
Performance optimization and monitoring.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import asyncio
|
||||
from typing import Dict, Any, List, Optional, Callable
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class PerformanceOptimizationService:
|
||||
"""Service for performance optimization and monitoring."""
|
||||
|
||||
def __init__(self):
|
||||
self.performance_metrics = {
|
||||
'response_times': {},
|
||||
'database_queries': {},
|
||||
'memory_usage': {},
|
||||
'cache_hit_rates': {}
|
||||
}
|
||||
|
||||
self.optimization_config = {
|
||||
'max_response_time': 2.0, # seconds
|
||||
'max_database_queries': 10,
|
||||
'max_memory_usage': 512, # MB
|
||||
'min_cache_hit_rate': 0.8
|
||||
}
|
||||
|
||||
async def optimize_response_time(self, operation_name: str, operation_func: Callable, *args, **kwargs) -> Dict[str, Any]:
|
||||
"""Optimize response time for operations."""
|
||||
try:
|
||||
start_time = time.time()
|
||||
|
||||
# Execute operation
|
||||
result = await operation_func(*args, **kwargs)
|
||||
|
||||
end_time = time.time()
|
||||
response_time = end_time - start_time
|
||||
|
||||
# Record performance metrics
|
||||
self._record_response_time(operation_name, response_time)
|
||||
|
||||
# Check if optimization is needed
|
||||
if response_time > self.optimization_config['max_response_time']:
|
||||
optimization_suggestions = await self._suggest_response_time_optimizations(operation_name, response_time)
|
||||
logger.warning(f"Slow response time for {operation_name}: {response_time:.2f}s")
|
||||
else:
|
||||
optimization_suggestions = []
|
||||
|
||||
return {
|
||||
'result': result,
|
||||
'response_time': response_time,
|
||||
'optimization_suggestions': optimization_suggestions,
|
||||
'performance_status': 'optimal' if response_time <= self.optimization_config['max_response_time'] else 'needs_optimization'
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error optimizing response time for {operation_name}: {str(e)}")
|
||||
return {
|
||||
'result': None,
|
||||
'response_time': 0.0,
|
||||
'optimization_suggestions': ['Error occurred during operation'],
|
||||
'performance_status': 'error'
|
||||
}
|
||||
|
||||
async def optimize_database_queries(self, db: Session, query_func: Callable, *args, **kwargs) -> Dict[str, Any]:
|
||||
"""Optimize database queries."""
|
||||
try:
|
||||
start_time = time.time()
|
||||
query_count_before = self._get_query_count(db)
|
||||
|
||||
# Execute query function
|
||||
result = await query_func(db, *args, **kwargs)
|
||||
|
||||
end_time = time.time()
|
||||
query_count_after = self._get_query_count(db)
|
||||
query_count = query_count_after - query_count_before
|
||||
response_time = end_time - start_time
|
||||
|
||||
# Record database performance
|
||||
self._record_database_performance(query_func.__name__, query_count, response_time)
|
||||
|
||||
# Check if optimization is needed
|
||||
if query_count > self.optimization_config['max_database_queries']:
|
||||
optimization_suggestions = await self._suggest_database_optimizations(query_func.__name__, query_count, response_time)
|
||||
logger.warning(f"High query count for {query_func.__name__}: {query_count} queries")
|
||||
else:
|
||||
optimization_suggestions = []
|
||||
|
||||
return {
|
||||
'result': result,
|
||||
'query_count': query_count,
|
||||
'response_time': response_time,
|
||||
'optimization_suggestions': optimization_suggestions,
|
||||
'performance_status': 'optimal' if query_count <= self.optimization_config['max_database_queries'] else 'needs_optimization'
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error optimizing database queries for {query_func.__name__}: {str(e)}")
|
||||
return {
|
||||
'result': None,
|
||||
'query_count': 0,
|
||||
'response_time': 0.0,
|
||||
'optimization_suggestions': ['Error occurred during database operation'],
|
||||
'performance_status': 'error'
|
||||
}
|
||||
|
||||
async def optimize_memory_usage(self, operation_name: str, operation_func: Callable, *args, **kwargs) -> Dict[str, Any]:
|
||||
"""Optimize memory usage for operations."""
|
||||
try:
|
||||
import psutil
|
||||
import os
|
||||
|
||||
process = psutil.Process(os.getpid())
|
||||
memory_before = process.memory_info().rss / 1024 / 1024 # MB
|
||||
|
||||
# Execute operation
|
||||
result = await operation_func(*args, **kwargs)
|
||||
|
||||
memory_after = process.memory_info().rss / 1024 / 1024 # MB
|
||||
memory_used = memory_after - memory_before
|
||||
|
||||
# Record memory usage
|
||||
self._record_memory_usage(operation_name, memory_used)
|
||||
|
||||
# Check if optimization is needed
|
||||
if memory_used > self.optimization_config['max_memory_usage']:
|
||||
optimization_suggestions = await self._suggest_memory_optimizations(operation_name, memory_used)
|
||||
logger.warning(f"High memory usage for {operation_name}: {memory_used:.2f}MB")
|
||||
else:
|
||||
optimization_suggestions = []
|
||||
|
||||
return {
|
||||
'result': result,
|
||||
'memory_used_mb': memory_used,
|
||||
'optimization_suggestions': optimization_suggestions,
|
||||
'performance_status': 'optimal' if memory_used <= self.optimization_config['max_memory_usage'] else 'needs_optimization'
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error optimizing memory usage for {operation_name}: {str(e)}")
|
||||
return {
|
||||
'result': None,
|
||||
'memory_used_mb': 0.0,
|
||||
'optimization_suggestions': ['Error occurred during memory optimization'],
|
||||
'performance_status': 'error'
|
||||
}
|
||||
|
||||
async def optimize_cache_performance(self, cache_service, operation_name: str) -> Dict[str, Any]:
|
||||
"""Optimize cache performance."""
|
||||
try:
|
||||
# Get cache statistics
|
||||
cache_stats = await cache_service.get_cache_stats()
|
||||
|
||||
# Calculate cache hit rates
|
||||
hit_rates = {}
|
||||
for cache_type, stats in cache_stats.items():
|
||||
if stats.get('entries', 0) > 0:
|
||||
# This is a simplified calculation - in practice, you'd track actual hits/misses
|
||||
hit_rates[cache_type] = 0.8 # Placeholder
|
||||
|
||||
# Record cache performance
|
||||
self._record_cache_performance(operation_name, hit_rates)
|
||||
|
||||
# Check if optimization is needed
|
||||
optimization_suggestions = []
|
||||
for cache_type, hit_rate in hit_rates.items():
|
||||
if hit_rate < self.optimization_config['min_cache_hit_rate']:
|
||||
optimization_suggestions.append(f"Low cache hit rate for {cache_type}: {hit_rate:.2%}")
|
||||
|
||||
return {
|
||||
'cache_stats': cache_stats,
|
||||
'hit_rates': hit_rates,
|
||||
'optimization_suggestions': optimization_suggestions,
|
||||
'performance_status': 'optimal' if not optimization_suggestions else 'needs_optimization'
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error optimizing cache performance: {str(e)}")
|
||||
return {
|
||||
'cache_stats': {},
|
||||
'hit_rates': {},
|
||||
'optimization_suggestions': ['Error occurred during cache optimization'],
|
||||
'performance_status': 'error'
|
||||
}
|
||||
|
||||
def _record_response_time(self, operation_name: str, response_time: float) -> None:
|
||||
"""Record response time metrics."""
|
||||
try:
|
||||
if operation_name not in self.performance_metrics['response_times']:
|
||||
self.performance_metrics['response_times'][operation_name] = []
|
||||
|
||||
self.performance_metrics['response_times'][operation_name].append({
|
||||
'response_time': response_time,
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
})
|
||||
|
||||
# Keep only last 100 entries
|
||||
if len(self.performance_metrics['response_times'][operation_name]) > 100:
|
||||
self.performance_metrics['response_times'][operation_name] = self.performance_metrics['response_times'][operation_name][-100:]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording response time: {str(e)}")
|
||||
|
||||
def _record_database_performance(self, operation_name: str, query_count: int, response_time: float) -> None:
|
||||
"""Record database performance metrics."""
|
||||
try:
|
||||
if operation_name not in self.performance_metrics['database_queries']:
|
||||
self.performance_metrics['database_queries'][operation_name] = []
|
||||
|
||||
self.performance_metrics['database_queries'][operation_name].append({
|
||||
'query_count': query_count,
|
||||
'response_time': response_time,
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
})
|
||||
|
||||
# Keep only last 100 entries
|
||||
if len(self.performance_metrics['database_queries'][operation_name]) > 100:
|
||||
self.performance_metrics['database_queries'][operation_name] = self.performance_metrics['database_queries'][operation_name][-100:]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording database performance: {str(e)}")
|
||||
|
||||
def _record_memory_usage(self, operation_name: str, memory_used: float) -> None:
|
||||
"""Record memory usage metrics."""
|
||||
try:
|
||||
if operation_name not in self.performance_metrics['memory_usage']:
|
||||
self.performance_metrics['memory_usage'][operation_name] = []
|
||||
|
||||
self.performance_metrics['memory_usage'][operation_name].append({
|
||||
'memory_used_mb': memory_used,
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
})
|
||||
|
||||
# Keep only last 100 entries
|
||||
if len(self.performance_metrics['memory_usage'][operation_name]) > 100:
|
||||
self.performance_metrics['memory_usage'][operation_name] = self.performance_metrics['memory_usage'][operation_name][-100:]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording memory usage: {str(e)}")
|
||||
|
||||
def _record_cache_performance(self, operation_name: str, hit_rates: Dict[str, float]) -> None:
|
||||
"""Record cache performance metrics."""
|
||||
try:
|
||||
if operation_name not in self.performance_metrics['cache_hit_rates']:
|
||||
self.performance_metrics['cache_hit_rates'][operation_name] = []
|
||||
|
||||
self.performance_metrics['cache_hit_rates'][operation_name].append({
|
||||
'hit_rates': hit_rates,
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
})
|
||||
|
||||
# Keep only last 100 entries
|
||||
if len(self.performance_metrics['cache_hit_rates'][operation_name]) > 100:
|
||||
self.performance_metrics['cache_hit_rates'][operation_name] = self.performance_metrics['cache_hit_rates'][operation_name][-100:]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording cache performance: {str(e)}")
|
||||
|
||||
def _get_query_count(self, db: Session) -> int:
|
||||
"""Get current query count from database session."""
|
||||
try:
|
||||
# This is a simplified implementation
|
||||
# In practice, you'd use database-specific monitoring tools
|
||||
return 0
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting query count: {str(e)}")
|
||||
return 0
|
||||
|
||||
async def _suggest_response_time_optimizations(self, operation_name: str, response_time: float) -> List[str]:
|
||||
"""Suggest optimizations for slow response times."""
|
||||
try:
|
||||
suggestions = []
|
||||
|
||||
if response_time > 5.0:
|
||||
suggestions.append("Consider implementing caching for this operation")
|
||||
suggestions.append("Review database query optimization")
|
||||
suggestions.append("Consider async processing for heavy operations")
|
||||
elif response_time > 2.0:
|
||||
suggestions.append("Optimize database queries")
|
||||
suggestions.append("Consider adding indexes for frequently accessed data")
|
||||
suggestions.append("Review data processing algorithms")
|
||||
|
||||
# Add operation-specific suggestions
|
||||
if 'ai_analysis' in operation_name.lower():
|
||||
suggestions.append("Consider implementing AI response caching")
|
||||
suggestions.append("Review AI service integration efficiency")
|
||||
elif 'onboarding' in operation_name.lower():
|
||||
suggestions.append("Optimize data transformation algorithms")
|
||||
suggestions.append("Consider batch processing for large datasets")
|
||||
|
||||
return suggestions
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error suggesting response time optimizations: {str(e)}")
|
||||
return ["Unable to generate optimization suggestions"]
|
||||
|
||||
async def _suggest_database_optimizations(self, operation_name: str, query_count: int, response_time: float) -> List[str]:
|
||||
"""Suggest optimizations for database performance."""
|
||||
try:
|
||||
suggestions = []
|
||||
|
||||
if query_count > 20:
|
||||
suggestions.append("Implement query batching to reduce database calls")
|
||||
suggestions.append("Review and optimize N+1 query patterns")
|
||||
suggestions.append("Consider implementing database connection pooling")
|
||||
elif query_count > 10:
|
||||
suggestions.append("Optimize database queries with proper indexing")
|
||||
suggestions.append("Consider implementing query result caching")
|
||||
suggestions.append("Review database schema for optimization opportunities")
|
||||
|
||||
if response_time > 1.0:
|
||||
suggestions.append("Add database indexes for frequently queried columns")
|
||||
suggestions.append("Consider read replicas for heavy read operations")
|
||||
suggestions.append("Optimize database connection settings")
|
||||
|
||||
# Add operation-specific suggestions
|
||||
if 'strategy' in operation_name.lower():
|
||||
suggestions.append("Consider implementing strategy data caching")
|
||||
suggestions.append("Optimize strategy-related database queries")
|
||||
elif 'onboarding' in operation_name.lower():
|
||||
suggestions.append("Batch onboarding data processing")
|
||||
suggestions.append("Optimize onboarding data retrieval queries")
|
||||
|
||||
return suggestions
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error suggesting database optimizations: {str(e)}")
|
||||
return ["Unable to generate database optimization suggestions"]
|
||||
|
||||
async def _suggest_memory_optimizations(self, operation_name: str, memory_used: float) -> List[str]:
|
||||
"""Suggest optimizations for memory usage."""
|
||||
try:
|
||||
suggestions = []
|
||||
|
||||
if memory_used > 100:
|
||||
suggestions.append("Implement data streaming for large datasets")
|
||||
suggestions.append("Review memory-intensive data structures")
|
||||
suggestions.append("Consider implementing pagination")
|
||||
elif memory_used > 50:
|
||||
suggestions.append("Optimize data processing algorithms")
|
||||
suggestions.append("Review object lifecycle management")
|
||||
suggestions.append("Consider implementing lazy loading")
|
||||
|
||||
# Add operation-specific suggestions
|
||||
if 'ai_analysis' in operation_name.lower():
|
||||
suggestions.append("Implement AI response streaming")
|
||||
suggestions.append("Optimize AI model memory usage")
|
||||
elif 'onboarding' in operation_name.lower():
|
||||
suggestions.append("Process onboarding data in smaller chunks")
|
||||
suggestions.append("Implement data cleanup after processing")
|
||||
|
||||
return suggestions
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error suggesting memory optimizations: {str(e)}")
|
||||
return ["Unable to generate memory optimization suggestions"]
|
||||
|
||||
async def get_performance_report(self) -> Dict[str, Any]:
|
||||
"""Generate comprehensive performance report."""
|
||||
try:
|
||||
report = {
|
||||
'timestamp': datetime.utcnow().isoformat(),
|
||||
'response_times': self._calculate_average_response_times(),
|
||||
'database_performance': self._calculate_database_performance(),
|
||||
'memory_usage': self._calculate_memory_usage(),
|
||||
'cache_performance': self._calculate_cache_performance(),
|
||||
'optimization_recommendations': await self._generate_optimization_recommendations()
|
||||
}
|
||||
|
||||
return report
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating performance report: {str(e)}")
|
||||
return {
|
||||
'timestamp': datetime.utcnow().isoformat(),
|
||||
'error': str(e)
|
||||
}
|
||||
|
||||
def _calculate_average_response_times(self) -> Dict[str, float]:
|
||||
"""Calculate average response times for operations."""
|
||||
try:
|
||||
averages = {}
|
||||
for operation_name, times in self.performance_metrics['response_times'].items():
|
||||
if times:
|
||||
avg_time = sum(t['response_time'] for t in times) / len(times)
|
||||
averages[operation_name] = avg_time
|
||||
|
||||
return averages
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating average response times: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _calculate_database_performance(self) -> Dict[str, Dict[str, float]]:
|
||||
"""Calculate database performance metrics."""
|
||||
try:
|
||||
performance = {}
|
||||
for operation_name, queries in self.performance_metrics['database_queries'].items():
|
||||
if queries:
|
||||
avg_queries = sum(q['query_count'] for q in queries) / len(queries)
|
||||
avg_time = sum(q['response_time'] for q in queries) / len(queries)
|
||||
performance[operation_name] = {
|
||||
'average_queries': avg_queries,
|
||||
'average_response_time': avg_time
|
||||
}
|
||||
|
||||
return performance
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating database performance: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _calculate_memory_usage(self) -> Dict[str, float]:
|
||||
"""Calculate average memory usage for operations."""
|
||||
try:
|
||||
averages = {}
|
||||
for operation_name, usage in self.performance_metrics['memory_usage'].items():
|
||||
if usage:
|
||||
avg_memory = sum(u['memory_used_mb'] for u in usage) / len(usage)
|
||||
averages[operation_name] = avg_memory
|
||||
|
||||
return averages
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating memory usage: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _calculate_cache_performance(self) -> Dict[str, float]:
|
||||
"""Calculate cache performance metrics."""
|
||||
try:
|
||||
performance = {}
|
||||
for operation_name, rates in self.performance_metrics['cache_hit_rates'].items():
|
||||
if rates:
|
||||
# Calculate average hit rate across all cache types
|
||||
all_rates = []
|
||||
for rate_data in rates:
|
||||
if rate_data['hit_rates']:
|
||||
avg_rate = sum(rate_data['hit_rates'].values()) / len(rate_data['hit_rates'])
|
||||
all_rates.append(avg_rate)
|
||||
|
||||
if all_rates:
|
||||
performance[operation_name] = sum(all_rates) / len(all_rates)
|
||||
|
||||
return performance
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating cache performance: {str(e)}")
|
||||
return {}
|
||||
|
||||
async def _generate_optimization_recommendations(self) -> List[str]:
|
||||
"""Generate optimization recommendations based on performance data."""
|
||||
try:
|
||||
recommendations = []
|
||||
|
||||
# Check response times
|
||||
avg_response_times = self._calculate_average_response_times()
|
||||
for operation, avg_time in avg_response_times.items():
|
||||
if avg_time > self.optimization_config['max_response_time']:
|
||||
recommendations.append(f"Optimize response time for {operation} (avg: {avg_time:.2f}s)")
|
||||
|
||||
# Check database performance
|
||||
db_performance = self._calculate_database_performance()
|
||||
for operation, perf in db_performance.items():
|
||||
if perf['average_queries'] > self.optimization_config['max_database_queries']:
|
||||
recommendations.append(f"Reduce database queries for {operation} (avg: {perf['average_queries']:.1f} queries)")
|
||||
|
||||
# Check memory usage
|
||||
memory_usage = self._calculate_memory_usage()
|
||||
for operation, memory in memory_usage.items():
|
||||
if memory > self.optimization_config['max_memory_usage']:
|
||||
recommendations.append(f"Optimize memory usage for {operation} (avg: {memory:.1f}MB)")
|
||||
|
||||
return recommendations
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating optimization recommendations: {str(e)}")
|
||||
return ["Unable to generate optimization recommendations"]
|
||||
|
||||
async def cleanup_old_metrics(self, days_to_keep: int = 30) -> Dict[str, int]:
|
||||
"""Clean up old performance metrics."""
|
||||
try:
|
||||
cutoff_date = datetime.utcnow() - timedelta(days=days_to_keep)
|
||||
cleaned_count = 0
|
||||
|
||||
for metric_type, operations in self.performance_metrics.items():
|
||||
for operation_name, metrics in operations.items():
|
||||
if isinstance(metrics, list):
|
||||
original_count = len(metrics)
|
||||
# Filter out old metrics
|
||||
self.performance_metrics[metric_type][operation_name] = [
|
||||
m for m in metrics
|
||||
if datetime.fromisoformat(m['timestamp']) > cutoff_date
|
||||
]
|
||||
cleaned_count += original_count - len(self.performance_metrics[metric_type][operation_name])
|
||||
|
||||
logger.info(f"Cleaned up {cleaned_count} old performance metrics")
|
||||
return {'cleaned_count': cleaned_count}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up old metrics: {str(e)}")
|
||||
return {'cleaned_count': 0}
|
||||
Reference in New Issue
Block a user