ALwrity Version 0.5.0 (Fastapi + React )

This commit is contained in:
ajaysi
2025-08-06 12:48:02 +05:30
parent f28a919caa
commit 32f97fa6b3
476 changed files with 115544 additions and 28747 deletions

View File

@@ -0,0 +1,10 @@
"""
Performance Module
Caching, optimization, and health monitoring services.
"""
from .caching import CachingService
from .optimization import PerformanceOptimizationService
from .health_monitoring import HealthMonitoringService
__all__ = ['CachingService', 'PerformanceOptimizationService', 'HealthMonitoringService']

View File

@@ -0,0 +1,469 @@
"""
Caching Service
Cache management and optimization.
"""
import logging
import json
import hashlib
from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
logger = logging.getLogger(__name__)
# Try to import Redis, fallback to in-memory if not available
try:
import redis
REDIS_AVAILABLE = True
except ImportError:
REDIS_AVAILABLE = False
logger.warning("Redis not available, using in-memory caching")
class CachingService:
"""Service for intelligent caching of content strategy data."""
def __init__(self):
# Cache configuration
self.cache_config = {
'ai_analysis': {
'ttl': 3600, # 1 hour
'max_size': 1000,
'priority': 'high'
},
'onboarding_data': {
'ttl': 1800, # 30 minutes
'max_size': 500,
'priority': 'medium'
},
'strategy_cache': {
'ttl': 7200, # 2 hours
'max_size': 200,
'priority': 'high'
},
'field_transformations': {
'ttl': 900, # 15 minutes
'max_size': 1000,
'priority': 'low'
}
}
# Initialize Redis connection if available
self.redis_available = False
if REDIS_AVAILABLE:
try:
self.redis_client = redis.Redis(
host='localhost',
port=6379,
db=0,
decode_responses=True,
socket_connect_timeout=5,
socket_timeout=5
)
# Test connection
self.redis_client.ping()
self.redis_available = True
logger.info("Redis connection established successfully")
except Exception as e:
logger.warning(f"Redis connection failed: {str(e)}. Using in-memory cache.")
self.redis_available = False
self.memory_cache = {}
else:
logger.info("Using in-memory cache (Redis not available)")
self.memory_cache = {}
def get_cache_key(self, cache_type: str, identifier: str, **kwargs) -> str:
"""Generate a unique cache key."""
try:
# Create a hash of the identifier and additional parameters
key_data = f"{cache_type}:{identifier}"
if kwargs:
key_data += ":" + json.dumps(kwargs, sort_keys=True)
# Create hash for consistent key length
key_hash = hashlib.md5(key_data.encode()).hexdigest()
return f"content_strategy:{cache_type}:{key_hash}"
except Exception as e:
logger.error(f"Error generating cache key: {str(e)}")
return f"content_strategy:{cache_type}:{identifier}"
async def get_cached_data(self, cache_type: str, identifier: str, **kwargs) -> Optional[Dict[str, Any]]:
"""Retrieve cached data."""
try:
if not self.redis_available:
return self._get_from_memory_cache(cache_type, identifier, **kwargs)
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
cached_data = self.redis_client.get(cache_key)
if cached_data:
data = json.loads(cached_data)
logger.info(f"Cache hit for {cache_type}:{identifier}")
return data
else:
logger.info(f"Cache miss for {cache_type}:{identifier}")
return None
except Exception as e:
logger.error(f"Error retrieving cached data: {str(e)}")
return None
async def set_cached_data(self, cache_type: str, identifier: str, data: Dict[str, Any], **kwargs) -> bool:
"""Store data in cache."""
try:
if not self.redis_available:
return self._set_in_memory_cache(cache_type, identifier, data, **kwargs)
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
ttl = self.cache_config.get(cache_type, {}).get('ttl', 3600)
# Add metadata to cached data
cached_data = {
'data': data,
'metadata': {
'cached_at': datetime.utcnow().isoformat(),
'cache_type': cache_type,
'identifier': identifier,
'ttl': ttl
}
}
# Store in Redis with TTL
result = self.redis_client.setex(
cache_key,
ttl,
json.dumps(cached_data, default=str)
)
if result:
logger.info(f"Data cached successfully for {cache_type}:{identifier}")
await self._update_cache_stats(cache_type, 'set')
return True
else:
logger.warning(f"Failed to cache data for {cache_type}:{identifier}")
return False
except Exception as e:
logger.error(f"Error setting cached data: {str(e)}")
return False
async def invalidate_cache(self, cache_type: str, identifier: str, **kwargs) -> bool:
"""Invalidate specific cached data."""
try:
if not self.redis_available:
return self._invalidate_memory_cache(cache_type, identifier, **kwargs)
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
result = self.redis_client.delete(cache_key)
if result:
logger.info(f"Cache invalidated for {cache_type}:{identifier}")
await self._update_cache_stats(cache_type, 'invalidate')
return True
else:
logger.warning(f"No cache entry found to invalidate for {cache_type}:{identifier}")
return False
except Exception as e:
logger.error(f"Error invalidating cache: {str(e)}")
return False
async def clear_cache_type(self, cache_type: str) -> bool:
"""Clear all cached data of a specific type."""
try:
if not self.redis_available:
return self._clear_memory_cache_type(cache_type)
pattern = f"content_strategy:{cache_type}:*"
keys = self.redis_client.keys(pattern)
if keys:
result = self.redis_client.delete(*keys)
logger.info(f"Cleared {result} cache entries for {cache_type}")
await self._update_cache_stats(cache_type, 'clear')
return True
else:
logger.info(f"No cache entries found for {cache_type}")
return True
except Exception as e:
logger.error(f"Error clearing cache type {cache_type}: {str(e)}")
return False
async def get_cache_stats(self, cache_type: Optional[str] = None) -> Dict[str, Any]:
"""Get cache statistics."""
try:
if not self.redis_available:
return self._get_memory_cache_stats(cache_type)
stats = {}
if cache_type:
pattern = f"content_strategy:{cache_type}:*"
keys = self.redis_client.keys(pattern)
stats[cache_type] = {
'entries': len(keys),
'size_bytes': sum(len(self.redis_client.get(key) or '') for key in keys),
'config': self.cache_config.get(cache_type, {})
}
else:
for cache_type_name in self.cache_config.keys():
pattern = f"content_strategy:{cache_type_name}:*"
keys = self.redis_client.keys(pattern)
stats[cache_type_name] = {
'entries': len(keys),
'size_bytes': sum(len(self.redis_client.get(key) or '') for key in keys),
'config': self.cache_config.get(cache_type_name, {})
}
return stats
except Exception as e:
logger.error(f"Error getting cache stats: {str(e)}")
return {}
async def optimize_cache(self) -> Dict[str, Any]:
"""Optimize cache by removing expired entries and managing memory."""
try:
if not self.redis_available:
return self._optimize_memory_cache()
optimization_results = {}
for cache_type, config in self.cache_config.items():
pattern = f"content_strategy:{cache_type}:*"
keys = self.redis_client.keys(pattern)
if len(keys) > config.get('max_size', 1000):
# Remove oldest entries to maintain max size
keys_with_times = []
for key in keys:
ttl = self.redis_client.ttl(key)
if ttl > 0: # Key still has TTL
keys_with_times.append((key, ttl))
# Sort by TTL (oldest first)
keys_with_times.sort(key=lambda x: x[1])
# Remove excess entries
excess_count = len(keys) - config.get('max_size', 1000)
keys_to_remove = [key for key, _ in keys_with_times[:excess_count]]
if keys_to_remove:
removed_count = self.redis_client.delete(*keys_to_remove)
optimization_results[cache_type] = {
'entries_removed': removed_count,
'reason': 'max_size_exceeded'
}
logger.info(f"Optimized {cache_type} cache: removed {removed_count} entries")
return optimization_results
except Exception as e:
logger.error(f"Error optimizing cache: {str(e)}")
return {}
async def _update_cache_stats(self, cache_type: str, operation: str) -> None:
"""Update cache statistics."""
try:
if not self.redis_available:
return
stats_key = f"cache_stats:{cache_type}"
current_stats = self.redis_client.hgetall(stats_key)
# Update operation counts
current_stats[f"{operation}_count"] = str(int(current_stats.get(f"{operation}_count", 0)) + 1)
current_stats['last_updated'] = datetime.utcnow().isoformat()
# Store updated stats
self.redis_client.hset(stats_key, mapping=current_stats)
except Exception as e:
logger.error(f"Error updating cache stats: {str(e)}")
# Memory cache fallback methods
def _get_from_memory_cache(self, cache_type: str, identifier: str, **kwargs) -> Optional[Dict[str, Any]]:
"""Get data from memory cache."""
try:
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
cached_data = self.memory_cache.get(cache_key)
if cached_data:
# Check if data is still valid
cached_at = datetime.fromisoformat(cached_data['metadata']['cached_at'])
ttl = cached_data['metadata']['ttl']
if datetime.utcnow() - cached_at < timedelta(seconds=ttl):
logger.info(f"Memory cache hit for {cache_type}:{identifier}")
return cached_data['data']
else:
# Remove expired entry
del self.memory_cache[cache_key]
return None
except Exception as e:
logger.error(f"Error getting from memory cache: {str(e)}")
return None
def _set_in_memory_cache(self, cache_type: str, identifier: str, data: Dict[str, Any], **kwargs) -> bool:
"""Set data in memory cache."""
try:
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
ttl = self.cache_config.get(cache_type, {}).get('ttl', 3600)
cached_data = {
'data': data,
'metadata': {
'cached_at': datetime.utcnow().isoformat(),
'cache_type': cache_type,
'identifier': identifier,
'ttl': ttl
}
}
# Check max size and remove oldest if needed
max_size = self.cache_config.get(cache_type, {}).get('max_size', 1000)
if len(self.memory_cache) >= max_size:
# Remove oldest entry
oldest_key = min(self.memory_cache.keys(),
key=lambda k: self.memory_cache[k]['metadata']['cached_at'])
del self.memory_cache[oldest_key]
self.memory_cache[cache_key] = cached_data
logger.info(f"Data cached in memory for {cache_type}:{identifier}")
return True
except Exception as e:
logger.error(f"Error setting in memory cache: {str(e)}")
return False
def _invalidate_memory_cache(self, cache_type: str, identifier: str, **kwargs) -> bool:
"""Invalidate memory cache entry."""
try:
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
if cache_key in self.memory_cache:
del self.memory_cache[cache_key]
logger.info(f"Memory cache invalidated for {cache_type}:{identifier}")
return True
return False
except Exception as e:
logger.error(f"Error invalidating memory cache: {str(e)}")
return False
def _clear_memory_cache_type(self, cache_type: str) -> bool:
"""Clear memory cache by type."""
try:
keys_to_remove = [key for key in self.memory_cache.keys()
if key.startswith(f"content_strategy:{cache_type}:")]
for key in keys_to_remove:
del self.memory_cache[key]
logger.info(f"Cleared {len(keys_to_remove)} memory cache entries for {cache_type}")
return True
except Exception as e:
logger.error(f"Error clearing memory cache type: {str(e)}")
return False
def _get_memory_cache_stats(self, cache_type: Optional[str] = None) -> Dict[str, Any]:
"""Get memory cache statistics."""
try:
stats = {}
if cache_type:
keys = [key for key in self.memory_cache.keys()
if key.startswith(f"content_strategy:{cache_type}:")]
stats[cache_type] = {
'entries': len(keys),
'size_bytes': sum(len(str(value)) for value in [self.memory_cache[key] for key in keys]),
'config': self.cache_config.get(cache_type, {})
}
else:
for cache_type_name in self.cache_config.keys():
keys = [key for key in self.memory_cache.keys()
if key.startswith(f"content_strategy:{cache_type_name}:")]
stats[cache_type_name] = {
'entries': len(keys),
'size_bytes': sum(len(str(value)) for value in [self.memory_cache[key] for key in keys]),
'config': self.cache_config.get(cache_type_name, {})
}
return stats
except Exception as e:
logger.error(f"Error getting memory cache stats: {str(e)}")
return {}
def _optimize_memory_cache(self) -> Dict[str, Any]:
"""Optimize memory cache."""
try:
optimization_results = {}
for cache_type, config in self.cache_config.items():
keys = [key for key in self.memory_cache.keys()
if key.startswith(f"content_strategy:{cache_type}:")]
if len(keys) > config.get('max_size', 1000):
# Remove oldest entries
keys_with_times = []
for key in keys:
cached_at = datetime.fromisoformat(self.memory_cache[key]['metadata']['cached_at'])
keys_with_times.append((key, cached_at))
# Sort by cached time (oldest first)
keys_with_times.sort(key=lambda x: x[1])
# Remove excess entries
excess_count = len(keys) - config.get('max_size', 1000)
keys_to_remove = [key for key, _ in keys_with_times[:excess_count]]
for key in keys_to_remove:
del self.memory_cache[key]
optimization_results[cache_type] = {
'entries_removed': len(keys_to_remove),
'reason': 'max_size_exceeded'
}
return optimization_results
except Exception as e:
logger.error(f"Error optimizing memory cache: {str(e)}")
return {}
# Cache-specific methods for different data types
async def cache_ai_analysis(self, user_id: int, analysis_type: str, analysis_data: Dict[str, Any]) -> bool:
"""Cache AI analysis results."""
return await self.set_cached_data('ai_analysis', f"{user_id}:{analysis_type}", analysis_data)
async def get_cached_ai_analysis(self, user_id: int, analysis_type: str) -> Optional[Dict[str, Any]]:
"""Get cached AI analysis results."""
return await self.get_cached_data('ai_analysis', f"{user_id}:{analysis_type}")
async def cache_onboarding_data(self, user_id: int, onboarding_data: Dict[str, Any]) -> bool:
"""Cache onboarding data."""
return await self.set_cached_data('onboarding_data', str(user_id), onboarding_data)
async def get_cached_onboarding_data(self, user_id: int) -> Optional[Dict[str, Any]]:
"""Get cached onboarding data."""
return await self.get_cached_data('onboarding_data', str(user_id))
async def cache_strategy(self, strategy_id: int, strategy_data: Dict[str, Any]) -> bool:
"""Cache strategy data."""
return await self.set_cached_data('strategy_cache', str(strategy_id), strategy_data)
async def get_cached_strategy(self, strategy_id: int) -> Optional[Dict[str, Any]]:
"""Get cached strategy data."""
return await self.get_cached_data('strategy_cache', str(strategy_id))
async def cache_field_transformations(self, user_id: int, transformations: Dict[str, Any]) -> bool:
"""Cache field transformations."""
return await self.set_cached_data('field_transformations', str(user_id), transformations)
async def get_cached_field_transformations(self, user_id: int) -> Optional[Dict[str, Any]]:
"""Get cached field transformations."""
return await self.get_cached_data('field_transformations', str(user_id))

View File

@@ -0,0 +1,503 @@
"""
Health Monitoring Service
System health monitoring and performance tracking.
"""
import logging
import time
import asyncio
from typing import Dict, Any, List, Optional
from datetime import datetime, timedelta
from sqlalchemy.orm import Session
from sqlalchemy import text
logger = logging.getLogger(__name__)
class HealthMonitoringService:
"""Service for system health monitoring and assessment."""
def __init__(self):
self.health_thresholds = {
'database_response_time': 1.0, # seconds
'cache_response_time': 0.1, # seconds
'ai_service_response_time': 5.0, # seconds
'memory_usage_threshold': 80, # percentage
'cpu_usage_threshold': 80, # percentage
'disk_usage_threshold': 90, # percentage
'error_rate_threshold': 0.05 # 5%
}
self.health_status = {
'timestamp': None,
'overall_status': 'healthy',
'components': {},
'alerts': [],
'recommendations': []
}
async def check_system_health(self, db: Session, cache_service=None, ai_service=None) -> Dict[str, Any]:
"""Perform comprehensive system health check."""
try:
logger.info("Starting comprehensive system health check")
health_report = {
'timestamp': datetime.utcnow().isoformat(),
'overall_status': 'healthy',
'components': {},
'alerts': [],
'recommendations': []
}
# Check database health
db_health = await self._check_database_health(db)
health_report['components']['database'] = db_health
# Check cache health
if cache_service:
cache_health = await self._check_cache_health(cache_service)
health_report['components']['cache'] = cache_health
else:
health_report['components']['cache'] = {'status': 'not_available', 'message': 'Cache service not provided'}
# Check AI service health
if ai_service:
ai_health = await self._check_ai_service_health(ai_service)
health_report['components']['ai_service'] = ai_health
else:
health_report['components']['ai_service'] = {'status': 'not_available', 'message': 'AI service not provided'}
# Check system resources
system_health = await self._check_system_resources()
health_report['components']['system'] = system_health
# Determine overall status
health_report['overall_status'] = self._determine_overall_health(health_report['components'])
# Generate alerts and recommendations
health_report['alerts'] = self._generate_health_alerts(health_report['components'])
health_report['recommendations'] = await self._generate_health_recommendations(health_report['components'])
# Update health status
self.health_status = health_report
logger.info(f"System health check completed. Overall status: {health_report['overall_status']}")
return health_report
except Exception as e:
logger.error(f"Error during system health check: {str(e)}")
return {
'timestamp': datetime.utcnow().isoformat(),
'overall_status': 'error',
'components': {},
'alerts': [f'Health check failed: {str(e)}'],
'recommendations': ['Investigate health check system']
}
async def _check_database_health(self, db: Session) -> Dict[str, Any]:
"""Check database health and performance."""
try:
start_time = time.time()
# Test database connection
try:
result = db.execute(text("SELECT 1"))
result.fetchone()
connection_status = 'healthy'
except Exception as e:
connection_status = 'unhealthy'
logger.error(f"Database connection test failed: {str(e)}")
# Test query performance
try:
query_start = time.time()
result = db.execute(text("SELECT COUNT(*) FROM information_schema.tables"))
result.fetchone()
query_time = time.time() - query_start
query_status = 'healthy' if query_time <= self.health_thresholds['database_response_time'] else 'degraded'
except Exception as e:
query_time = 0
query_status = 'unhealthy'
logger.error(f"Database query test failed: {str(e)}")
# Check database size and performance
try:
# Get database statistics
db_stats = await self._get_database_statistics(db)
except Exception as e:
db_stats = {'error': str(e)}
total_time = time.time() - start_time
return {
'status': 'healthy' if connection_status == 'healthy' and query_status == 'healthy' else 'degraded',
'connection_status': connection_status,
'query_status': query_status,
'response_time': query_time,
'total_check_time': total_time,
'statistics': db_stats,
'last_checked': datetime.utcnow().isoformat()
}
except Exception as e:
logger.error(f"Error checking database health: {str(e)}")
return {
'status': 'unhealthy',
'error': str(e),
'last_checked': datetime.utcnow().isoformat()
}
async def _check_cache_health(self, cache_service) -> Dict[str, Any]:
"""Check cache health and performance."""
try:
start_time = time.time()
# Test cache connectivity
try:
cache_stats = await cache_service.get_cache_stats()
connectivity_status = 'healthy'
except Exception as e:
cache_stats = {}
connectivity_status = 'unhealthy'
logger.error(f"Cache connectivity test failed: {str(e)}")
# Test cache performance
try:
test_key = f"health_check_{int(time.time())}"
test_data = {'test': 'data', 'timestamp': datetime.utcnow().isoformat()}
# Test write
write_start = time.time()
write_success = await cache_service.set_cached_data('health_check', test_key, test_data)
write_time = time.time() - write_start
# Test read
read_start = time.time()
read_data = await cache_service.get_cached_data('health_check', test_key)
read_time = time.time() - read_start
# Clean up
await cache_service.invalidate_cache('health_check', test_key)
performance_status = 'healthy' if write_success and read_data and (write_time + read_time) <= self.health_thresholds['cache_response_time'] else 'degraded'
except Exception as e:
write_time = 0
read_time = 0
performance_status = 'unhealthy'
logger.error(f"Cache performance test failed: {str(e)}")
total_time = time.time() - start_time
return {
'status': 'healthy' if connectivity_status == 'healthy' and performance_status == 'healthy' else 'degraded',
'connectivity_status': connectivity_status,
'performance_status': performance_status,
'write_time': write_time,
'read_time': read_time,
'total_check_time': total_time,
'statistics': cache_stats,
'last_checked': datetime.utcnow().isoformat()
}
except Exception as e:
logger.error(f"Error checking cache health: {str(e)}")
return {
'status': 'unhealthy',
'error': str(e),
'last_checked': datetime.utcnow().isoformat()
}
async def _check_ai_service_health(self, ai_service) -> Dict[str, Any]:
"""Check AI service health and performance."""
try:
start_time = time.time()
# Test AI service connectivity
try:
# Simple test call to AI service
test_prompt = "Test health check"
ai_start = time.time()
ai_response = await ai_service._call_ai_service(test_prompt, 'health_check')
ai_time = time.time() - ai_start
connectivity_status = 'healthy' if ai_response else 'unhealthy'
performance_status = 'healthy' if ai_time <= self.health_thresholds['ai_service_response_time'] else 'degraded'
except Exception as e:
ai_time = 0
connectivity_status = 'unhealthy'
performance_status = 'unhealthy'
logger.error(f"AI service health check failed: {str(e)}")
total_time = time.time() - start_time
return {
'status': 'healthy' if connectivity_status == 'healthy' and performance_status == 'healthy' else 'degraded',
'connectivity_status': connectivity_status,
'performance_status': performance_status,
'response_time': ai_time,
'total_check_time': total_time,
'last_checked': datetime.utcnow().isoformat()
}
except Exception as e:
logger.error(f"Error checking AI service health: {str(e)}")
return {
'status': 'unhealthy',
'error': str(e),
'last_checked': datetime.utcnow().isoformat()
}
async def _check_system_resources(self) -> Dict[str, Any]:
"""Check system resource usage."""
try:
import psutil
# CPU usage
cpu_percent = psutil.cpu_percent(interval=1)
cpu_status = 'healthy' if cpu_percent <= self.health_thresholds['cpu_usage_threshold'] else 'degraded'
# Memory usage
memory = psutil.virtual_memory()
memory_percent = memory.percent
memory_status = 'healthy' if memory_percent <= self.health_thresholds['memory_usage_threshold'] else 'degraded'
# Disk usage
disk = psutil.disk_usage('/')
disk_percent = disk.percent
disk_status = 'healthy' if disk_percent <= self.health_thresholds['disk_usage_threshold'] else 'degraded'
# Network status
try:
network = psutil.net_io_counters()
network_status = 'healthy'
except Exception:
network_status = 'degraded'
return {
'status': 'healthy' if all(s == 'healthy' for s in [cpu_status, memory_status, disk_status, network_status]) else 'degraded',
'cpu': {
'usage_percent': cpu_percent,
'status': cpu_status
},
'memory': {
'usage_percent': memory_percent,
'available_gb': memory.available / (1024**3),
'total_gb': memory.total / (1024**3),
'status': memory_status
},
'disk': {
'usage_percent': disk_percent,
'free_gb': disk.free / (1024**3),
'total_gb': disk.total / (1024**3),
'status': disk_status
},
'network': {
'status': network_status
},
'last_checked': datetime.utcnow().isoformat()
}
except Exception as e:
logger.error(f"Error checking system resources: {str(e)}")
return {
'status': 'unhealthy',
'error': str(e),
'last_checked': datetime.utcnow().isoformat()
}
async def _get_database_statistics(self, db: Session) -> Dict[str, Any]:
"""Get database statistics."""
try:
stats = {}
# Get table counts (simplified)
try:
result = db.execute(text("SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public'"))
stats['table_count'] = result.fetchone()[0]
except Exception:
stats['table_count'] = 'unknown'
# Get database size (simplified)
try:
result = db.execute(text("SELECT pg_size_pretty(pg_database_size(current_database()))"))
stats['database_size'] = result.fetchone()[0]
except Exception:
stats['database_size'] = 'unknown'
return stats
except Exception as e:
logger.error(f"Error getting database statistics: {str(e)}")
return {'error': str(e)}
def _determine_overall_health(self, components: Dict[str, Any]) -> str:
"""Determine overall system health based on component status."""
try:
statuses = []
for component_name, component_data in components.items():
if isinstance(component_data, dict) and 'status' in component_data:
statuses.append(component_data['status'])
if not statuses:
return 'unknown'
if 'unhealthy' in statuses:
return 'unhealthy'
elif 'degraded' in statuses:
return 'degraded'
elif all(status == 'healthy' for status in statuses):
return 'healthy'
else:
return 'unknown'
except Exception as e:
logger.error(f"Error determining overall health: {str(e)}")
return 'unknown'
def _generate_health_alerts(self, components: Dict[str, Any]) -> List[str]:
"""Generate health alerts based on component status."""
try:
alerts = []
for component_name, component_data in components.items():
if isinstance(component_data, dict) and 'status' in component_data:
status = component_data['status']
if status == 'unhealthy':
alerts.append(f"CRITICAL: {component_name} is unhealthy")
elif status == 'degraded':
alerts.append(f"WARNING: {component_name} performance is degraded")
# Component-specific alerts
if component_name == 'database' and component_data.get('response_time', 0) > self.health_thresholds['database_response_time']:
alerts.append(f"WARNING: Database response time is slow: {component_data['response_time']:.2f}s")
elif component_name == 'cache' and component_data.get('write_time', 0) + component_data.get('read_time', 0) > self.health_thresholds['cache_response_time']:
alerts.append(f"WARNING: Cache response time is slow: {component_data.get('write_time', 0) + component_data.get('read_time', 0):.2f}s")
elif component_name == 'ai_service' and component_data.get('response_time', 0) > self.health_thresholds['ai_service_response_time']:
alerts.append(f"WARNING: AI service response time is slow: {component_data['response_time']:.2f}s")
elif component_name == 'system':
cpu_data = component_data.get('cpu', {})
memory_data = component_data.get('memory', {})
disk_data = component_data.get('disk', {})
if cpu_data.get('usage_percent', 0) > self.health_thresholds['cpu_usage_threshold']:
alerts.append(f"WARNING: High CPU usage: {cpu_data['usage_percent']:.1f}%")
if memory_data.get('usage_percent', 0) > self.health_thresholds['memory_usage_threshold']:
alerts.append(f"WARNING: High memory usage: {memory_data['usage_percent']:.1f}%")
if disk_data.get('usage_percent', 0) > self.health_thresholds['disk_usage_threshold']:
alerts.append(f"WARNING: High disk usage: {disk_data['usage_percent']:.1f}%")
return alerts
except Exception as e:
logger.error(f"Error generating health alerts: {str(e)}")
return ['Error generating health alerts']
async def _generate_health_recommendations(self, components: Dict[str, Any]) -> List[str]:
"""Generate health recommendations based on component status."""
try:
recommendations = []
for component_name, component_data in components.items():
if isinstance(component_data, dict) and 'status' in component_data:
status = component_data['status']
if status == 'unhealthy':
if component_name == 'database':
recommendations.append("Investigate database connectivity and configuration")
elif component_name == 'cache':
recommendations.append("Check cache service configuration and connectivity")
elif component_name == 'ai_service':
recommendations.append("Verify AI service configuration and API keys")
elif component_name == 'system':
recommendations.append("Check system resources and restart if necessary")
elif status == 'degraded':
if component_name == 'database':
recommendations.append("Optimize database queries and add indexes")
elif component_name == 'cache':
recommendations.append("Consider cache optimization and memory allocation")
elif component_name == 'ai_service':
recommendations.append("Review AI service performance and rate limits")
elif component_name == 'system':
recommendations.append("Monitor system resources and consider scaling")
# Specific recommendations based on metrics
if component_name == 'database' and component_data.get('response_time', 0) > self.health_thresholds['database_response_time']:
recommendations.append("Add database indexes for frequently queried columns")
recommendations.append("Consider database connection pooling")
elif component_name == 'system':
cpu_data = component_data.get('cpu', {})
memory_data = component_data.get('memory', {})
disk_data = component_data.get('disk', {})
if cpu_data.get('usage_percent', 0) > self.health_thresholds['cpu_usage_threshold']:
recommendations.append("Consider scaling CPU resources or optimizing CPU-intensive operations")
if memory_data.get('usage_percent', 0) > self.health_thresholds['memory_usage_threshold']:
recommendations.append("Increase memory allocation or optimize memory usage")
if disk_data.get('usage_percent', 0) > self.health_thresholds['disk_usage_threshold']:
recommendations.append("Clean up disk space or increase storage capacity")
return recommendations
except Exception as e:
logger.error(f"Error generating health recommendations: {str(e)}")
return ['Unable to generate health recommendations']
async def get_health_history(self, hours: int = 24) -> List[Dict[str, Any]]:
"""Get health check history."""
try:
# This would typically query a database for historical health data
# For now, return the current health status
return [self.health_status] if self.health_status.get('timestamp') else []
except Exception as e:
logger.error(f"Error getting health history: {str(e)}")
return []
async def set_health_thresholds(self, thresholds: Dict[str, float]) -> bool:
"""Update health monitoring thresholds."""
try:
for key, value in thresholds.items():
if key in self.health_thresholds:
self.health_thresholds[key] = value
logger.info(f"Updated health threshold {key}: {value}")
return True
except Exception as e:
logger.error(f"Error setting health thresholds: {str(e)}")
return False
async def get_health_thresholds(self) -> Dict[str, float]:
"""Get current health monitoring thresholds."""
return self.health_thresholds.copy()
async def start_continuous_monitoring(self, interval_seconds: int = 300) -> None:
"""Start continuous health monitoring."""
try:
logger.info(f"Starting continuous health monitoring with {interval_seconds}s interval")
while True:
try:
# This would typically use the database session and services
# For now, just log that monitoring is active
logger.info("Continuous health monitoring check")
await asyncio.sleep(interval_seconds)
except Exception as e:
logger.error(f"Error in continuous health monitoring: {str(e)}")
await asyncio.sleep(60) # Wait 1 minute before retrying
except Exception as e:
logger.error(f"Error starting continuous monitoring: {str(e)}")

View File

@@ -0,0 +1,507 @@
"""
Optimization Service
Performance optimization and monitoring.
"""
import logging
import time
import asyncio
from typing import Dict, Any, List, Optional, Callable
from datetime import datetime, timedelta
from sqlalchemy.orm import Session
from sqlalchemy import text
logger = logging.getLogger(__name__)
class PerformanceOptimizationService:
"""Service for performance optimization and monitoring."""
def __init__(self):
self.performance_metrics = {
'response_times': {},
'database_queries': {},
'memory_usage': {},
'cache_hit_rates': {}
}
self.optimization_config = {
'max_response_time': 2.0, # seconds
'max_database_queries': 10,
'max_memory_usage': 512, # MB
'min_cache_hit_rate': 0.8
}
async def optimize_response_time(self, operation_name: str, operation_func: Callable, *args, **kwargs) -> Dict[str, Any]:
"""Optimize response time for operations."""
try:
start_time = time.time()
# Execute operation
result = await operation_func(*args, **kwargs)
end_time = time.time()
response_time = end_time - start_time
# Record performance metrics
self._record_response_time(operation_name, response_time)
# Check if optimization is needed
if response_time > self.optimization_config['max_response_time']:
optimization_suggestions = await self._suggest_response_time_optimizations(operation_name, response_time)
logger.warning(f"Slow response time for {operation_name}: {response_time:.2f}s")
else:
optimization_suggestions = []
return {
'result': result,
'response_time': response_time,
'optimization_suggestions': optimization_suggestions,
'performance_status': 'optimal' if response_time <= self.optimization_config['max_response_time'] else 'needs_optimization'
}
except Exception as e:
logger.error(f"Error optimizing response time for {operation_name}: {str(e)}")
return {
'result': None,
'response_time': 0.0,
'optimization_suggestions': ['Error occurred during operation'],
'performance_status': 'error'
}
async def optimize_database_queries(self, db: Session, query_func: Callable, *args, **kwargs) -> Dict[str, Any]:
"""Optimize database queries."""
try:
start_time = time.time()
query_count_before = self._get_query_count(db)
# Execute query function
result = await query_func(db, *args, **kwargs)
end_time = time.time()
query_count_after = self._get_query_count(db)
query_count = query_count_after - query_count_before
response_time = end_time - start_time
# Record database performance
self._record_database_performance(query_func.__name__, query_count, response_time)
# Check if optimization is needed
if query_count > self.optimization_config['max_database_queries']:
optimization_suggestions = await self._suggest_database_optimizations(query_func.__name__, query_count, response_time)
logger.warning(f"High query count for {query_func.__name__}: {query_count} queries")
else:
optimization_suggestions = []
return {
'result': result,
'query_count': query_count,
'response_time': response_time,
'optimization_suggestions': optimization_suggestions,
'performance_status': 'optimal' if query_count <= self.optimization_config['max_database_queries'] else 'needs_optimization'
}
except Exception as e:
logger.error(f"Error optimizing database queries for {query_func.__name__}: {str(e)}")
return {
'result': None,
'query_count': 0,
'response_time': 0.0,
'optimization_suggestions': ['Error occurred during database operation'],
'performance_status': 'error'
}
async def optimize_memory_usage(self, operation_name: str, operation_func: Callable, *args, **kwargs) -> Dict[str, Any]:
"""Optimize memory usage for operations."""
try:
import psutil
import os
process = psutil.Process(os.getpid())
memory_before = process.memory_info().rss / 1024 / 1024 # MB
# Execute operation
result = await operation_func(*args, **kwargs)
memory_after = process.memory_info().rss / 1024 / 1024 # MB
memory_used = memory_after - memory_before
# Record memory usage
self._record_memory_usage(operation_name, memory_used)
# Check if optimization is needed
if memory_used > self.optimization_config['max_memory_usage']:
optimization_suggestions = await self._suggest_memory_optimizations(operation_name, memory_used)
logger.warning(f"High memory usage for {operation_name}: {memory_used:.2f}MB")
else:
optimization_suggestions = []
return {
'result': result,
'memory_used_mb': memory_used,
'optimization_suggestions': optimization_suggestions,
'performance_status': 'optimal' if memory_used <= self.optimization_config['max_memory_usage'] else 'needs_optimization'
}
except Exception as e:
logger.error(f"Error optimizing memory usage for {operation_name}: {str(e)}")
return {
'result': None,
'memory_used_mb': 0.0,
'optimization_suggestions': ['Error occurred during memory optimization'],
'performance_status': 'error'
}
async def optimize_cache_performance(self, cache_service, operation_name: str) -> Dict[str, Any]:
"""Optimize cache performance."""
try:
# Get cache statistics
cache_stats = await cache_service.get_cache_stats()
# Calculate cache hit rates
hit_rates = {}
for cache_type, stats in cache_stats.items():
if stats.get('entries', 0) > 0:
# This is a simplified calculation - in practice, you'd track actual hits/misses
hit_rates[cache_type] = 0.8 # Placeholder
# Record cache performance
self._record_cache_performance(operation_name, hit_rates)
# Check if optimization is needed
optimization_suggestions = []
for cache_type, hit_rate in hit_rates.items():
if hit_rate < self.optimization_config['min_cache_hit_rate']:
optimization_suggestions.append(f"Low cache hit rate for {cache_type}: {hit_rate:.2%}")
return {
'cache_stats': cache_stats,
'hit_rates': hit_rates,
'optimization_suggestions': optimization_suggestions,
'performance_status': 'optimal' if not optimization_suggestions else 'needs_optimization'
}
except Exception as e:
logger.error(f"Error optimizing cache performance: {str(e)}")
return {
'cache_stats': {},
'hit_rates': {},
'optimization_suggestions': ['Error occurred during cache optimization'],
'performance_status': 'error'
}
def _record_response_time(self, operation_name: str, response_time: float) -> None:
"""Record response time metrics."""
try:
if operation_name not in self.performance_metrics['response_times']:
self.performance_metrics['response_times'][operation_name] = []
self.performance_metrics['response_times'][operation_name].append({
'response_time': response_time,
'timestamp': datetime.utcnow().isoformat()
})
# Keep only last 100 entries
if len(self.performance_metrics['response_times'][operation_name]) > 100:
self.performance_metrics['response_times'][operation_name] = self.performance_metrics['response_times'][operation_name][-100:]
except Exception as e:
logger.error(f"Error recording response time: {str(e)}")
def _record_database_performance(self, operation_name: str, query_count: int, response_time: float) -> None:
"""Record database performance metrics."""
try:
if operation_name not in self.performance_metrics['database_queries']:
self.performance_metrics['database_queries'][operation_name] = []
self.performance_metrics['database_queries'][operation_name].append({
'query_count': query_count,
'response_time': response_time,
'timestamp': datetime.utcnow().isoformat()
})
# Keep only last 100 entries
if len(self.performance_metrics['database_queries'][operation_name]) > 100:
self.performance_metrics['database_queries'][operation_name] = self.performance_metrics['database_queries'][operation_name][-100:]
except Exception as e:
logger.error(f"Error recording database performance: {str(e)}")
def _record_memory_usage(self, operation_name: str, memory_used: float) -> None:
"""Record memory usage metrics."""
try:
if operation_name not in self.performance_metrics['memory_usage']:
self.performance_metrics['memory_usage'][operation_name] = []
self.performance_metrics['memory_usage'][operation_name].append({
'memory_used_mb': memory_used,
'timestamp': datetime.utcnow().isoformat()
})
# Keep only last 100 entries
if len(self.performance_metrics['memory_usage'][operation_name]) > 100:
self.performance_metrics['memory_usage'][operation_name] = self.performance_metrics['memory_usage'][operation_name][-100:]
except Exception as e:
logger.error(f"Error recording memory usage: {str(e)}")
def _record_cache_performance(self, operation_name: str, hit_rates: Dict[str, float]) -> None:
"""Record cache performance metrics."""
try:
if operation_name not in self.performance_metrics['cache_hit_rates']:
self.performance_metrics['cache_hit_rates'][operation_name] = []
self.performance_metrics['cache_hit_rates'][operation_name].append({
'hit_rates': hit_rates,
'timestamp': datetime.utcnow().isoformat()
})
# Keep only last 100 entries
if len(self.performance_metrics['cache_hit_rates'][operation_name]) > 100:
self.performance_metrics['cache_hit_rates'][operation_name] = self.performance_metrics['cache_hit_rates'][operation_name][-100:]
except Exception as e:
logger.error(f"Error recording cache performance: {str(e)}")
def _get_query_count(self, db: Session) -> int:
"""Get current query count from database session."""
try:
# This is a simplified implementation
# In practice, you'd use database-specific monitoring tools
return 0
except Exception as e:
logger.error(f"Error getting query count: {str(e)}")
return 0
async def _suggest_response_time_optimizations(self, operation_name: str, response_time: float) -> List[str]:
"""Suggest optimizations for slow response times."""
try:
suggestions = []
if response_time > 5.0:
suggestions.append("Consider implementing caching for this operation")
suggestions.append("Review database query optimization")
suggestions.append("Consider async processing for heavy operations")
elif response_time > 2.0:
suggestions.append("Optimize database queries")
suggestions.append("Consider adding indexes for frequently accessed data")
suggestions.append("Review data processing algorithms")
# Add operation-specific suggestions
if 'ai_analysis' in operation_name.lower():
suggestions.append("Consider implementing AI response caching")
suggestions.append("Review AI service integration efficiency")
elif 'onboarding' in operation_name.lower():
suggestions.append("Optimize data transformation algorithms")
suggestions.append("Consider batch processing for large datasets")
return suggestions
except Exception as e:
logger.error(f"Error suggesting response time optimizations: {str(e)}")
return ["Unable to generate optimization suggestions"]
async def _suggest_database_optimizations(self, operation_name: str, query_count: int, response_time: float) -> List[str]:
"""Suggest optimizations for database performance."""
try:
suggestions = []
if query_count > 20:
suggestions.append("Implement query batching to reduce database calls")
suggestions.append("Review and optimize N+1 query patterns")
suggestions.append("Consider implementing database connection pooling")
elif query_count > 10:
suggestions.append("Optimize database queries with proper indexing")
suggestions.append("Consider implementing query result caching")
suggestions.append("Review database schema for optimization opportunities")
if response_time > 1.0:
suggestions.append("Add database indexes for frequently queried columns")
suggestions.append("Consider read replicas for heavy read operations")
suggestions.append("Optimize database connection settings")
# Add operation-specific suggestions
if 'strategy' in operation_name.lower():
suggestions.append("Consider implementing strategy data caching")
suggestions.append("Optimize strategy-related database queries")
elif 'onboarding' in operation_name.lower():
suggestions.append("Batch onboarding data processing")
suggestions.append("Optimize onboarding data retrieval queries")
return suggestions
except Exception as e:
logger.error(f"Error suggesting database optimizations: {str(e)}")
return ["Unable to generate database optimization suggestions"]
async def _suggest_memory_optimizations(self, operation_name: str, memory_used: float) -> List[str]:
"""Suggest optimizations for memory usage."""
try:
suggestions = []
if memory_used > 100:
suggestions.append("Implement data streaming for large datasets")
suggestions.append("Review memory-intensive data structures")
suggestions.append("Consider implementing pagination")
elif memory_used > 50:
suggestions.append("Optimize data processing algorithms")
suggestions.append("Review object lifecycle management")
suggestions.append("Consider implementing lazy loading")
# Add operation-specific suggestions
if 'ai_analysis' in operation_name.lower():
suggestions.append("Implement AI response streaming")
suggestions.append("Optimize AI model memory usage")
elif 'onboarding' in operation_name.lower():
suggestions.append("Process onboarding data in smaller chunks")
suggestions.append("Implement data cleanup after processing")
return suggestions
except Exception as e:
logger.error(f"Error suggesting memory optimizations: {str(e)}")
return ["Unable to generate memory optimization suggestions"]
async def get_performance_report(self) -> Dict[str, Any]:
"""Generate comprehensive performance report."""
try:
report = {
'timestamp': datetime.utcnow().isoformat(),
'response_times': self._calculate_average_response_times(),
'database_performance': self._calculate_database_performance(),
'memory_usage': self._calculate_memory_usage(),
'cache_performance': self._calculate_cache_performance(),
'optimization_recommendations': await self._generate_optimization_recommendations()
}
return report
except Exception as e:
logger.error(f"Error generating performance report: {str(e)}")
return {
'timestamp': datetime.utcnow().isoformat(),
'error': str(e)
}
def _calculate_average_response_times(self) -> Dict[str, float]:
"""Calculate average response times for operations."""
try:
averages = {}
for operation_name, times in self.performance_metrics['response_times'].items():
if times:
avg_time = sum(t['response_time'] for t in times) / len(times)
averages[operation_name] = avg_time
return averages
except Exception as e:
logger.error(f"Error calculating average response times: {str(e)}")
return {}
def _calculate_database_performance(self) -> Dict[str, Dict[str, float]]:
"""Calculate database performance metrics."""
try:
performance = {}
for operation_name, queries in self.performance_metrics['database_queries'].items():
if queries:
avg_queries = sum(q['query_count'] for q in queries) / len(queries)
avg_time = sum(q['response_time'] for q in queries) / len(queries)
performance[operation_name] = {
'average_queries': avg_queries,
'average_response_time': avg_time
}
return performance
except Exception as e:
logger.error(f"Error calculating database performance: {str(e)}")
return {}
def _calculate_memory_usage(self) -> Dict[str, float]:
"""Calculate average memory usage for operations."""
try:
averages = {}
for operation_name, usage in self.performance_metrics['memory_usage'].items():
if usage:
avg_memory = sum(u['memory_used_mb'] for u in usage) / len(usage)
averages[operation_name] = avg_memory
return averages
except Exception as e:
logger.error(f"Error calculating memory usage: {str(e)}")
return {}
def _calculate_cache_performance(self) -> Dict[str, float]:
"""Calculate cache performance metrics."""
try:
performance = {}
for operation_name, rates in self.performance_metrics['cache_hit_rates'].items():
if rates:
# Calculate average hit rate across all cache types
all_rates = []
for rate_data in rates:
if rate_data['hit_rates']:
avg_rate = sum(rate_data['hit_rates'].values()) / len(rate_data['hit_rates'])
all_rates.append(avg_rate)
if all_rates:
performance[operation_name] = sum(all_rates) / len(all_rates)
return performance
except Exception as e:
logger.error(f"Error calculating cache performance: {str(e)}")
return {}
async def _generate_optimization_recommendations(self) -> List[str]:
"""Generate optimization recommendations based on performance data."""
try:
recommendations = []
# Check response times
avg_response_times = self._calculate_average_response_times()
for operation, avg_time in avg_response_times.items():
if avg_time > self.optimization_config['max_response_time']:
recommendations.append(f"Optimize response time for {operation} (avg: {avg_time:.2f}s)")
# Check database performance
db_performance = self._calculate_database_performance()
for operation, perf in db_performance.items():
if perf['average_queries'] > self.optimization_config['max_database_queries']:
recommendations.append(f"Reduce database queries for {operation} (avg: {perf['average_queries']:.1f} queries)")
# Check memory usage
memory_usage = self._calculate_memory_usage()
for operation, memory in memory_usage.items():
if memory > self.optimization_config['max_memory_usage']:
recommendations.append(f"Optimize memory usage for {operation} (avg: {memory:.1f}MB)")
return recommendations
except Exception as e:
logger.error(f"Error generating optimization recommendations: {str(e)}")
return ["Unable to generate optimization recommendations"]
async def cleanup_old_metrics(self, days_to_keep: int = 30) -> Dict[str, int]:
"""Clean up old performance metrics."""
try:
cutoff_date = datetime.utcnow() - timedelta(days=days_to_keep)
cleaned_count = 0
for metric_type, operations in self.performance_metrics.items():
for operation_name, metrics in operations.items():
if isinstance(metrics, list):
original_count = len(metrics)
# Filter out old metrics
self.performance_metrics[metric_type][operation_name] = [
m for m in metrics
if datetime.fromisoformat(m['timestamp']) > cutoff_date
]
cleaned_count += original_count - len(self.performance_metrics[metric_type][operation_name])
logger.info(f"Cleaned up {cleaned_count} old performance metrics")
return {'cleaned_count': cleaned_count}
except Exception as e:
logger.error(f"Error cleaning up old metrics: {str(e)}")
return {'cleaned_count': 0}