ALwrity Version 0.5.0 (FastAPI + React)
This commit is contained in:
0
backend/api/content_planning/services/__init__.py
Normal file
0
backend/api/content_planning/services/__init__.py
Normal file
342
backend/api/content_planning/services/ai_analytics_service.py
Normal file
342
backend/api/content_planning/services/ai_analytics_service.py
Normal file
@@ -0,0 +1,342 @@
|
||||
"""
|
||||
AI Analytics Service for Content Planning API
|
||||
Extracted business logic from the AI analytics route for better separation of concerns.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session
|
||||
import time
|
||||
|
||||
# Import database services
|
||||
from services.content_planning_db import ContentPlanningDBService
|
||||
from services.ai_analysis_db_service import AIAnalysisDBService
|
||||
from services.ai_analytics_service import AIAnalyticsService
|
||||
from services.onboarding_data_service import OnboardingDataService
|
||||
|
||||
# Import utilities
|
||||
from ..utils.error_handlers import ContentPlanningErrorHandler
|
||||
from ..utils.response_builders import ResponseBuilder
|
||||
from ..utils.constants import ERROR_MESSAGES, SUCCESS_MESSAGES
|
||||
|
||||
class ContentPlanningAIAnalyticsService:
    """Service class for AI analytics operations.

    Wraps ``AIAnalyticsService`` (analysis), ``AIAnalysisDBService`` (cached
    results) and ``OnboardingDataService`` (personalized inputs). Every public
    method logs the failure and re-raises through
    ``ContentPlanningErrorHandler.handle_general_error``.
    """

    # Analysis types accepted by ``refresh_ai_analysis`` mapped to the
    # confirmation message returned for each of them.
    _REFRESH_MESSAGES = {
        "comprehensive_analysis": "AI analysis refresh initiated for user {user_id}",
        "gap_analysis": "Gap analysis refresh initiated for user {user_id}",
        "strategic_intelligence": "Strategic intelligence refresh initiated for user {user_id}",
    }

    def __init__(self):
        self.ai_analysis_db_service = AIAnalysisDBService()
        self.ai_analytics_service = AIAnalyticsService()
        self.onboarding_service = OnboardingDataService()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _build_analysis_response(analysis_type: str, strategy_id: int, results: Any,
                                 recommendations: List[Any]) -> Dict[str, Any]:
        """Build the response envelope shared by the single-analysis endpoints."""
        return {
            'analysis_type': analysis_type,
            'strategy_id': strategy_id,
            'results': results,
            'recommendations': recommendations,
            'analysis_date': datetime.utcnow()
        }

    @staticmethod
    def _collect(analyses: List[Optional[Dict[str, Any]]], key: str) -> List[Any]:
        """Concatenate the list stored under ``key`` in every non-empty analysis."""
        merged: List[Any] = []
        for analysis in analyses:
            if analysis:
                # ``or []`` tolerates a key that is present but set to None.
                merged.extend(analysis.get(key) or [])
        return merged

    @staticmethod
    def _coerce_created_at(value: Any) -> datetime:
        """Return ``value`` as a naive UTC ``datetime``.

        Accepts a ``datetime`` or an ISO-8601 string. Anything else (including
        ``None`` or an unparsable string) falls back to the current UTC time.
        Timezone-aware values are converted to naive UTC so they can be
        subtracted from ``datetime.utcnow()``.
        """
        if isinstance(value, str):
            try:
                value = datetime.fromisoformat(value)
            except ValueError:
                return datetime.utcnow()
        if not isinstance(value, datetime):
            return datetime.utcnow()
        offset = value.utcoffset()
        if offset is not None:
            value = (value - offset).replace(tzinfo=None)
        return value

    # ------------------------------------------------------------------
    # Single-analysis endpoints
    # ------------------------------------------------------------------

    async def analyze_content_evolution(self, strategy_id: int, time_period: str = "30d") -> Dict[str, Any]:
        """Analyze content evolution over time for a specific strategy.

        Args:
            strategy_id: Strategy to analyze.
            time_period: Period forwarded unchanged to the analytics service.

        Returns:
            Response envelope with the raw results and their recommendations.
        """
        try:
            logger.info(f"Starting content evolution analysis for strategy {strategy_id}")

            evolution_analysis = await self.ai_analytics_service.analyze_content_evolution(
                strategy_id=strategy_id,
                time_period=time_period
            )

            response_data = self._build_analysis_response(
                'content_evolution',
                strategy_id,
                evolution_analysis,
                evolution_analysis.get('recommendations', [])
            )

            logger.info(f"Content evolution analysis completed for strategy {strategy_id}")
            return response_data

        except Exception as e:
            logger.error(f"Error analyzing content evolution: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "analyze_content_evolution")

    async def analyze_performance_trends(self, strategy_id: int, metrics: Optional[List[str]] = None) -> Dict[str, Any]:
        """Analyze performance trends for content strategy.

        Args:
            strategy_id: Strategy to analyze.
            metrics: Optional metric names forwarded to the analytics service.

        Returns:
            Response envelope with the raw results and their recommendations.
        """
        try:
            logger.info(f"Starting performance trends analysis for strategy {strategy_id}")

            trends_analysis = await self.ai_analytics_service.analyze_performance_trends(
                strategy_id=strategy_id,
                metrics=metrics
            )

            response_data = self._build_analysis_response(
                'performance_trends',
                strategy_id,
                trends_analysis,
                trends_analysis.get('recommendations', [])
            )

            logger.info(f"Performance trends analysis completed for strategy {strategy_id}")
            return response_data

        except Exception as e:
            logger.error(f"Error analyzing performance trends: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "analyze_performance_trends")

    async def predict_content_performance(self, strategy_id: int, content_data: Dict[str, Any]) -> Dict[str, Any]:
        """Predict content performance using AI models.

        Args:
            strategy_id: Strategy the content belongs to.
            content_data: Content description forwarded to the analytics service.

        Returns:
            Response envelope; recommendations come from the result's
            ``optimization_recommendations`` key.
        """
        try:
            logger.info(f"Starting content performance prediction for strategy {strategy_id}")

            prediction_results = await self.ai_analytics_service.predict_content_performance(
                content_data=content_data,
                strategy_id=strategy_id
            )

            response_data = self._build_analysis_response(
                'content_performance_prediction',
                strategy_id,
                prediction_results,
                prediction_results.get('optimization_recommendations', [])
            )

            logger.info(f"Content performance prediction completed for strategy {strategy_id}")
            return response_data

        except Exception as e:
            logger.error(f"Error predicting content performance: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "predict_content_performance")

    async def generate_strategic_intelligence(self, strategy_id: int, market_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Generate strategic intelligence for content planning.

        Args:
            strategy_id: Strategy to generate intelligence for.
            market_data: Optional market data forwarded to the analytics service.

        Returns:
            Response envelope; ``recommendations`` is intentionally empty
            because strategic intelligence includes its own recommendations
            inside ``results``.
        """
        try:
            logger.info(f"Starting strategic intelligence generation for strategy {strategy_id}")

            intelligence_results = await self.ai_analytics_service.generate_strategic_intelligence(
                strategy_id=strategy_id,
                market_data=market_data
            )

            response_data = self._build_analysis_response(
                'strategic_intelligence',
                strategy_id,
                intelligence_results,
                []  # Strategic intelligence includes its own recommendations
            )

            logger.info(f"Strategic intelligence generation completed for strategy {strategy_id}")
            return response_data

        except Exception as e:
            logger.error(f"Error generating strategic intelligence: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "generate_strategic_intelligence")

    # ------------------------------------------------------------------
    # Comprehensive analytics (database-first)
    # ------------------------------------------------------------------

    async def get_ai_analytics(self, user_id: Optional[int] = None, strategy_id: Optional[int] = None, force_refresh: bool = False) -> Dict[str, Any]:
        """Get AI analytics with real personalized insights - Database first approach.

        Unless ``force_refresh`` is set, a cached "comprehensive_analysis" no
        older than 24 hours is returned. Otherwise performance, strategic and
        evolution analyses are run, merged, stored (best effort) and returned.

        Args:
            user_id: User to analyze; a falsy value falls back to user 1.
            strategy_id: Strategy to analyze; a falsy value falls back to
                strategy 1 for the analysis calls.
            force_refresh: Skip the database cache lookup when True.

        Returns:
            Dict with insights, recommendations, counts, timing and data source.
        """
        try:
            logger.info(f"🚀 Starting AI analytics for user: {user_id}, strategy: {strategy_id}, force_refresh: {force_refresh}")
            start_time = time.time()

            # Use user_id or default to 1
            current_user_id = user_id or 1

            # Skip database check if force_refresh is True
            if not force_refresh:
                logger.info(f"🔍 Checking database for existing AI analysis for user {current_user_id}")
                existing_analysis = await self.ai_analysis_db_service.get_latest_ai_analysis(
                    user_id=current_user_id,
                    analysis_type="comprehensive_analysis",
                    strategy_id=strategy_id,
                    max_age_hours=24  # Use cached results up to 24 hours old
                )

                if existing_analysis:
                    logger.info(f"✅ Found existing AI analysis in database: {existing_analysis.get('id', 'unknown')}")

                    cached_insights = existing_analysis.get('insights') or []
                    cached_recommendations = existing_analysis.get('recommendations') or []
                    cached_processing_time = existing_analysis.get('processing_time')
                    # created_at may arrive as a datetime, an ISO string or None.
                    created_at = self._coerce_created_at(existing_analysis.get('created_at'))

                    # Return cached results
                    return {
                        "insights": cached_insights,
                        "recommendations": cached_recommendations,
                        "total_insights": len(cached_insights),
                        "total_recommendations": len(cached_recommendations),
                        "generated_at": created_at.isoformat(),
                        "ai_service_status": existing_analysis.get('ai_service_status', 'operational'),
                        "processing_time": f"{cached_processing_time:.2f}s" if cached_processing_time else "cached",
                        "personalized_data_used": bool(existing_analysis.get('personalized_data_used')),
                        "data_source": "database_cache",
                        "cache_age_hours": (datetime.utcnow() - created_at).total_seconds() / 3600,
                        "user_profile": existing_analysis.get('personalized_data_used', {})
                    }

            # No recent analysis found or force refresh requested, run new AI analysis
            logger.info(f"🔄 Running new AI analysis for user {current_user_id} (force_refresh: {force_refresh})")

            # Get personalized inputs from onboarding data; tolerate a missing profile.
            personalized_inputs = self.onboarding_service.get_personalized_ai_inputs(current_user_id) or {}

            logger.info(f"📊 Using personalized inputs: {len(personalized_inputs)} data points")

            analysis_strategy_id = strategy_id or 1

            logger.info("🔍 Generating performance analysis...")
            performance_analysis = await self.ai_analytics_service.analyze_performance_trends(
                strategy_id=analysis_strategy_id
            )

            logger.info("🧠 Generating strategic intelligence...")
            strategic_intelligence = await self.ai_analytics_service.generate_strategic_intelligence(
                strategy_id=analysis_strategy_id
            )

            logger.info("📈 Analyzing content evolution...")
            evolution_analysis = await self.ai_analytics_service.analyze_content_evolution(
                strategy_id=analysis_strategy_id
            )

            # Combine all insights AND recommendations. Previously the
            # recommendations list was created but never filled.
            analyses = [performance_analysis, strategic_intelligence, evolution_analysis]
            insights = self._collect(analyses, 'insights')
            recommendations = self._collect(analyses, 'recommendations')

            total_time = time.time() - start_time
            logger.info(f"🎉 AI analytics completed in {total_time:.2f}s: {len(insights)} insights, {len(recommendations)} recommendations")

            ai_service_status = "operational" if len(insights) > 0 else "fallback"

            # Store results in database. Deliberately best effort: a storage
            # failure is logged but must not fail the request.
            try:
                await self.ai_analysis_db_service.store_ai_analysis_result(
                    user_id=current_user_id,
                    analysis_type="comprehensive_analysis",
                    insights=insights,
                    recommendations=recommendations,
                    performance_metrics=performance_analysis,
                    personalized_data=personalized_inputs,
                    processing_time=total_time,
                    strategy_id=strategy_id,
                    ai_service_status=ai_service_status
                )
                logger.info(f"💾 AI analysis results stored in database for user {current_user_id}")
            except Exception as e:
                logger.error(f"❌ Failed to store AI analysis in database: {str(e)}")

            website_analysis = personalized_inputs.get('website_analysis', {})

            return {
                "insights": insights,
                "recommendations": recommendations,
                "total_insights": len(insights),
                "total_recommendations": len(recommendations),
                "generated_at": datetime.utcnow().isoformat(),
                "ai_service_status": ai_service_status,
                "processing_time": f"{total_time:.2f}s",
                "personalized_data_used": True,
                "data_source": "ai_analysis",
                "user_profile": {
                    "website_url": website_analysis.get('website_url', ''),
                    "content_types": website_analysis.get('content_types', []),
                    "target_audience": website_analysis.get('target_audience', []),
                    "industry_focus": website_analysis.get('industry_focus', 'general')
                }
            }

        except Exception as e:
            logger.error(f"❌ Error generating AI analytics: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "get_ai_analytics")

    # ------------------------------------------------------------------
    # Stored results, cache management and statistics
    # ------------------------------------------------------------------

    async def get_user_ai_analysis_results(self, user_id: int, analysis_type: Optional[str] = None, limit: int = 10) -> Dict[str, Any]:
        """Get AI analysis results for a specific user.

        Args:
            user_id: Owner of the analyses.
            analysis_type: Optional single type to filter by.
            limit: Maximum number of results requested from the database.

        Returns:
            Dict with the user id, serialized results and their count.
        """
        try:
            logger.info(f"Fetching AI analysis results for user {user_id}")

            analysis_types = [analysis_type] if analysis_type else None
            results = await self.ai_analysis_db_service.get_user_ai_analyses(
                user_id=user_id,
                analysis_types=analysis_types,
                limit=limit
            )

            return {
                "user_id": user_id,
                "results": [result.to_dict() for result in results],
                "total_results": len(results)
            }

        except Exception as e:
            logger.error(f"Error fetching AI analysis results: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "get_user_ai_analysis_results")

    async def refresh_ai_analysis(self, user_id: int, analysis_type: str, strategy_id: Optional[int] = None) -> Dict[str, Any]:
        """Force refresh of AI analysis for a user.

        The analysis type is validated BEFORE any cached data is deleted, so an
        unknown type no longer purges the cache as a side effect.

        Args:
            user_id: User requesting the refresh.
            analysis_type: One of "comprehensive_analysis", "gap_analysis" or
                "strategic_intelligence".
            strategy_id: Currently unused by this method.

        Returns:
            Dict with a confirmation ``message``.
        """
        try:
            logger.info(f"Force refreshing AI analysis for user {user_id}, type: {analysis_type}")

            message_template = self._REFRESH_MESSAGES.get(analysis_type)
            if message_template is None:
                raise Exception(f"Unknown analysis type: {analysis_type}")

            # Delete existing analysis to force refresh.
            # NOTE(review): no user filter is passed here, so this presumably
            # removes cached analyses for every user - confirm against
            # AIAnalysisDBService.delete_old_ai_analyses.
            await self.ai_analysis_db_service.delete_old_ai_analyses(days_old=0)

            return {"message": message_template.format(user_id=user_id)}

        except Exception as e:
            logger.error(f"Error refreshing AI analysis: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "refresh_ai_analysis")

    async def clear_ai_analysis_cache(self, user_id: int, analysis_type: Optional[str] = None) -> Dict[str, Any]:
        """Clear AI analysis cache for a user.

        Args:
            user_id: User requesting the purge (used in the log and message).
            analysis_type: Accepted for interface compatibility. The deletion
                call takes no type filter, so both the "specific type" and the
                "all types" request perform the same purge.

        Returns:
            Dict with a ``message`` reporting the number of deleted rows.
        """
        try:
            logger.info(f"Clearing AI analysis cache for user {user_id}")

            # NOTE(review): neither user_id nor analysis_type is forwarded to
            # the database service - confirm whether a scoped delete exists.
            deleted_count = await self.ai_analysis_db_service.delete_old_ai_analyses(days_old=0)
            return {"message": f"Cleared {deleted_count} cached results for user {user_id}"}

        except Exception as e:
            logger.error(f"Error clearing AI analysis cache: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "clear_ai_analysis_cache")

    async def get_ai_analysis_statistics(self, user_id: Optional[int] = None) -> Dict[str, Any]:
        """Get AI analysis statistics.

        Args:
            user_id: When truthy, statistics are fetched for that user;
                otherwise global statistics are returned.

        Returns:
            Dict with ``statistics`` and ``message`` (plus ``user_id`` for the
            user-specific variant).
        """
        try:
            logger.info(f"📊 Getting AI analysis statistics for user: {user_id}")

            if not user_id:
                # Get global statistics
                global_stats = await self.ai_analysis_db_service.get_analysis_statistics()
                return {
                    "statistics": global_stats,
                    "message": "Global AI analysis statistics retrieved successfully"
                }

            # Get user-specific statistics
            user_stats = await self.ai_analysis_db_service.get_analysis_statistics(user_id)
            return {
                "user_id": user_id,
                "statistics": user_stats,
                "message": "User-specific AI analysis statistics retrieved successfully"
            }

        except Exception as e:
            logger.error(f"❌ Error getting AI analysis statistics: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "get_ai_analysis_statistics")
|
||||
@@ -0,0 +1,407 @@
|
||||
"""
|
||||
Calendar Generation Service for Content Planning API
|
||||
Extracted business logic from the calendar generation route for better separation of concerns.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session
|
||||
import time
|
||||
|
||||
# Import database service
|
||||
from services.content_planning_db import ContentPlanningDBService
|
||||
|
||||
# Import calendar generator service
|
||||
from services.calendar_generator_service import CalendarGeneratorService
|
||||
|
||||
# Import validation service
|
||||
from services.validation import check_all_api_keys
|
||||
|
||||
# Import utilities
|
||||
from ..utils.error_handlers import ContentPlanningErrorHandler
|
||||
from ..utils.response_builders import ResponseBuilder
|
||||
from ..utils.constants import ERROR_MESSAGES, SUCCESS_MESSAGES
|
||||
|
||||
class CalendarGenerationService:
    """Service class for calendar generation operations.

    Thin orchestration layer over ``CalendarGeneratorService``: it gathers user
    data, calls the generator's AI engine and shapes the API responses. Public
    methods (except ``health_check``) log failures and re-raise through
    ``ContentPlanningErrorHandler.handle_general_error``.

    NOTE(review): several methods call underscore-prefixed members of
    ``CalendarGeneratorService``; consider exposing public equivalents there.
    """

    def __init__(self):
        self.calendar_generator_service = CalendarGeneratorService()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _log_exception_details(error: Exception) -> None:
        """Log the type and traceback of the exception currently being handled.

        Must be called from inside an ``except`` block so that
        ``traceback.format_exc()`` can see the active exception.
        """
        import traceback

        logger.error(f"Exception type: {type(error)}")
        logger.error(f"Traceback: {traceback.format_exc()}")

    @staticmethod
    def _build_health_status(api_key_status: Dict[str, Any], db_status: str) -> Dict[str, Any]:
        """Assemble the health report from the API-key check and the DB probe.

        The service is "healthy" only when all API keys are valid AND the
        database probe reported exactly "healthy".
        """
        ai_healthy = api_key_status.get("all_valid", False)
        return {
            "service": "calendar_generation",
            "status": "healthy" if ai_healthy and db_status == "healthy" else "unhealthy",
            "timestamp": datetime.utcnow().isoformat(),
            "components": {
                "ai_services": "healthy" if ai_healthy else "unhealthy",
                "database": db_status,
                "calendar_generator": "healthy"
            },
            "api_keys": api_key_status
        }

    # ------------------------------------------------------------------
    # Calendar generation
    # ------------------------------------------------------------------

    async def generate_comprehensive_calendar(self, user_id: int, strategy_id: Optional[int] = None,
                                              calendar_type: str = "monthly", industry: Optional[str] = None,
                                              business_size: str = "sme") -> Dict[str, Any]:
        """Generate a comprehensive AI-powered content calendar using database insights.

        Args:
            user_id: User the calendar is generated for.
            strategy_id: Optional strategy to base the calendar on.
            calendar_type: Calendar granularity forwarded to the generator.
            industry: Optional industry forwarded to the generator.
            business_size: Business size forwarded to the generator.

        Returns:
            The calendar data exactly as returned by the generator service.
        """
        try:
            logger.info(f"🎯 Generating comprehensive calendar for user {user_id}")
            start_time = time.time()

            # Generate calendar using advanced AI-powered method
            calendar_data = await self.calendar_generator_service.generate_ai_powered_calendar(
                user_id=user_id,
                strategy_id=strategy_id,
                calendar_type=calendar_type,
                industry=industry,
                business_size=business_size
            )

            processing_time = time.time() - start_time

            logger.info(f"✅ Calendar generated successfully in {processing_time:.2f}s")
            return calendar_data

        except Exception as e:
            logger.error(f"❌ Error generating comprehensive calendar: {str(e)}")
            self._log_exception_details(e)
            raise ContentPlanningErrorHandler.handle_general_error(e, "generate_comprehensive_calendar")

    async def optimize_content_for_platform(self, user_id: int, title: str, description: str,
                                            content_type: str, target_platform: str, event_id: Optional[int] = None) -> Dict[str, Any]:
        """Optimize content for specific platforms using database insights.

        Only ``platform_adaptations`` is derived from the AI result (the
        ``description`` of its first three recommendations). The remaining
        optimization fields are static placeholder values, and
        ``optimized_content`` currently mirrors the original content.

        Args:
            user_id: User whose data provides the optimization context.
            title: Content title.
            description: Content description.
            content_type: Type of the content.
            target_platform: Platform to optimize for.
            event_id: Optional calendar event id echoed back in the response.

        Returns:
            Dict describing the original content and the optimization output.
        """
        try:
            logger.info(f"🔧 Starting content optimization for user {user_id}")

            # TODO: re-enable API key validation (check_all_api_keys) once
            # testing is finished; it is temporarily disabled.

            # Get user data for optimization (no strategy_id for content optimization)
            user_data = await self.calendar_generator_service._get_comprehensive_user_data(
                user_id,
                None
            )

            content_fields = {
                "title": title,
                "description": description,
                "content_type": content_type,
                "target_platform": target_platform
            }
            user_context = {
                "industry": user_data.get('industry', 'technology'),
                "target_audience": user_data.get('target_audience', {}),
                "performance_data": user_data.get('performance_data', {}),
                "gap_analysis": user_data.get('gap_analysis', {})
            }

            # Generate optimization using AI. The engine receives structured
            # analysis data; the free-text prompt that used to be built here
            # was never passed to it and has been removed.
            optimization_result = await self.calendar_generator_service.ai_engine.generate_content_recommendations(
                analysis_data={
                    "original_content": dict(content_fields),
                    "user_context": user_context
                }
            )

            # Tolerate an engine that returns None instead of a list.
            top_recommendations = (optimization_result or [])[:3]

            # Prepare response
            response_data = {
                "user_id": user_id,
                "event_id": event_id,
                "original_content": dict(content_fields),
                "optimized_content": dict(content_fields),
                "platform_adaptations": [rec.get('description', '') for rec in top_recommendations],
                "visual_recommendations": ["Use engaging visuals", "Include relevant images", "Optimize for mobile"],
                "hashtag_suggestions": ["#content", "#marketing", "#digital"],
                "keyword_optimization": {"primary": "content", "secondary": ["marketing", "digital"]},
                "tone_adjustments": {"tone": "professional", "style": "informative"},
                "length_optimization": {"optimal_length": "150-300 words", "format": "paragraphs"},
                "performance_prediction": {"engagement_rate": 0.05, "reach": 1000},
                "optimization_score": 0.8,
                "created_at": datetime.utcnow()
            }

            logger.info(f"✅ Content optimization completed for user {user_id}")
            return response_data

        except Exception as e:
            logger.error(f"❌ Error optimizing content: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "optimize_content_for_platform")

    async def predict_content_performance(self, user_id: int, content_type: str, platform: str,
                                          content_data: Dict[str, Any], strategy_id: Optional[int] = None) -> Dict[str, Any]:
        """Predict content performance using database insights.

        Args:
            user_id: User whose data provides the prediction context.
            content_type: Type of the content.
            platform: Target platform.
            content_data: Content payload embedded in the AI prompt.
            strategy_id: Optional strategy used when loading user data.

        Returns:
            Dict with predicted metrics; each metric falls back to a default
            when the AI response omits it.
        """
        try:
            logger.info(f"📊 Starting performance prediction for user {user_id}")

            # Get user data for prediction
            user_data = await self.calendar_generator_service._get_comprehensive_user_data(
                user_id,
                strategy_id
            )

            # Generate performance prediction
            prediction_prompt = f"""
            Predict performance for the following content:

            Content Data:
            - Content Type: {content_type}
            - Platform: {platform}
            - Content Data: {content_data}

            User Context:
            - Industry: {user_data.get('industry', 'technology')}
            - Performance Data: {user_data.get('performance_data', {})}
            - Gap Analysis: {user_data.get('gap_analysis', {})}
            - Audience Insights: {user_data.get('onboarding_data', {}).get('target_audience', {})}

            Provide performance predictions including:
            1. Engagement rate
            2. Reach estimates
            3. Conversion predictions
            4. ROI estimates
            5. Confidence score
            6. Recommendations
            """

            # Generate prediction using AI
            prediction_result = await self.calendar_generator_service.ai_engine.generate_structured_response(
                prompt=prediction_prompt,
                schema={
                    "type": "object",
                    "properties": {
                        "predicted_engagement_rate": {"type": "number"},
                        "predicted_reach": {"type": "integer"},
                        "predicted_conversions": {"type": "integer"},
                        "predicted_roi": {"type": "number"},
                        "confidence_score": {"type": "number"},
                        "recommendations": {"type": "array", "items": {"type": "string"}}
                    }
                }
            )
            # An empty/None AI response falls through to the defaults below.
            prediction_result = prediction_result or {}

            # Prepare response
            response_data = {
                "user_id": user_id,
                "strategy_id": strategy_id,
                "content_type": content_type,
                "platform": platform,
                "predicted_engagement_rate": prediction_result.get("predicted_engagement_rate", 0.05),
                "predicted_reach": prediction_result.get("predicted_reach", 1000),
                "predicted_conversions": prediction_result.get("predicted_conversions", 10),
                "predicted_roi": prediction_result.get("predicted_roi", 2.5),
                "confidence_score": prediction_result.get("confidence_score", 0.75),
                "recommendations": prediction_result.get("recommendations", []),
                "created_at": datetime.utcnow()
            }

            logger.info(f"✅ Performance prediction completed for user {user_id}")
            return response_data

        except Exception as e:
            logger.error(f"❌ Error predicting content performance: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "predict_content_performance")

    async def repurpose_content_across_platforms(self, user_id: int, original_content: Dict[str, Any],
                                                 target_platforms: List[str], strategy_id: Optional[int] = None) -> Dict[str, Any]:
        """Repurpose content across different platforms using database insights.

        Args:
            user_id: User whose data provides the repurposing context.
            original_content: Content to repurpose (embedded in the prompt).
            target_platforms: Platforms to adapt the content for.
            strategy_id: Optional strategy used when loading user data.

        Returns:
            Dict with adaptations, transformations, tips and gap addresses;
            each list is empty when the AI response omits it.
        """
        try:
            logger.info(f"🔄 Starting content repurposing for user {user_id}")

            # Get user data for repurposing
            user_data = await self.calendar_generator_service._get_comprehensive_user_data(
                user_id,
                strategy_id
            )

            # Generate repurposing suggestions
            repurposing_prompt = f"""
            Repurpose the following content for multiple platforms:

            Original Content:
            {original_content}

            Target Platforms:
            {target_platforms}

            User Context:
            - Gap Analysis: {user_data.get('gap_analysis', {})}
            - Strategy Data: {user_data.get('strategy_data', {})}
            - Recommendations: {user_data.get('recommendations_data', [])}

            Provide repurposing suggestions including:
            1. Platform-specific adaptations
            2. Content transformations
            3. Implementation tips
            4. Gap addressing opportunities
            """

            # Generate repurposing suggestions using AI
            repurposing_result = await self.calendar_generator_service.ai_engine.generate_structured_response(
                prompt=repurposing_prompt,
                schema={
                    "type": "object",
                    "properties": {
                        "platform_adaptations": {"type": "array", "items": {"type": "object"}},
                        "transformations": {"type": "array", "items": {"type": "object"}},
                        "implementation_tips": {"type": "array", "items": {"type": "string"}},
                        "gap_addresses": {"type": "array", "items": {"type": "string"}}
                    }
                }
            )
            # An empty/None AI response falls through to the empty defaults below.
            repurposing_result = repurposing_result or {}

            # Prepare response
            response_data = {
                "user_id": user_id,
                "strategy_id": strategy_id,
                "original_content": original_content,
                "platform_adaptations": repurposing_result.get("platform_adaptations", []),
                "transformations": repurposing_result.get("transformations", []),
                "implementation_tips": repurposing_result.get("implementation_tips", []),
                "gap_addresses": repurposing_result.get("gap_addresses", []),
                "created_at": datetime.utcnow()
            }

            logger.info(f"✅ Content repurposing completed for user {user_id}")
            return response_data

        except Exception as e:
            logger.error(f"❌ Error repurposing content: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "repurpose_content_across_platforms")

    async def get_trending_topics(self, user_id: int, industry: str, limit: int = 10) -> Dict[str, Any]:
        """Get trending topics relevant to the user's industry and content gaps.

        Args:
            user_id: User whose gap analysis and onboarding data score the topics.
            industry: Industry to fetch trending topics for.
            limit: Maximum number of topics returned.

        Returns:
            Dict with the limited topic list and two score maps keyed by each
            topic's ``keyword`` (topics without a keyword share the "" key).
        """
        try:
            logger.info(f"📈 Getting trending topics for user {user_id} in {industry}")

            generator = self.calendar_generator_service

            # Get user data for trending topics
            user_data = await generator._get_comprehensive_user_data(user_id, None)

            # Get trending topics with database insights
            trending_topics = await generator._get_trending_topics_from_db(industry, user_data)

            # Limit results
            limited_topics = trending_topics[:limit]

            # Calculate relevance scores
            gap_analysis = user_data.get("gap_analysis", {})
            onboarding_data = user_data.get("onboarding_data", {})
            gap_relevance_scores = {}
            audience_alignment_scores = {}

            for topic in limited_topics:
                topic_key = topic.get("keyword", "")
                gap_relevance_scores[topic_key] = generator._assess_gap_relevance(topic, gap_analysis)
                audience_alignment_scores[topic_key] = generator._assess_audience_alignment(topic, onboarding_data)

            # Prepare response
            response_data = {
                "user_id": user_id,
                "industry": industry,
                "trending_topics": limited_topics,
                "gap_relevance_scores": gap_relevance_scores,
                "audience_alignment_scores": audience_alignment_scores,
                "created_at": datetime.utcnow()
            }

            logger.info(f"✅ Trending topics retrieved for user {user_id}")
            return response_data

        except Exception as e:
            logger.error(f"❌ Error getting trending topics: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "get_trending_topics")

    async def get_comprehensive_user_data(self, user_id: int) -> Dict[str, Any]:
        """Get comprehensive user data for calendar generation.

        Args:
            user_id: User to load data for (no strategy filter is applied).

        Returns:
            Dict with ``status``, the loaded ``data``, a ``message`` and a UTC
            ISO-8601 ``timestamp``.
        """
        try:
            logger.info(f"Getting comprehensive user data for user_id: {user_id}")

            # Get comprehensive data using the calendar generator service
            logger.info("Calling calendar generator service...")
            comprehensive_data = await self.calendar_generator_service._get_comprehensive_user_data(user_id, None)
            logger.info(f"Calendar generator service returned: {type(comprehensive_data)}")

            logger.info(f"Successfully retrieved comprehensive user data for user_id: {user_id}")

            return {
                "status": "success",
                "data": comprehensive_data,
                "message": "Comprehensive user data retrieved successfully",
                # UTC, consistent with every other timestamp this service emits.
                "timestamp": datetime.utcnow().isoformat()
            }

        except Exception as e:
            logger.error(f"Error getting comprehensive user data for user_id {user_id}: {str(e)}")
            self._log_exception_details(e)
            raise ContentPlanningErrorHandler.handle_general_error(e, "get_comprehensive_user_data")

    async def health_check(self) -> Dict[str, Any]:
        """Health check for calendar generation services.

        Checks the AI API keys and probes the database through the generator's
        ``content_planning_db_service`` when that attribute is set. Unlike the
        other methods this one never raises: any failure is reported as an
        "unhealthy" status dict.

        Returns:
            Health report dict; on unexpected failure a reduced dict with an
            ``error`` message.
        """
        try:
            logger.info("🏥 Performing calendar generation health check")

            # Check AI services
            from services.api_key_manager import APIKeyManager

            api_manager = APIKeyManager()
            api_key_status = check_all_api_keys(api_manager)

            # Check database connectivity
            db_status = "healthy"
            try:
                # Test database connection - only if calendar generator service is properly initialized
                db_service = getattr(self.calendar_generator_service, 'content_planning_db_service', None)
                if db_service is not None:
                    # Probe query; user id 1 is only used to exercise the connection.
                    await db_service.get_user_content_gap_analyses(1)
                else:
                    db_status = "not_initialized"
            except Exception as e:
                db_status = f"error: {str(e)}"

            health_status = self._build_health_status(api_key_status, db_status)

            logger.info("✅ Calendar generation health check completed")
            return health_status

        except Exception as e:
            logger.error(f"❌ Calendar generation health check failed: {str(e)}")
            return {
                "service": "calendar_generation",
                "status": "unhealthy",
                "timestamp": datetime.utcnow().isoformat(),
                "error": str(e)
            }
|
||||
184
backend/api/content_planning/services/calendar_service.py
Normal file
184
backend/api/content_planning/services/calendar_service.py
Normal file
@@ -0,0 +1,184 @@
|
||||
"""
|
||||
Calendar Service for Content Planning API
|
||||
Extracted business logic from the calendar events route for better separation of concerns.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
# Import database service
|
||||
from services.content_planning_db import ContentPlanningDBService
|
||||
|
||||
# Import utilities
|
||||
from ..utils.error_handlers import ContentPlanningErrorHandler
|
||||
from ..utils.response_builders import ResponseBuilder
|
||||
from ..utils.constants import ERROR_MESSAGES, SUCCESS_MESSAGES
|
||||
|
||||
class CalendarService:
    """Service layer for calendar event operations.

    Every public method wraps the supplied session in a
    ``ContentPlanningDBService``, delegates to it and converts ORM results
    with ``to_dict()``. Unexpected failures are logged and re-raised via
    ``ContentPlanningErrorHandler.handle_general_error``. Missing records
    are reported via ``ContentPlanningErrorHandler.handle_not_found_error``,
    which is always raised outside the ``try`` blocks so the blanket
    handler cannot re-wrap it as a general error.
    """

    def __init__(self):
        # Stateless: each method receives the database session it needs.
        pass

    async def create_calendar_event(self, event_data: Dict[str, Any], db: Session) -> Dict[str, Any]:
        """Create a new calendar event.

        Args:
            event_data: Event fields; ``title`` is used for logging only.
            db: Database session handed to ``ContentPlanningDBService``.

        Returns:
            The created event as a dict (``to_dict()`` of the DB result).

        Raises:
            The general-error exception when the DB layer fails or returns
            a falsy result.
        """
        try:
            logger.info(f"Creating calendar event: {event_data.get('title', 'Unknown')}")

            db_service = ContentPlanningDBService(db)
            created_event = await db_service.create_calendar_event(event_data)

            if created_event:
                logger.info(f"Calendar event created successfully: {created_event.id}")
                return created_event.to_dict()
            else:
                # A falsy result is treated as a failure and deliberately
                # routed through the general-error handler below.
                raise Exception("Failed to create calendar event")

        except Exception as e:
            logger.error(f"Error creating calendar event: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "create_calendar_event")

    async def get_calendar_events(self, strategy_id: Optional[int] = None, db: Optional[Session] = None) -> List[Dict[str, Any]]:
        """Get calendar events, optionally filtered by strategy.

        Args:
            strategy_id: When truthy, only events of this strategy are
                returned. When omitted, an empty list is returned because
                an unfiltered query is not implemented yet.
            db: Database session handed to ``ContentPlanningDBService``.

        Returns:
            A list of event dicts.

        Raises:
            The general-error exception on any failure.
        """
        try:
            logger.info("Fetching calendar events")

            db_service = ContentPlanningDBService(db)

            if strategy_id:
                events = await db_service.get_strategy_calendar_events(strategy_id)
            else:
                # TODO: Implement get_all_calendar_events method
                events = []

            return [event.to_dict() for event in events]

        except Exception as e:
            logger.error(f"Error getting calendar events: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "get_calendar_events")

    async def get_calendar_event_by_id(self, event_id: int, db: Session) -> Dict[str, Any]:
        """Get a specific calendar event by ID.

        Args:
            event_id: Identifier of the event to load.
            db: Database session handed to ``ContentPlanningDBService``.

        Returns:
            The event as a dict.

        Raises:
            The not-found exception when no event exists for ``event_id``;
            the general-error exception on any other failure.
        """
        event_dict = None
        try:
            logger.info(f"Fetching calendar event: {event_id}")

            db_service = ContentPlanningDBService(db)
            event = await db_service.get_calendar_event(event_id)

            if event:
                event_dict = event.to_dict()

        except Exception as e:
            logger.error(f"Error getting calendar event: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "get_calendar_event_by_id")

        if not event:
            # Raised outside the try so the blanket handler above cannot
            # turn a missing record into a general error.
            logger.warning(f"Calendar event not found: {event_id}")
            raise ContentPlanningErrorHandler.handle_not_found_error("Calendar event", event_id)
        return event_dict

    async def update_calendar_event(self, event_id: int, update_data: Dict[str, Any], db: Session) -> Dict[str, Any]:
        """Update a calendar event.

        Args:
            event_id: Identifier of the event to update.
            update_data: Fields to change, passed to the DB service as-is.
            db: Database session handed to ``ContentPlanningDBService``.

        Returns:
            The updated event as a dict.

        Raises:
            The not-found exception when the DB layer returns a falsy
            result; the general-error exception on any other failure.
        """
        event_dict = None
        try:
            logger.info(f"Updating calendar event: {event_id}")

            db_service = ContentPlanningDBService(db)
            updated_event = await db_service.update_calendar_event(event_id, update_data)

            if updated_event:
                event_dict = updated_event.to_dict()

        except Exception as e:
            logger.error(f"Error updating calendar event: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "update_calendar_event")

        if not updated_event:
            # Kept out of the try so the not-found error is not re-wrapped.
            logger.warning(f"Calendar event not found: {event_id}")
            raise ContentPlanningErrorHandler.handle_not_found_error("Calendar event", event_id)
        return event_dict

    async def delete_calendar_event(self, event_id: int, db: Session) -> bool:
        """Delete a calendar event.

        Args:
            event_id: Identifier of the event to delete.
            db: Database session handed to ``ContentPlanningDBService``.

        Returns:
            ``True`` when the event was deleted.

        Raises:
            The not-found exception when the DB layer reports nothing was
            deleted; the general-error exception on any other failure.
        """
        try:
            logger.info(f"Deleting calendar event: {event_id}")

            db_service = ContentPlanningDBService(db)
            deleted = await db_service.delete_calendar_event(event_id)

        except Exception as e:
            logger.error(f"Error deleting calendar event: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "delete_calendar_event")

        if not deleted:
            # Kept out of the try so the not-found error is not re-wrapped.
            logger.warning(f"Calendar event not found: {event_id}")
            raise ContentPlanningErrorHandler.handle_not_found_error("Calendar event", event_id)
        return True

    async def get_events_by_status(self, strategy_id: int, status: str, db: Session) -> List[Dict[str, Any]]:
        """Get calendar events by status for a specific strategy.

        Args:
            strategy_id: Strategy whose events are queried.
            status: Status value passed through to the DB service.
            db: Database session handed to ``ContentPlanningDBService``.

        Returns:
            A list of event dicts.

        Raises:
            The general-error exception on any failure.
        """
        try:
            logger.info(f"Fetching events for strategy {strategy_id} with status {status}")

            db_service = ContentPlanningDBService(db)
            events = await db_service.get_events_by_status(strategy_id, status)

            return [event.to_dict() for event in events]

        except Exception as e:
            logger.error(f"Error getting events by status: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "get_events_by_status")

    async def get_strategy_events(self, strategy_id: int, db: Session) -> Dict[str, Any]:
        """Get calendar events for a specific strategy.

        Args:
            strategy_id: Strategy whose events are queried.
            db: Database session handed to ``ContentPlanningDBService``.

        Returns:
            A dict with ``strategy_id``, ``events_count`` and ``events``
            (a list of event dicts).

        Raises:
            The general-error exception on any failure.
        """
        try:
            logger.info(f"Fetching events for strategy: {strategy_id}")

            db_service = ContentPlanningDBService(db)
            events = await db_service.get_strategy_calendar_events(strategy_id)

            return {
                'strategy_id': strategy_id,
                'events_count': len(events),
                'events': [event.to_dict() for event in events]
            }

        except Exception as e:
            logger.error(f"Error getting strategy events: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "get_strategy_events")

    async def schedule_event(self, event_data: Dict[str, Any], db: Session) -> Dict[str, Any]:
        """Schedule a calendar event with conflict checking.

        Args:
            event_data: Event fields; ``title`` is used for logging only.
            db: Database session used for the conflict check and creation.

        Returns:
            When conflicts are found, a dict with ``status`` set to
            ``"conflict"`` plus ``message``, ``conflicts`` and the
            unchanged ``event_data``; nothing is created in that case.
            Otherwise a dict with ``status`` set to ``"success"``,
            ``message`` and the created ``event``.

        Raises:
            The general-error exception on any failure.
        """
        try:
            logger.info(f"Scheduling calendar event: {event_data.get('title', 'Unknown')}")

            # Check for scheduling conflicts
            conflicts = await self._check_scheduling_conflicts(event_data, db)

            if conflicts:
                logger.warning(f"Scheduling conflicts found: {conflicts}")
                return {
                    "status": "conflict",
                    "message": "Scheduling conflicts detected",
                    "conflicts": conflicts,
                    "event_data": event_data
                }

            # Create the event
            created_event = await self.create_calendar_event(event_data, db)

            return {
                "status": "success",
                "message": "Calendar event scheduled successfully",
                "event": created_event
            }

        except Exception as e:
            logger.error(f"Error scheduling calendar event: {str(e)}")
            raise ContentPlanningErrorHandler.handle_general_error(e, "schedule_event")

    async def _check_scheduling_conflicts(self, event_data: Dict[str, Any], db: Session) -> List[Dict[str, Any]]:
        """Check for scheduling conflicts with existing events.

        Placeholder: no conflict detection is implemented yet, so this
        always reports no conflicts and ignores both arguments.

        Returns:
            An empty list.
        """
        # TODO: compare against existing events (overlapping times, etc.).
        return []
|
||||
@@ -0,0 +1,346 @@
|
||||
# Content Strategy Implementation Status & Next Steps
|
||||
|
||||
## 📊 **Current Implementation Status**
|
||||
|
||||
### **✅ Completed (Phase 1 - Foundation)**
|
||||
|
||||
#### **1. Backend Cleanup & Reorganization** ✅
|
||||
- **✅ Deleted**: Old `strategy_service.py` (superseded by enhanced version)
|
||||
- **✅ Created**: Modular structure with 12 focused modules
|
||||
- **✅ Organized**: Related functionality into logical groups
|
||||
- **✅ Tested**: All imports and routes working correctly
|
||||
|
||||
#### **2. AI Analysis Module** ✅ **COMPLETE**
|
||||
- **✅ AI Recommendations Service**: 180 lines of comprehensive AI analysis
|
||||
- **✅ Prompt Engineering Service**: 150 lines of specialized prompt creation
|
||||
- **✅ Quality Validation Service**: 120 lines of quality assessment
|
||||
- **✅ 5 Analysis Types**: Comprehensive, Audience, Competitive, Performance, Calendar
|
||||
- **✅ Fallback System**: Robust error handling with fallback recommendations
|
||||
- **✅ Database Integration**: AI analysis result storage and retrieval
|
||||
|
||||
#### **3. Core Infrastructure** ✅
|
||||
- **✅ Core Strategy Service**: Main orchestration (188 lines)
|
||||
- **✅ Field Mappings**: Strategic input field definitions (50 lines)
|
||||
- **✅ Service Constants**: Configuration management (30 lines)
|
||||
- **✅ API Integration**: Enhanced strategy routes working
|
||||
|
||||
### **🔄 In Progress (Phase 2 - Core Modules)**
|
||||
|
||||
#### **1. Onboarding Module** 🔄 **HIGH PRIORITY**
|
||||
**Status**: Placeholder services created; implementation still needed
|
||||
- **❌ Data Integration Service**: Needs real functionality
|
||||
- **❌ Field Transformation**: Needs logic implementation
|
||||
- **❌ Data Quality Assessment**: Needs quality scoring
|
||||
- **❌ Auto-Population**: Needs real data integration
|
||||
|
||||
**Next Steps**:
|
||||
```text
|
||||
# Priority 1: Implement data_integration.py
|
||||
- Extract onboarding data processing from monolithic file
|
||||
- Implement website analysis integration
|
||||
- Add research preferences processing
|
||||
- Make use of API keys data
|
||||
|
||||
# Priority 2: Implement field_transformation.py
|
||||
- Create data to field mapping logic
|
||||
- Implement field transformation algorithms
|
||||
- Add validation and error handling
|
||||
- Test with real onboarding data
|
||||
|
||||
# Priority 3: Implement data_quality.py
|
||||
- Add completeness scoring
|
||||
- Implement confidence calculation
|
||||
- Create freshness evaluation
|
||||
- Add source attribution
|
||||
```
|
||||
|
||||
#### **2. Performance Module** 🔄 **HIGH PRIORITY**
|
||||
**Status**: Placeholder services created; implementation still needed
|
||||
- **❌ Caching Service**: Needs Redis integration
|
||||
- **❌ Optimization Service**: Needs performance algorithms
|
||||
- **❌ Health Monitoring**: Needs system health checks
|
||||
- **❌ Metrics Collection**: Needs performance tracking
|
||||
|
||||
**Next Steps**:
|
||||
```text
|
||||
# Priority 1: Implement caching.py
|
||||
- Add Redis integration for AI analysis cache
|
||||
- Implement onboarding data cache (30 min TTL)
|
||||
- Add strategy cache (2 hours TTL)
|
||||
- Create intelligent cache eviction
|
||||
|
||||
# Priority 2: Implement optimization.py
|
||||
- Add response time optimization
|
||||
- Implement database query optimization
|
||||
- Create resource management
|
||||
- Add performance monitoring
|
||||
|
||||
# Priority 3: Implement health_monitoring.py
|
||||
- Add database health checks
|
||||
- Implement cache performance monitoring
|
||||
- Create AI service health assessment
|
||||
- Add response time tracking
|
||||
```
|
||||
|
||||
#### **3. Utils Module** 🔄 **HIGH PRIORITY**
|
||||
**Status**: Placeholder services created; implementation still needed
|
||||
- **❌ Data Processors**: Needs utility functions
|
||||
- **❌ Validators**: Needs validation logic
|
||||
- **❌ Helper Methods**: Needs common utilities
|
||||
|
||||
**Next Steps**:
|
||||
```text
|
||||
# Priority 1: Implement data_processors.py
|
||||
- Add data transformation utilities
|
||||
- Create data cleaning functions
|
||||
- Implement data enrichment
|
||||
- Add data validation helpers
|
||||
|
||||
# Priority 2: Implement validators.py
|
||||
- Add field validation logic
|
||||
- Implement data type checking
|
||||
- Create business rule validation
|
||||
- Add error message generation
|
||||
```
|
||||
|
||||
### **📋 Pending (Phase 3 - Advanced Features)**
|
||||
|
||||
#### **1. Real AI Integration** 📋
|
||||
- **❌ OpenAI Integration**: Connect to actual AI services
|
||||
- **❌ Advanced Prompts**: Implement sophisticated prompt engineering
|
||||
- **❌ Machine Learning**: Add ML capabilities
|
||||
- **❌ Predictive Analytics**: Create predictive insights
|
||||
|
||||
#### **2. Enhanced Analytics** 📋
|
||||
- **❌ Real-time Tracking**: Implement live performance monitoring
|
||||
- **❌ Advanced Reporting**: Create comprehensive reports
|
||||
- **❌ Custom Dashboards**: Build user dashboards
|
||||
- **❌ Export Capabilities**: Add data export features
|
||||
|
||||
#### **3. User Experience** 📋
|
||||
- **❌ Progressive Disclosure**: Implement guided interface
|
||||
- **❌ Template Strategies**: Add pre-built strategy templates
|
||||
- **❌ Interactive Tutorials**: Create user onboarding
|
||||
- **❌ Smart Defaults**: Implement intelligent defaults
|
||||
|
||||
## 🎯 **Immediate Next Steps (Next 2-4 Weeks)**
|
||||
|
||||
### **Week 1-2: Complete Core Modules**
|
||||
|
||||
#### **1. Onboarding Integration** 🔥 **CRITICAL**
|
||||
```text
|
||||
# Day 1-2: Implement data_integration.py
|
||||
- Extract onboarding data processing from monolithic file
|
||||
- Implement website analysis integration
|
||||
- Add research preferences processing
|
||||
- Make use of API keys data
|
||||
|
||||
# Day 3-4: Implement field_transformation.py
|
||||
- Create data to field mapping logic
|
||||
- Implement field transformation algorithms
|
||||
- Add validation and error handling
|
||||
- Test with real onboarding data
|
||||
|
||||
# Day 5-7: Implement data_quality.py
|
||||
- Add completeness scoring
|
||||
- Implement confidence calculation
|
||||
- Create freshness evaluation
|
||||
- Add source attribution
|
||||
```
|
||||
|
||||
#### **2. Performance Optimization** 🔥 **CRITICAL**
|
||||
```text
|
||||
# Day 1-2: Implement caching.py
|
||||
- Add Redis integration for AI analysis cache
|
||||
- Implement onboarding data cache (30 min TTL)
|
||||
- Add strategy cache (2 hours TTL)
|
||||
- Create intelligent cache eviction
|
||||
|
||||
# Day 3-4: Implement optimization.py
|
||||
- Add response time optimization
|
||||
- Implement database query optimization
|
||||
- Create resource management
|
||||
- Add performance monitoring
|
||||
|
||||
# Day 5-7: Implement health_monitoring.py
|
||||
- Add database health checks
|
||||
- Implement cache performance monitoring
|
||||
- Create AI service health assessment
|
||||
- Add response time tracking
|
||||
```
|
||||
|
||||
#### **3. Utils Implementation** 🔥 **CRITICAL**
|
||||
```text
|
||||
# Day 1-2: Implement data_processors.py
|
||||
- Add data transformation utilities
|
||||
- Create data cleaning functions
|
||||
- Implement data enrichment
|
||||
- Add data validation helpers
|
||||
|
||||
# Day 3-4: Implement validators.py
|
||||
- Add field validation logic
|
||||
- Implement data type checking
|
||||
- Create business rule validation
|
||||
- Add error message generation
|
||||
```
|
||||
|
||||
### **Week 3-4: Testing & Integration**
|
||||
|
||||
#### **1. Comprehensive Testing**
|
||||
```text
|
||||
# Unit Tests
|
||||
- Test each service independently
|
||||
- Add comprehensive test coverage
|
||||
- Implement mock services for testing
|
||||
- Create test data fixtures
|
||||
|
||||
# Integration Tests
|
||||
- Test service interactions
|
||||
- Verify API endpoints
|
||||
- Test database operations
|
||||
- Validate error handling
|
||||
|
||||
# End-to-End Tests
|
||||
- Test complete workflows
|
||||
- Verify user scenarios
|
||||
- Test performance under load
|
||||
- Validate real-world usage
|
||||
```
|
||||
|
||||
#### **2. Performance Optimization**
|
||||
```text
|
||||
# Performance Testing
|
||||
- Measure response times
|
||||
- Optimize database queries
|
||||
- Implement caching strategies
|
||||
- Monitor resource usage
|
||||
|
||||
# Load Testing
|
||||
- Test with multiple users
|
||||
- Verify scalability
|
||||
- Monitor memory usage
|
||||
- Optimize for production
|
||||
```
|
||||
|
||||
## 🚀 **Medium-term Goals (Next 2-3 Months)**
|
||||
|
||||
### **Phase 2: Enhanced Features**
|
||||
|
||||
#### **1. Real AI Integration**
|
||||
- [ ] Integrate with OpenAI API
|
||||
- [ ] Add Claude API integration
|
||||
- [ ] Implement advanced prompt engineering
|
||||
- [ ] Create machine learning capabilities
|
||||
|
||||
#### **2. Advanced Analytics**
|
||||
- [ ] Real-time performance tracking
|
||||
- [ ] Advanced reporting system
|
||||
- [ ] Custom dashboard creation
|
||||
- [ ] Data export capabilities
|
||||
|
||||
#### **3. User Experience Improvements**
|
||||
- [ ] Progressive disclosure implementation
|
||||
- [ ] Guided wizard interface
|
||||
- [ ] Template-based strategies
|
||||
- [ ] Interactive tutorials
|
||||
|
||||
### **Phase 3: Enterprise Features**
|
||||
|
||||
#### **1. Advanced AI Capabilities**
|
||||
- [ ] Multi-model AI integration
|
||||
- [ ] Custom model training
|
||||
- [ ] Advanced analytics
|
||||
- [ ] Predictive insights
|
||||
|
||||
#### **2. Collaboration Features**
|
||||
- [ ] Team collaboration tools
|
||||
- [ ] Strategy sharing
|
||||
- [ ] Version control
|
||||
- [ ] Approval workflows
|
||||
|
||||
#### **3. Enterprise Integration**
|
||||
- [ ] CRM integration
|
||||
- [ ] Marketing automation
|
||||
- [ ] Analytics platforms
|
||||
- [ ] Custom API endpoints
|
||||
|
||||
## 📈 **Success Metrics & KPIs**
|
||||
|
||||
### **Technical Metrics**
|
||||
- **Response Time**: < 2 seconds for strategy creation
|
||||
- **Cache Hit Rate**: > 80% for frequently accessed data
|
||||
- **Error Rate**: < 1% for all operations
|
||||
- **Uptime**: > 99.9% availability
|
||||
|
||||
### **Quality Metrics**
|
||||
- **AI Response Quality**: > 85% confidence scores
|
||||
- **Data Completeness**: > 90% field completion
|
||||
- **User Satisfaction**: > 4.5/5 rating
|
||||
- **Strategy Effectiveness**: Measurable ROI improvements
|
||||
|
||||
### **Business Metrics**
|
||||
- **User Adoption**: Growing user base
|
||||
- **Feature Usage**: High engagement with AI features
|
||||
- **Customer Retention**: > 90% monthly retention
|
||||
- **Revenue Impact**: Measurable business value
|
||||
|
||||
## 🔧 **Development Guidelines**
|
||||
|
||||
### **1. Code Quality Standards**
|
||||
- **Type Hints**: Use comprehensive type annotations
|
||||
- **Documentation**: Document all public methods
|
||||
- **Error Handling**: Implement robust error handling
|
||||
- **Logging**: Add comprehensive logging
|
||||
|
||||
### **2. Testing Strategy**
|
||||
- **Unit Tests**: Test each service independently
|
||||
- **Integration Tests**: Test service interactions
|
||||
- **End-to-End Tests**: Test complete workflows
|
||||
- **Performance Tests**: Monitor response times
|
||||
|
||||
### **3. Performance Considerations**
|
||||
- **Caching**: Implement intelligent caching strategies
|
||||
- **Database Optimization**: Use efficient queries
|
||||
- **Async Operations**: Use async/await for I/O operations
|
||||
- **Resource Management**: Properly manage memory and connections
|
||||
|
||||
## 🎯 **Risk Assessment & Mitigation**
|
||||
|
||||
### **High Risk Items**
|
||||
1. **Onboarding Integration Complexity**: Mitigation - Start with simple implementations
|
||||
2. **Performance Optimization**: Mitigation - Implement caching first
|
||||
3. **AI Service Integration**: Mitigation - Use fallback systems
|
||||
4. **Database Performance**: Mitigation - Optimize queries and add indexing
|
||||
|
||||
### **Medium Risk Items**
|
||||
1. **User Experience**: Mitigation - Implement progressive disclosure
|
||||
2. **Data Quality**: Mitigation - Add comprehensive validation
|
||||
3. **Scalability**: Mitigation - Design for horizontal scaling
|
||||
4. **Maintenance**: Mitigation - Comprehensive documentation and testing
|
||||
|
||||
## 📋 **Resource Requirements**
|
||||
|
||||
### **Development Team**
|
||||
- **Backend Developer**: 1-2 developers for core modules
|
||||
- **AI Specialist**: 1 developer for AI integration
|
||||
- **DevOps Engineer**: 1 engineer for deployment and monitoring
|
||||
- **QA Engineer**: 1 engineer for testing and quality assurance
|
||||
|
||||
### **Infrastructure**
|
||||
- **Database**: PostgreSQL with proper indexing
|
||||
- **Cache**: Redis for performance optimization
|
||||
- **AI Services**: OpenAI/Claude API integration
|
||||
- **Monitoring**: Application performance monitoring
|
||||
|
||||
### **Timeline**
|
||||
- **Phase 1 (Core Modules)**: 2-4 weeks
|
||||
- **Phase 2 (Enhanced Features)**: 2-3 months
|
||||
- **Phase 3 (Enterprise Features)**: 6-12 months
|
||||
|
||||
## 🎉 **Conclusion**
|
||||
|
||||
The Content Strategy Services have a solid foundation with the AI Analysis module complete and the core infrastructure in place. The immediate priority is to complete the Onboarding, Performance, and Utils modules to create a fully functional system. With proper implementation of the next steps, the system will provide enterprise-level content strategy capabilities to solopreneurs and small businesses.
|
||||
|
||||
**Current Status**: 40% Complete (Foundation + AI Analysis)
|
||||
**Next Milestone**: 70% Complete (Core Modules)
|
||||
**Target Completion**: 100% Complete (All Features)
|
||||
363
backend/api/content_planning/services/content_strategy/README.md
Normal file
363
backend/api/content_planning/services/content_strategy/README.md
Normal file
@@ -0,0 +1,363 @@
|
||||
# Content Strategy Services
|
||||
|
||||
## 🎯 **Overview**
|
||||
|
||||
The Content Strategy Services module provides comprehensive content strategy management with 30+ strategic inputs, AI-powered recommendations, and enterprise-level analysis capabilities. This modular architecture enables solopreneurs, small business owners, and startups to access expert-level content strategy without requiring expensive digital marketing teams.
|
||||
|
||||
## 🏗️ **Architecture**
|
||||
|
||||
```
|
||||
content_strategy/
|
||||
├── core/ # Main orchestration & configuration
|
||||
│ ├── strategy_service.py # Main service orchestration
|
||||
│ ├── field_mappings.py # Strategic input field definitions
|
||||
│ └── constants.py # Service configuration
|
||||
├── ai_analysis/ # AI recommendation generation
|
||||
│ ├── ai_recommendations.py # Comprehensive AI analysis
|
||||
│ ├── prompt_engineering.py # Specialized prompt creation
|
||||
│ └── quality_validation.py # Quality assessment & scoring
|
||||
├── onboarding/ # Onboarding data integration
|
||||
│ ├── data_integration.py # Onboarding data processing
|
||||
│ ├── field_transformation.py # Data to field mapping
|
||||
│ └── data_quality.py # Quality assessment
|
||||
├── performance/ # Performance optimization
|
||||
│ ├── caching.py # Cache management
|
||||
│ ├── optimization.py # Performance optimization
|
||||
│ └── health_monitoring.py # System health checks
|
||||
└── utils/ # Data processing utilities
|
||||
├── data_processors.py # Data processing utilities
|
||||
└── validators.py # Data validation
|
||||
```
|
||||
|
||||
## 🚀 **Key Features**
|
||||
|
||||
### **1. Comprehensive Strategic Inputs (30+ Fields)**
|
||||
|
||||
#### **Business Context**
|
||||
- Business Objectives & Target Metrics
|
||||
- Content Budget & Team Size
|
||||
- Implementation Timeline & Market Share
|
||||
- Competitive Position & Performance Metrics
|
||||
|
||||
#### **Audience Intelligence**
|
||||
- Content Preferences & Consumption Patterns
|
||||
- Audience Pain Points & Buying Journey
|
||||
- Seasonal Trends & Engagement Metrics
|
||||
|
||||
#### **Competitive Intelligence**
|
||||
- Top Competitors & Competitor Strategies
|
||||
- Market Gaps & Industry Trends
|
||||
- Emerging Trends Analysis
|
||||
|
||||
#### **Content Strategy**
|
||||
- Preferred Formats & Content Mix
|
||||
- Content Frequency & Optimal Timing
|
||||
- Quality Metrics & Editorial Guidelines
|
||||
- Brand Voice Definition
|
||||
|
||||
#### **Performance Analytics**
|
||||
- Traffic Sources & Conversion Rates
|
||||
- Content ROI Targets & A/B Testing
|
||||
|
||||
### **2. AI-Powered Recommendations**
|
||||
|
||||
#### **Comprehensive Analysis Types**
|
||||
- **Comprehensive Strategy**: Full strategic positioning and market analysis
|
||||
- **Audience Intelligence**: Detailed audience persona development
|
||||
- **Competitive Intelligence**: Competitor analysis and market positioning
|
||||
- **Performance Optimization**: Traffic and conversion optimization
|
||||
- **Content Calendar Optimization**: Scheduling and timing optimization
|
||||
|
||||
#### **Quality Assessment**
|
||||
- AI Response Quality Validation
|
||||
- Strategic Score Calculation
|
||||
- Market Positioning Analysis
|
||||
- Competitive Advantage Extraction
|
||||
- Risk Assessment & Opportunity Analysis
|
||||
|
||||
### **3. Onboarding Data Integration**
|
||||
|
||||
#### **Smart Auto-Population**
|
||||
- Website Analysis Integration
|
||||
- Research Preferences Processing
|
||||
- API Keys Data Utilization
|
||||
- Field Transformation & Mapping
|
||||
|
||||
#### **Data Quality Assessment**
|
||||
- Completeness Scoring
|
||||
- Confidence Level Calculation
|
||||
- Data Freshness Evaluation
|
||||
- Source Attribution
|
||||
|
||||
### **4. Performance Optimization**
|
||||
|
||||
#### **Caching System**
|
||||
- AI Analysis Cache (1 hour TTL)
|
||||
- Onboarding Data Cache (30 minutes TTL)
|
||||
- Strategy Cache (2 hours TTL)
|
||||
- Intelligent Cache Eviction
|
||||
|
||||
#### **Health Monitoring**
|
||||
- Database Health Checks
|
||||
- Cache Performance Monitoring
|
||||
- AI Service Health Assessment
|
||||
- Response Time Optimization
|
||||
|
||||
## 📊 **Current Implementation Status**
|
||||
|
||||
### **✅ Completed Features**
|
||||
|
||||
#### **1. Core Infrastructure**
|
||||
- [x] Modular service architecture
|
||||
- [x] Core strategy service orchestration
|
||||
- [x] Strategic input field definitions
|
||||
- [x] Service configuration management
|
||||
|
||||
#### **2. AI Analysis Module**
|
||||
- [x] AI recommendations service (180 lines)
|
||||
- [x] Prompt engineering service (150 lines)
|
||||
- [x] Quality validation service (120 lines)
|
||||
- [x] 5 specialized analysis types
|
||||
- [x] Fallback recommendation system
|
||||
- [x] Quality assessment capabilities
|
||||
|
||||
#### **3. Database Integration**
|
||||
- [x] Enhanced strategy models
|
||||
- [x] AI analysis result storage
|
||||
- [x] Onboarding data integration
|
||||
- [x] Performance metrics tracking
|
||||
|
||||
#### **4. API Integration**
|
||||
- [x] Enhanced strategy routes
|
||||
- [x] Onboarding data endpoints
|
||||
- [x] AI analytics endpoints
|
||||
- [x] Performance monitoring endpoints
|
||||
|
||||
### **🔄 In Progress**
|
||||
|
||||
#### **1. Onboarding Module**
|
||||
- [ ] Data integration service implementation
|
||||
- [ ] Field transformation logic
|
||||
- [ ] Data quality assessment
|
||||
- [ ] Auto-population functionality
|
||||
|
||||
#### **2. Performance Module**
|
||||
- [ ] Caching service implementation
|
||||
- [ ] Optimization algorithms
|
||||
- [ ] Health monitoring system
|
||||
- [ ] Performance metrics collection
|
||||
|
||||
#### **3. Utils Module**
|
||||
- [ ] Data processing utilities
|
||||
- [ ] Validation functions
|
||||
- [ ] Helper methods
|
||||
|
||||
### **📋 Pending Implementation**
|
||||
|
||||
#### **1. Advanced AI Features**
|
||||
- [ ] Real AI service integration
|
||||
- [ ] Advanced prompt engineering
|
||||
- [ ] Machine learning models
|
||||
- [ ] Predictive analytics
|
||||
|
||||
#### **2. Enhanced Analytics**
|
||||
- [ ] Real-time performance tracking
|
||||
- [ ] Advanced reporting
|
||||
- [ ] Custom dashboards
|
||||
- [ ] Export capabilities
|
||||
|
||||
#### **3. User Experience**
|
||||
- [ ] Progressive disclosure
|
||||
- [ ] Guided wizard interface
|
||||
- [ ] Template-based strategies
|
||||
- [ ] Interactive tutorials
|
||||
|
||||
## 🎯 **Next Steps Priority**
|
||||
|
||||
### **Phase 1: Complete Core Modules (Immediate)**
|
||||
|
||||
#### **1. Onboarding Integration** 🔥 **HIGH PRIORITY**
|
||||
```text
|
||||
# Priority: Complete onboarding data integration
|
||||
- Implement data_integration.py with real functionality
|
||||
- Add field_transformation.py logic
|
||||
- Implement data_quality.py assessment
|
||||
- Test auto-population with real data
|
||||
```
|
||||
|
||||
#### **2. Performance Optimization** 🔥 **HIGH PRIORITY**
|
||||
```text
|
||||
# Priority: Implement caching and optimization
|
||||
- Complete caching.py with Redis integration
|
||||
- Add optimization.py algorithms
|
||||
- Implement health_monitoring.py
|
||||
- Add performance metrics collection
|
||||
```
|
||||
|
||||
#### **3. Utils Implementation** 🔥 **HIGH PRIORITY**
|
||||
```text
|
||||
# Priority: Add utility functions
|
||||
- Implement data_processors.py
|
||||
- Add validators.py functions
|
||||
- Create helper methods
|
||||
- Add comprehensive error handling
|
||||
```
|
||||
|
||||
### **Phase 2: Enhanced Features (Short-term)**
|
||||
|
||||
#### **1. Real AI Integration**
|
||||
- [ ] Integrate with actual AI services (OpenAI, Claude, etc.)
|
||||
- [ ] Implement advanced prompt engineering
|
||||
- [ ] Add machine learning capabilities
|
||||
- [ ] Create predictive analytics
|
||||
|
||||
#### **2. Advanced Analytics**
|
||||
- [ ] Real-time performance tracking
|
||||
- [ ] Advanced reporting system
|
||||
- [ ] Custom dashboard creation
|
||||
- [ ] Data export capabilities
|
||||
|
||||
#### **3. User Experience Improvements**
|
||||
- [ ] Progressive disclosure implementation
|
||||
- [ ] Guided wizard interface
|
||||
- [ ] Template-based strategies
|
||||
- [ ] Interactive tutorials
|
||||
|
||||
### **Phase 3: Enterprise Features (Long-term)**
|
||||
|
||||
#### **1. Advanced AI Capabilities**
|
||||
- [ ] Multi-model AI integration
|
||||
- [ ] Custom model training
|
||||
- [ ] Advanced analytics
|
||||
- [ ] Predictive insights
|
||||
|
||||
#### **2. Collaboration Features**
|
||||
- [ ] Team collaboration tools
|
||||
- [ ] Strategy sharing
|
||||
- [ ] Version control
|
||||
- [ ] Approval workflows
|
||||
|
||||
#### **3. Enterprise Integration**
|
||||
- [ ] CRM integration
|
||||
- [ ] Marketing automation
|
||||
- [ ] Analytics platforms
|
||||
- [ ] Custom API endpoints
|
||||
|
||||
## 🔧 **Development Guidelines**
|
||||
|
||||
### **1. Module Boundaries**
|
||||
- **Respect service responsibilities**: Each module has clear boundaries
|
||||
- **Use dependency injection**: Services should be loosely coupled
|
||||
- **Follow single responsibility**: Each service has one primary purpose
|
||||
- **Maintain clear interfaces**: Well-defined method signatures
|
||||
|
||||
### **2. Testing Strategy**
|
||||
- **Unit tests**: Test each service independently
|
||||
- **Integration tests**: Test service interactions
|
||||
- **End-to-end tests**: Test complete workflows
|
||||
- **Performance tests**: Monitor response times
|
||||
|
||||
### **3. Code Quality**
|
||||
- **Type hints**: Use comprehensive type annotations
|
||||
- **Documentation**: Document all public methods
|
||||
- **Error handling**: Implement robust error handling
|
||||
- **Logging**: Add comprehensive logging
|
||||
|
||||
### **4. Performance Considerations**
|
||||
- **Caching**: Implement intelligent caching strategies
|
||||
- **Database optimization**: Use efficient queries
|
||||
- **Async operations**: Use async/await for I/O operations
|
||||
- **Resource management**: Properly manage memory and connections
|
||||
|
||||
## 📈 **Success Metrics**
|
||||
|
||||
### **1. Performance Metrics**
|
||||
- **Response Time**: < 2 seconds for strategy creation
|
||||
- **Cache Hit Rate**: > 80% for frequently accessed data
|
||||
- **Error Rate**: < 1% for all operations
|
||||
- **Uptime**: > 99.9% availability
|
||||
|
||||
### **2. Quality Metrics**
|
||||
- **AI Response Quality**: > 85% confidence scores
|
||||
- **Data Completeness**: > 90% field completion
|
||||
- **User Satisfaction**: > 4.5/5 rating
|
||||
- **Strategy Effectiveness**: Measurable ROI improvements
|
||||
|
||||
### **3. Business Metrics**
|
||||
- **User Adoption**: Growing user base
|
||||
- **Feature Usage**: High engagement with AI features
|
||||
- **Customer Retention**: > 90% monthly retention
|
||||
- **Revenue Impact**: Measurable business value
|
||||
|
||||
## 🚀 **Getting Started**
|
||||
|
||||
### **1. Setup Development Environment**
|
||||
```bash
|
||||
# Install dependencies
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Set up database
|
||||
python manage.py migrate
|
||||
|
||||
# Run tests
|
||||
python -m pytest tests/
|
||||
```
|
||||
|
||||
### **2. Run the Service**
|
||||
```bash
|
||||
# Start the development server
|
||||
uvicorn main:app --reload
|
||||
|
||||
# Access the API
|
||||
curl http://localhost:8000/api/content-planning/strategies/
|
||||
```
|
||||
|
||||
### **3. Test AI Features**
|
||||
```python
|
||||
# Create a strategy with AI recommendations
|
||||
from api.content_planning.services.content_strategy import ModularEnhancedStrategyService as EnhancedStrategyService
|
||||
|
||||
service = EnhancedStrategyService()
|
||||
strategy = await service.create_enhanced_strategy(strategy_data, user_id, db)
|
||||
```
|
||||
|
||||
## 📚 **Documentation**
|
||||
|
||||
- **API Documentation**: `/docs` endpoint for interactive API docs
|
||||
- **Code Documentation**: Comprehensive docstrings in all modules
|
||||
- **Architecture Guide**: Detailed system architecture documentation
|
||||
- **User Guide**: Step-by-step user instructions
|
||||
|
||||
## 🤝 **Contributing**
|
||||
|
||||
### **1. Development Workflow**
|
||||
- Create feature branches from `main`
|
||||
- Write comprehensive tests
|
||||
- Update documentation
|
||||
- Submit pull requests
|
||||
|
||||
### **2. Code Review Process**
|
||||
- All changes require code review
|
||||
- Automated testing must pass
|
||||
- Documentation must be updated
|
||||
- Performance impact must be assessed
|
||||
|
||||
### **3. Release Process**
|
||||
- Semantic versioning
|
||||
- Changelog maintenance
|
||||
- Automated deployment
|
||||
- Rollback procedures
|
||||
|
||||
## 📞 **Support**
|
||||
|
||||
For questions, issues, or contributions:
|
||||
- **Issues**: Create GitHub issues for bugs or feature requests
|
||||
- **Discussions**: Use GitHub discussions for questions
|
||||
- **Documentation**: Check the comprehensive documentation
|
||||
- **Community**: Join our developer community
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: August 2024
|
||||
**Version**: 1.0.0
|
||||
**Status**: Active Development
|
||||
@@ -0,0 +1,8 @@
|
||||
"""
|
||||
Content Strategy Module
|
||||
Modular implementation of enhanced content strategy services.
|
||||
"""
|
||||
|
||||
from .core.strategy_service import EnhancedStrategyService as ModularEnhancedStrategyService
|
||||
|
||||
__all__ = ['ModularEnhancedStrategyService']
|
||||
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
AI Analysis Module
|
||||
AI recommendation generation and analysis services.
|
||||
"""
|
||||
|
||||
from .ai_recommendations import AIRecommendationsService
|
||||
from .prompt_engineering import PromptEngineeringService
|
||||
from .quality_validation import QualityValidationService
|
||||
|
||||
__all__ = ['AIRecommendationsService', 'PromptEngineeringService', 'QualityValidationService']
|
||||
@@ -0,0 +1,182 @@
|
||||
"""
|
||||
AI Recommendations Service
|
||||
AI recommendation generation and analysis.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, List
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
# Import database models
|
||||
from models.enhanced_strategy_models import EnhancedContentStrategy, EnhancedAIAnalysisResult
|
||||
|
||||
# Import modular components
|
||||
from .prompt_engineering import PromptEngineeringService
|
||||
from .quality_validation import QualityValidationService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AIRecommendationsService:
    """Generate, persist and retrieve AI recommendations for a content strategy.

    For every entry in ``analysis_types`` a specialized prompt is built, sent
    to the AI backend (currently a placeholder), parsed into a common shape and
    stored as an ``EnhancedAIAnalysisResult`` row. The combined results are
    then written back onto the strategy together with derived scores.
    """

    # EnhancedAIAnalysisResult payload columns, keyed by the analysis type
    # whose result belongs in that column.
    # NOTE(review): 'comprehensive_strategy' -> 'comprehensive_insights' is
    # inferred from the column names used when the row is built; confirm
    # against the model definition.
    _RESULT_COLUMNS = {
        'comprehensive_strategy': 'comprehensive_insights',
        'audience_intelligence': 'audience_intelligence',
        'competitive_intelligence': 'competitive_intelligence',
        'performance_optimization': 'performance_optimization',
        'content_calendar_optimization': 'content_calendar_optimization',
    }

    def __init__(self):
        self.prompt_engineering_service = PromptEngineeringService()
        self.quality_validation_service = QualityValidationService()

        # Analysis types for comprehensive recommendations
        self.analysis_types = [
            'comprehensive_strategy',
            'audience_intelligence',
            'competitive_intelligence',
            'performance_optimization',
            'content_calendar_optimization',
        ]

    async def generate_comprehensive_recommendations(self, strategy: EnhancedContentStrategy, db: Session) -> None:
        """Run every analysis type for ``strategy`` and persist the results.

        One ``EnhancedAIAnalysisResult`` row is added per analysis type, then
        the strategy's aggregate AI fields are updated and committed.

        This is treated as an enhancement rather than core functionality:
        failures are logged and never raised to the caller.

        Args:
            strategy: The strategy to analyse; its AI-related attributes are
                overwritten with the new results.
            db: Session used to store the analysis rows and the strategy update.
        """
        try:
            logger.info(f"Generating comprehensive AI recommendations for strategy: {strategy.id}")

            start_time = datetime.utcnow()

            # Generate recommendations for each analysis type
            ai_recommendations = {}

            for analysis_type in self.analysis_types:
                # Timed per analysis so each stored row reports its own
                # duration instead of the time elapsed since the whole run began.
                analysis_started = datetime.utcnow()
                try:
                    recommendations = await self._generate_specialized_recommendations(
                        strategy, analysis_type, db
                    )
                    ai_recommendations[analysis_type] = recommendations

                    # Store individual analysis result
                    analysis_result = EnhancedAIAnalysisResult(
                        user_id=strategy.user_id,
                        strategy_id=strategy.id,
                        analysis_type=analysis_type,
                        onboarding_data_used=strategy.onboarding_data_used,
                        processing_time=(datetime.utcnow() - analysis_started).total_seconds(),
                        ai_service_status="operational",
                        **self._result_payload(analysis_type, recommendations),
                    )

                    db.add(analysis_result)

                except Exception as e:
                    logger.error(f"Error generating {analysis_type} recommendations: {str(e)}")
                    # Continue with other analysis types

            db.commit()

            # Update strategy with comprehensive AI analysis
            validator = self.quality_validation_service
            strategy.comprehensive_ai_analysis = ai_recommendations
            strategy.strategic_scores = validator.calculate_strategic_scores(ai_recommendations)
            strategy.market_positioning = validator.extract_market_positioning(ai_recommendations)
            strategy.competitive_advantages = validator.extract_competitive_advantages(ai_recommendations)
            strategy.strategic_risks = validator.extract_strategic_risks(ai_recommendations)
            strategy.opportunity_analysis = validator.extract_opportunity_analysis(ai_recommendations)

            db.commit()

            processing_time = (datetime.utcnow() - start_time).total_seconds()
            logger.info(f"Comprehensive AI recommendations generated in {processing_time:.2f} seconds")

        except Exception as e:
            logger.error(f"Error generating comprehensive AI recommendations: {str(e)}")
            # Don't raise error, just log it as this is enhancement, not core functionality.
            # Roll back so a failed flush/commit does not leave the caller's
            # session unusable or carrying half-applied strategy changes.
            try:
                db.rollback()
            except Exception as rollback_error:
                logger.error(f"Error rolling back AI recommendation changes: {str(rollback_error)}")

    def _result_payload(self, analysis_type: str, recommendations: Dict[str, Any]) -> Dict[str, Any]:
        """Build the payload-column keyword arguments for one analysis row.

        Any payload column that appears as a key in ``recommendations`` is
        passed through. The structured response normally has no such keys, so
        the column belonging to ``analysis_type`` receives the whole
        ``recommendations`` dict; without this every payload column would be
        stored as ``None``.
        """
        payload = {column: recommendations.get(column) for column in self._RESULT_COLUMNS.values()}
        own_column = self._RESULT_COLUMNS.get(analysis_type)
        if own_column is not None and payload[own_column] is None:
            payload[own_column] = recommendations
        return payload

    async def _generate_specialized_recommendations(self, strategy: EnhancedContentStrategy, analysis_type: str, db: Session) -> Dict[str, Any]:
        """Produce structured recommendations for a single analysis type.

        Builds the prompt, calls the AI backend and parses the response. Any
        failure is logged and replaced by the static fallback for that type.
        ``db`` is accepted for interface compatibility and is not used here.
        """
        try:
            # Create prompt based on analysis type
            prompt = self.prompt_engineering_service.create_specialized_prompt(strategy, analysis_type)

            # Generate AI response
            ai_response = await self._call_ai_service(prompt, analysis_type)

            # Parse and structure the response
            return self._parse_ai_response(ai_response, analysis_type)

        except Exception as e:
            logger.error(f"Error generating {analysis_type} recommendations: {str(e)}")
            return self._get_fallback_recommendations(analysis_type)

    async def _call_ai_service(self, prompt: str, analysis_type: str) -> Dict[str, Any]:
        """Call AI service to generate recommendations.

        Placeholder implementation: ``prompt`` is not sent anywhere yet and
        structured mock data is returned instead.
        """
        # Placeholder implementation - integrate with actual AI service
        return {
            'analysis_type': analysis_type,
            'recommendations': f"AI recommendations for {analysis_type}",
            'insights': f"Key insights for {analysis_type}",
            'metrics': {'score': 85, 'confidence': 0.9},
        }

    def _parse_ai_response(self, ai_response: Dict[str, Any], analysis_type: str) -> Dict[str, Any]:
        """Normalise a raw AI response into the common recommendation shape.

        Missing ``recommendations``/``insights`` default to empty lists, a
        missing or ``None`` ``metrics`` defaults to an empty dict, and
        ``confidence_score`` defaults to 0.8 when no confidence is reported.
        """
        metrics = ai_response.get('metrics') or {}
        return {
            'analysis_type': analysis_type,
            'recommendations': ai_response.get('recommendations', []),
            'insights': ai_response.get('insights', []),
            'metrics': metrics,
            'confidence_score': metrics.get('confidence', 0.8),
        }

    def _get_fallback_recommendations(self, analysis_type: str) -> Dict[str, Any]:
        """Get fallback recommendations when AI service fails.

        The result has the same keys as ``_parse_ai_response`` output
        (``analysis_type`` and ``confidence_score`` included), so consumers do
        not need to special-case the fallback path.
        """
        fallback_data = {
            'comprehensive_strategy': {
                'recommendations': ['Focus on core content pillars', 'Develop audience personas'],
                'insights': ['Strategy needs more specific objectives', 'Consider expanding content mix'],
                'metrics': {'score': 70, 'confidence': 0.6},
            },
            'audience_intelligence': {
                'recommendations': ['Conduct audience research', 'Analyze content preferences'],
                'insights': ['Limited audience data available', 'Need more engagement metrics'],
                'metrics': {'score': 65, 'confidence': 0.5},
            },
            'competitive_intelligence': {
                'recommendations': ['Analyze competitor content', 'Identify market gaps'],
                'insights': ['Competitive analysis needed', 'Market positioning unclear'],
                'metrics': {'score': 60, 'confidence': 0.4},
            },
            'performance_optimization': {
                'recommendations': ['Set up analytics tracking', 'Implement A/B testing'],
                'insights': ['Performance data limited', 'Need baseline metrics'],
                'metrics': {'score': 55, 'confidence': 0.3},
            },
            'content_calendar_optimization': {
                'recommendations': ['Create publishing schedule', 'Optimize content mix'],
                'insights': ['Calendar optimization needed', 'Frequency planning required'],
                'metrics': {'score': 50, 'confidence': 0.2},
            },
        }

        fallback = fallback_data.get(analysis_type, {
            'recommendations': ['General strategy improvement needed'],
            'insights': ['Limited data available for analysis'],
            'metrics': {'score': 50, 'confidence': 0.3},
        })

        return {
            'analysis_type': analysis_type,
            **fallback,
            'confidence_score': fallback['metrics']['confidence'],
        }

    async def get_latest_ai_analysis(self, strategy_id: int, db: Session) -> Optional[Dict[str, Any]]:
        """Get latest AI analysis for a strategy.

        Returns:
            ``to_dict()`` of the most recently created analysis row for
            ``strategy_id``, or ``None`` when there is none or the query fails
            (the failure is logged).
        """
        try:
            analysis = db.query(EnhancedAIAnalysisResult).filter(
                EnhancedAIAnalysisResult.strategy_id == strategy_id
            ).order_by(EnhancedAIAnalysisResult.created_at.desc()).first()

            return analysis.to_dict() if analysis else None

        except Exception as e:
            logger.error(f"Error getting latest AI analysis: {str(e)}")
            return None
|
||||
@@ -0,0 +1,169 @@
|
||||
"""
|
||||
Prompt Engineering Service
|
||||
AI prompt creation and management.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
|
||||
# Import database models
|
||||
from models.enhanced_strategy_models import EnhancedContentStrategy
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class PromptEngineeringService:
    """Service for prompt engineering.

    Builds the text prompts sent to the AI backend for each content-strategy
    analysis type. The service holds no state.
    """

    def __init__(self):
        pass

    def create_specialized_prompt(self, strategy: EnhancedContentStrategy, analysis_type: str) -> str:
        """Create specialized AI prompts for each analysis type.

        Every prompt starts with the same context section rendered from the
        strategy's attributes, followed by a TASK / REQUIREMENTS section that
        is specific to the analysis type.

        Args:
            strategy: Strategy whose attributes are interpolated into the
                context section. Values are formatted with their default
                string conversion.
            analysis_type: One of 'comprehensive_strategy',
                'audience_intelligence', 'competitive_intelligence',
                'performance_optimization' or 'content_calendar_optimization'.

        Returns:
            The prompt for ``analysis_type``; for any other value, the shared
            context section alone.
        """

        # Shared context block; reads the strategy attributes listed below.
        base_context = f"""
        Business Context:
        - Industry: {strategy.industry}
        - Business Objectives: {strategy.business_objectives}
        - Target Metrics: {strategy.target_metrics}
        - Content Budget: {strategy.content_budget}
        - Team Size: {strategy.team_size}
        - Implementation Timeline: {strategy.implementation_timeline}
        - Market Share: {strategy.market_share}
        - Competitive Position: {strategy.competitive_position}
        - Performance Metrics: {strategy.performance_metrics}

        Audience Intelligence:
        - Content Preferences: {strategy.content_preferences}
        - Consumption Patterns: {strategy.consumption_patterns}
        - Audience Pain Points: {strategy.audience_pain_points}
        - Buying Journey: {strategy.buying_journey}
        - Seasonal Trends: {strategy.seasonal_trends}
        - Engagement Metrics: {strategy.engagement_metrics}

        Competitive Intelligence:
        - Top Competitors: {strategy.top_competitors}
        - Competitor Content Strategies: {strategy.competitor_content_strategies}
        - Market Gaps: {strategy.market_gaps}
        - Industry Trends: {strategy.industry_trends}
        - Emerging Trends: {strategy.emerging_trends}

        Content Strategy:
        - Preferred Formats: {strategy.preferred_formats}
        - Content Mix: {strategy.content_mix}
        - Content Frequency: {strategy.content_frequency}
        - Optimal Timing: {strategy.optimal_timing}
        - Quality Metrics: {strategy.quality_metrics}
        - Editorial Guidelines: {strategy.editorial_guidelines}
        - Brand Voice: {strategy.brand_voice}

        Performance & Analytics:
        - Traffic Sources: {strategy.traffic_sources}
        - Conversion Rates: {strategy.conversion_rates}
        - Content ROI Targets: {strategy.content_roi_targets}
        - A/B Testing Capabilities: {strategy.ab_testing_capabilities}
        """

        # All five prompts are rendered on every call; only the one matching
        # analysis_type is returned.
        specialized_prompts = {
            'comprehensive_strategy': f"""
            {base_context}

            TASK: Generate a comprehensive content strategy analysis that provides:
            1. Strategic positioning and market analysis
            2. Audience targeting and persona development
            3. Content pillar recommendations with rationale
            4. Competitive advantage identification
            5. Performance optimization strategies
            6. Risk assessment and mitigation plans
            7. Implementation roadmap with milestones
            8. Success metrics and KPIs

            REQUIREMENTS:
            - Provide actionable, specific recommendations
            - Include data-driven insights
            - Consider industry best practices
            - Address both short-term and long-term goals
            - Provide confidence levels for each recommendation
            """,

            'audience_intelligence': f"""
            {base_context}

            TASK: Generate detailed audience intelligence analysis including:
            1. Comprehensive audience persona development
            2. Content preference analysis and recommendations
            3. Consumption pattern insights and optimization
            4. Pain point identification and content solutions
            5. Buying journey mapping and content alignment
            6. Seasonal trend analysis and content planning
            7. Engagement pattern analysis and optimization
            8. Audience segmentation strategies

            REQUIREMENTS:
            - Use data-driven insights from provided metrics
            - Provide specific content recommendations for each audience segment
            - Include engagement optimization strategies
            - Consider cultural and behavioral factors
            """,

            'competitive_intelligence': f"""
            {base_context}

            TASK: Generate comprehensive competitive intelligence analysis including:
            1. Competitor content strategy analysis
            2. Market gap identification and opportunities
            3. Competitive advantage development strategies
            4. Industry trend analysis and implications
            5. Emerging trend identification and early adoption strategies
            6. Competitive positioning recommendations
            7. Market opportunity assessment
            8. Competitive response strategies

            REQUIREMENTS:
            - Analyze provided competitor data thoroughly
            - Identify unique market opportunities
            - Provide actionable competitive strategies
            - Consider both direct and indirect competitors
            """,

            'performance_optimization': f"""
            {base_context}

            TASK: Generate performance optimization analysis including:
            1. Current performance analysis and benchmarking
            2. Traffic source optimization strategies
            3. Conversion rate improvement recommendations
            4. Content ROI optimization strategies
            5. A/B testing framework and recommendations
            6. Performance monitoring and analytics setup
            7. Optimization roadmap and priorities
            8. Success metrics and tracking implementation

            REQUIREMENTS:
            - Provide specific, measurable optimization strategies
            - Include data-driven recommendations
            - Consider both technical and content optimizations
            - Provide implementation timelines and priorities
            """,

            'content_calendar_optimization': f"""
            {base_context}

            TASK: Generate content calendar optimization analysis including:
            1. Optimal content frequency and timing analysis
            2. Content mix optimization and balance
            3. Seasonal content planning and scheduling
            4. Content pillar integration and scheduling
            5. Platform-specific content adaptation
            6. Content repurposing and amplification strategies
            7. Editorial calendar optimization
            8. Content performance tracking and adjustment

            REQUIREMENTS:
            - Provide specific scheduling recommendations
            - Include content mix optimization strategies
            - Consider platform-specific requirements
            - Provide seasonal and trend-based planning
            """
        }

        # Unknown analysis types fall back to the context section without a task.
        return specialized_prompts.get(analysis_type, base_context)
|
||||
@@ -0,0 +1,166 @@
|
||||
"""
|
||||
Quality Validation Service
|
||||
AI response quality assessment and strategic analysis.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class QualityValidationService:
    """Service for quality validation and strategic analysis.

    Scores AI responses and strategy data, and derives strategy-level
    summaries from a set of AI recommendations. The service holds no state.
    """

    def calculate_strategic_scores(self, ai_recommendations: Dict[str, Any]) -> Dict[str, float]:
        """Calculate strategic performance scores from AI recommendations.

        ``overall_score`` is the confidence-weighted mean of each analysis's
        ``metrics['score']`` (defaults: score 50, confidence 0.5). The other
        scores are fixed multiples of the overall score. Entries that are not
        dicts or carry no dict ``metrics`` are ignored; when nothing
        contributes, all scores are 0.0.

        Args:
            ai_recommendations: Mapping of analysis type to its recommendation
                dict.

        Returns:
            Dict with ``overall_score``, ``content_quality_score``,
            ``engagement_score``, ``conversion_score`` and ``innovation_score``.
        """
        scores = {
            'overall_score': 0.0,
            'content_quality_score': 0.0,
            'engagement_score': 0.0,
            'conversion_score': 0.0,
            'innovation_score': 0.0,
        }

        # Calculate scores based on AI recommendations
        total_confidence = 0
        total_score = 0

        for recommendations in ai_recommendations.values():
            if not isinstance(recommendations, dict):
                continue
            metrics = recommendations.get('metrics')
            if not isinstance(metrics, dict):
                continue

            confidence = metrics.get('confidence', 0.5)
            total_score += metrics.get('score', 50) * confidence
            total_confidence += confidence

        if total_confidence > 0:
            overall = total_score / total_confidence
            scores['overall_score'] = overall

            # Set other scores based on overall score
            scores['content_quality_score'] = overall * 1.1
            scores['engagement_score'] = overall * 0.9
            scores['conversion_score'] = overall * 0.95
            scores['innovation_score'] = overall * 1.05

        return scores

    def extract_market_positioning(self, ai_recommendations: Dict[str, Any]) -> Dict[str, Any]:
        """Extract market positioning from AI recommendations.

        Currently returns static placeholder values; ``ai_recommendations``
        is not inspected.
        """
        return {
            'industry_position': 'emerging',
            'competitive_advantage': 'AI-powered content',
            'market_share': '2.5%',
            'positioning_score': 4,
        }

    def extract_competitive_advantages(self, ai_recommendations: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract competitive advantages from AI recommendations.

        Currently returns static placeholder values; ``ai_recommendations``
        is not inspected.
        """
        return [
            {
                'advantage': 'AI-powered content creation',
                'impact': 'High',
                'implementation': 'In Progress',
            },
            {
                'advantage': 'Data-driven strategy',
                'impact': 'Medium',
                'implementation': 'Complete',
            },
        ]

    def extract_strategic_risks(self, ai_recommendations: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract strategic risks from AI recommendations.

        Currently returns static placeholder values; ``ai_recommendations``
        is not inspected.
        """
        return [
            {
                'risk': 'Content saturation in market',
                'probability': 'Medium',
                'impact': 'High',
            },
            {
                'risk': 'Algorithm changes affecting reach',
                'probability': 'High',
                'impact': 'Medium',
            },
        ]

    def extract_opportunity_analysis(self, ai_recommendations: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract opportunity analysis from AI recommendations.

        Currently returns static placeholder values; ``ai_recommendations``
        is not inspected.
        """
        return [
            {
                'opportunity': 'Video content expansion',
                'potential_impact': 'High',
                'implementation_ease': 'Medium',
            },
            {
                'opportunity': 'Social media engagement',
                'potential_impact': 'Medium',
                'implementation_ease': 'High',
            },
        ]

    def validate_ai_response_quality(self, ai_response: Dict[str, Any]) -> Dict[str, Any]:
        """Validate the quality of AI response.

        Returns:
            Dict with ``completeness`` (share of the required keys present),
            ``relevance``, ``actionability`` (number of recommendations / 5,
            capped at 1.0), ``confidence`` (reported confidence, default 0.5)
            and ``overall_quality`` (mean of those four components).
        """
        quality_metrics = {
            'completeness': 0.0,
            'relevance': 0.0,
            'actionability': 0.0,
            'confidence': 0.0,
            'overall_quality': 0.0,
        }

        # Calculate completeness
        required_fields = ['recommendations', 'insights', 'metrics']
        present_fields = sum(1 for field in required_fields if field in ai_response)
        quality_metrics['completeness'] = present_fields / len(required_fields)

        # Calculate relevance (placeholder logic)
        quality_metrics['relevance'] = 0.8 if ai_response.get('analysis_type') else 0.5

        # Calculate actionability (placeholder logic)
        recommendations = ai_response.get('recommendations') or []
        if isinstance(recommendations, str):
            # A free-text answer is one recommendation; len() would count
            # characters and almost always saturate the metric.
            recommendations = [recommendations] if recommendations.strip() else []
        quality_metrics['actionability'] = min(1.0, len(recommendations) / 5.0)

        # Calculate confidence
        metrics = ai_response.get('metrics')
        if not isinstance(metrics, dict):
            metrics = {}
        quality_metrics['confidence'] = metrics.get('confidence', 0.5)

        # Calculate overall quality as the mean of the component metrics only.
        # Averaging over the whole dict would count the still-zero
        # 'overall_quality' slot and deflate the result.
        components = ('completeness', 'relevance', 'actionability', 'confidence')
        quality_metrics['overall_quality'] = sum(quality_metrics[key] for key in components) / len(components)

        return quality_metrics

    def assess_strategy_quality(self, strategy_data: Dict[str, Any]) -> Dict[str, Any]:
        """Assess the overall quality of a content strategy.

        Returns:
            Dict with ``data_completeness`` (share of the five core fields
            that are truthy), ``strategic_clarity``,
            ``implementation_readiness``, ``competitive_positioning`` and
            ``overall_quality`` (mean of those four components).
        """
        quality_assessment = {
            'data_completeness': 0.0,
            'strategic_clarity': 0.0,
            'implementation_readiness': 0.0,
            'competitive_positioning': 0.0,
            'overall_quality': 0.0,
        }

        # Assess data completeness
        required_fields = [
            'business_objectives', 'target_metrics', 'content_budget',
            'team_size', 'implementation_timeline',
        ]
        present_fields = sum(1 for field in required_fields if strategy_data.get(field))
        quality_assessment['data_completeness'] = present_fields / len(required_fields)

        # Assess strategic clarity (placeholder logic)
        quality_assessment['strategic_clarity'] = 0.7 if strategy_data.get('business_objectives') else 0.3

        # Assess implementation readiness (placeholder logic)
        quality_assessment['implementation_readiness'] = 0.6 if strategy_data.get('team_size') else 0.2

        # Assess competitive positioning (placeholder logic)
        quality_assessment['competitive_positioning'] = 0.5 if strategy_data.get('competitive_position') else 0.2

        # Calculate overall quality as the mean of the component metrics only
        # (the 'overall_quality' slot itself must not be part of the average).
        components = (
            'data_completeness', 'strategic_clarity',
            'implementation_readiness', 'competitive_positioning',
        )
        quality_assessment['overall_quality'] = sum(quality_assessment[key] for key in components) / len(components)

        return quality_assessment
|
||||
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
Core Content Strategy Services
|
||||
Main orchestration and core functionality.
|
||||
"""
|
||||
|
||||
from .strategy_service import EnhancedStrategyService
|
||||
from .field_mappings import STRATEGIC_INPUT_FIELDS
|
||||
from .constants import SERVICE_CONSTANTS
|
||||
|
||||
__all__ = ['EnhancedStrategyService', 'STRATEGIC_INPUT_FIELDS', 'SERVICE_CONSTANTS']
|
||||
@@ -0,0 +1,33 @@
|
||||
"""
|
||||
Service Constants for Content Strategy
|
||||
Configuration and settings for the enhanced strategy service.
|
||||
"""
|
||||
|
||||
# Version tag of the prompt template used for each analysis type
PROMPT_VERSIONS = {
    'comprehensive_strategy': 'v2.1',
    'audience_intelligence': 'v2.0',
    'competitive_intelligence': 'v2.0',
    'performance_optimization': 'v2.1',
    'content_calendar_optimization': 'v2.0',
}

# Minimum acceptable quality figures and the response-time ceiling
QUALITY_THRESHOLDS = {
    'min_confidence': 0.7,
    'min_completeness': 0.8,
    'max_response_time': 30.0,  # seconds
}

# Cache lifetimes (seconds) and capacity
CACHE_SETTINGS = {
    'ai_analysis_cache_ttl': 60 * 60,  # 1 hour
    'onboarding_data_cache_ttl': 30 * 60,  # 30 minutes
    'strategy_cache_ttl': 2 * 60 * 60,  # 2 hours
    'max_cache_size': 1000,  # Maximum cached items
}

# Single entry point bundling the groups above
SERVICE_CONSTANTS = {
    'prompt_versions': PROMPT_VERSIONS,
    'quality_thresholds': QUALITY_THRESHOLDS,
    'cache_settings': CACHE_SETTINGS,
}
|
||||
@@ -0,0 +1,56 @@
|
||||
"""
|
||||
Strategic Input Field Mappings
|
||||
Definitions for the 30+ strategic input fields.
|
||||
"""
|
||||
|
||||
# Strategic input field names, grouped by category (30 fields in total)
STRATEGIC_INPUT_FIELDS = {
    'business_context': [
        'business_objectives',
        'target_metrics',
        'content_budget',
        'team_size',
        'implementation_timeline',
        'market_share',
        'competitive_position',
        'performance_metrics',
    ],
    'audience_intelligence': [
        'content_preferences',
        'consumption_patterns',
        'audience_pain_points',
        'buying_journey',
        'seasonal_trends',
        'engagement_metrics',
    ],
    'competitive_intelligence': [
        'top_competitors',
        'competitor_content_strategies',
        'market_gaps',
        'industry_trends',
        'emerging_trends',
    ],
    'content_strategy': [
        'preferred_formats',
        'content_mix',
        'content_frequency',
        'optimal_timing',
        'quality_metrics',
        'editorial_guidelines',
        'brand_voice',
    ],
    'performance_analytics': [
        'traffic_sources',
        'conversion_rates',
        'content_roi_targets',
        'ab_testing_capabilities',
    ],
}

# Display metadata per category; 'fields' refers to the very same list object
# stored in STRATEGIC_INPUT_FIELDS for that category.
FIELD_CATEGORIES = {
    category: {
        'name': display_name,
        'description': summary,
        'fields': STRATEGIC_INPUT_FIELDS[category],
    }
    for category, display_name, summary in (
        ('business_context', 'Business Context', 'Core business objectives and metrics'),
        ('audience_intelligence', 'Audience Intelligence', 'Target audience analysis and insights'),
        ('competitive_intelligence', 'Competitive Intelligence', 'Competitor analysis and market positioning'),
        ('content_strategy', 'Content Strategy', 'Content planning and execution'),
        ('performance_analytics', 'Performance & Analytics', 'Performance tracking and optimization'),
    )
}
|
||||
@@ -0,0 +1,349 @@
|
||||
"""
|
||||
Enhanced Strategy Service - Core Module
|
||||
Main orchestration service for content strategy operations.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, List, Union
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
# Import database models
|
||||
from models.enhanced_strategy_models import EnhancedContentStrategy, EnhancedAIAnalysisResult
|
||||
|
||||
# Import modular services
|
||||
from ..ai_analysis.ai_recommendations import AIRecommendationsService
|
||||
from ..ai_analysis.prompt_engineering import PromptEngineeringService
|
||||
from ..ai_analysis.quality_validation import QualityValidationService
|
||||
|
||||
# Import onboarding services
|
||||
from ..onboarding.data_integration import OnboardingDataIntegrationService
|
||||
from ..onboarding.field_transformation import FieldTransformationService
|
||||
from ..onboarding.data_quality import DataQualityService
|
||||
|
||||
# Import performance services
|
||||
from ..performance.caching import CachingService
|
||||
from ..performance.optimization import PerformanceOptimizationService
|
||||
from ..performance.health_monitoring import HealthMonitoringService
|
||||
|
||||
# Import utils services
|
||||
from ..utils.data_processors import DataProcessorService
|
||||
from ..utils.validators import ValidationService
|
||||
|
||||
# Import core components
|
||||
from .field_mappings import STRATEGIC_INPUT_FIELDS
|
||||
from .constants import SERVICE_CONSTANTS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class EnhancedStrategyService:
|
||||
"""Enhanced content strategy service with modular architecture."""
|
||||
|
||||
def __init__(self):
    """Instantiate every collaborating service with its default constructor.

    No arguments are taken; each collaborator is created fresh for this
    service instance.
    """
    # Initialize AI analysis services
    self.ai_recommendations_service = AIRecommendationsService()
    self.prompt_engineering_service = PromptEngineeringService()
    self.quality_validation_service = QualityValidationService()

    # Initialize onboarding services
    self.onboarding_data_service = OnboardingDataIntegrationService()
    self.field_transformation_service = FieldTransformationService()
    self.data_quality_service = DataQualityService()

    # Initialize performance services
    self.caching_service = CachingService()
    self.performance_optimization_service = PerformanceOptimizationService()
    self.health_monitoring_service = HealthMonitoringService()

    # Initialize utils services
    self.data_processor_service = DataProcessorService()
    self.validation_service = ValidationService()
|
||||
|
||||
async def create_enhanced_strategy(self, strategy_data: Dict[str, Any], user_id: int, db: Session) -> EnhancedContentStrategy:
    """Create enhanced content strategy with all integrations.

    Validates ``strategy_data``, merges it with fields derived from the
    user's onboarding data, stores the strategy, then generates AI
    recommendations and caches the result.

    Args:
        strategy_data: Raw strategy fields supplied by the caller.
        user_id: Owner of the new strategy.
        db: Session used to persist the strategy.

    Returns:
        The persisted strategy, refreshed from the database.

    Raises:
        ValueError: If validation reports the data as invalid.
        Exception: Any other failure is logged, the session is rolled back
            and the error is re-raised.
    """
    try:
        logger.info(f"Creating enhanced strategy for user: {user_id}")

        # Validate strategy data
        validation_result = self.validation_service.validate_strategy_data(strategy_data)
        if not validation_result['is_valid']:
            logger.error(f"Strategy validation failed: {validation_result['errors']}")
            raise ValueError(f"Invalid strategy data: {'; '.join(validation_result['errors'])}")

        # Process onboarding data
        onboarding_data = await self._process_onboarding_data(user_id, db)

        # Transform onboarding data to fields
        field_transformations = self.field_transformation_service.transform_onboarding_data_to_fields(onboarding_data)

        # Merge strategy data with onboarding data
        enhanced_strategy_data = self._merge_strategy_with_onboarding(strategy_data, field_transformations)

        # These keys are set explicitly below; if the merged data also carried
        # them the constructor call would fail with a duplicate-keyword
        # TypeError, so the explicit values win.
        reserved_keys = ('user_id', 'created_at', 'updated_at')
        model_fields = {
            key: value
            for key, value in enhanced_strategy_data.items()
            if key not in reserved_keys
        }

        # Create strategy object (one timestamp so both fields match exactly)
        now = datetime.utcnow()
        strategy = EnhancedContentStrategy(
            user_id=user_id,
            **model_fields,
            created_at=now,
            updated_at=now
        )

        # Save to database
        db.add(strategy)
        db.commit()
        db.refresh(strategy)

        # Generate AI recommendations
        await self.ai_recommendations_service.generate_comprehensive_recommendations(strategy, db)

        # Cache strategy data. The strategy is already committed at this
        # point, so a cache failure must not be reported as a failed creation
        # (a retry by the caller would create a duplicate).
        try:
            await self.caching_service.cache_strategy(strategy.id, strategy.to_dict())
        except Exception as cache_error:
            logger.warning(f"Failed to cache strategy {strategy.id}: {str(cache_error)}")

        logger.info(f"Enhanced strategy created successfully: {strategy.id}")
        return strategy

    except Exception as e:
        logger.error(f"Error creating enhanced strategy: {str(e)}")
        db.rollback()
        raise
|
||||
|
||||
async def get_enhanced_strategy(self, strategy_id: int, db: Session) -> Optional[EnhancedContentStrategy]:
    """Look up a strategy, preferring the cache over the database.

    A truthy cache entry is returned as-is. Otherwise the strategy is
    queried by primary key and, when found, its ``to_dict()`` form is
    written to the cache before the ORM object is returned.

    NOTE(review): the cache is populated with ``strategy.to_dict()``, so a
    cache hit presumably yields a dict rather than a model instance --
    confirm callers handle both shapes.

    Returns:
        The cached value, the ORM object, or ``None`` when nothing matches
        or any error occurs (errors are logged, not raised).
    """
    try:
        cache = self.caching_service

        # Cache first
        cached_strategy = await cache.get_cached_strategy(strategy_id)
        if cached_strategy:
            logger.info(f"Retrieved strategy {strategy_id} from cache")
            return cached_strategy

        # Fall back to the database
        by_id = db.query(EnhancedContentStrategy).filter(
            EnhancedContentStrategy.id == strategy_id
        )
        strategy = by_id.first()
        if not strategy:
            return strategy

        # Warm the cache for the next lookup
        await cache.cache_strategy(strategy_id, strategy.to_dict())
        return strategy

    except Exception as e:
        logger.error(f"Error getting enhanced strategy: {str(e)}")
        return None
|
||||
|
||||
async def update_enhanced_strategy(self, strategy_id: int, update_data: Dict[str, Any], db: Session) -> Optional[EnhancedContentStrategy]:
    """Apply ``update_data`` to an existing strategy and persist it.

    Steps: load the row, validate the update, copy every key that matches
    an existing attribute onto the row, stamp ``updated_at``, commit,
    invalidate the cached copy and, when a trigger field changed,
    regenerate AI recommendations.

    Returns:
        The refreshed strategy, or ``None`` when no row has ``strategy_id``.

    Raises:
        ValueError: If the validation service reports the update as invalid.
        Exception: Other failures are logged, rolled back and re-raised.
    """
    try:
        lookup = db.query(EnhancedContentStrategy).filter(
            EnhancedContentStrategy.id == strategy_id
        )
        strategy = lookup.first()
        if not strategy:
            return None

        # Reject the update before touching the row
        validation_result = self.validation_service.validate_strategy_data(update_data)
        if not validation_result['is_valid']:
            logger.error(f"Strategy update validation failed: {validation_result['errors']}")
            raise ValueError(f"Invalid update data: {'; '.join(validation_result['errors'])}")

        # Only keys that correspond to an existing attribute are applied
        for field, value in update_data.items():
            if not hasattr(strategy, field):
                continue
            setattr(strategy, field, value)
        strategy.updated_at = datetime.utcnow()

        # Persist
        db.commit()
        db.refresh(strategy)

        # The cached copy is now out of date
        await self.caching_service.invalidate_cache('strategy_cache', str(strategy_id))

        # Recommendations depend on a subset of fields only
        needs_new_recommendations = self._should_regenerate_ai_recommendations(update_data)
        if needs_new_recommendations:
            await self.ai_recommendations_service.generate_comprehensive_recommendations(strategy, db)

        logger.info(f"Enhanced strategy updated successfully: {strategy_id}")
        return strategy

    except Exception as e:
        logger.error(f"Error updating enhanced strategy: {str(e)}")
        db.rollback()
        raise
|
||||
|
||||
async def get_onboarding_data(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Return the user's onboarding data for auto-population.

    A truthy cache entry is returned directly; otherwise the data is
    processed from the database, written to the cache and returned.
    Any error is logged and an empty dict is returned instead.
    """
    try:
        cache = self.caching_service
        cached_data = await cache.get_cached_onboarding_data(user_id)

        if not cached_data:
            # Cache miss: build the data and remember it for next time
            onboarding_data = await self._process_onboarding_data(user_id, db)
            await cache.cache_onboarding_data(user_id, onboarding_data)
            return onboarding_data

        logger.info(f"Retrieved onboarding data for user {user_id} from cache")
        return cached_data

    except Exception as e:
        logger.error(f"Error getting onboarding data: {str(e)}")
        return {}
|
||||
|
||||
async def get_ai_analysis(self, strategy_id: int, analysis_type: str, db: Session) -> Optional[Dict[str, Any]]:
    """Return the newest AI analysis of ``analysis_type`` for a strategy.

    The cache is consulted first. On a miss the most recently created
    ``EnhancedAIAnalysisResult`` row is loaded, converted with
    ``to_dict()``, cached and returned. ``None`` is returned when no row
    exists or when an error occurs (errors are logged, not raised).
    """
    try:
        cache = self.caching_service
        cached_analysis = await cache.get_cached_ai_analysis(strategy_id, analysis_type)
        if cached_analysis:
            logger.info(f"Retrieved AI analysis for strategy {strategy_id} from cache")
            return cached_analysis

        # Newest matching row wins
        newest_first = db.query(EnhancedAIAnalysisResult).filter(
            EnhancedAIAnalysisResult.strategy_id == strategy_id,
            EnhancedAIAnalysisResult.analysis_type == analysis_type
        ).order_by(EnhancedAIAnalysisResult.created_at.desc())
        analysis = newest_first.first()
        if not analysis:
            return None

        analysis_data = analysis.to_dict()
        await cache.cache_ai_analysis(strategy_id, analysis_type, analysis_data)
        return analysis_data

    except Exception as e:
        logger.error(f"Error getting AI analysis: {str(e)}")
        return None
|
||||
|
||||
async def get_system_health(self, db: Session) -> Dict[str, Any]:
    """Ask the health monitoring service for the current system status.

    On failure the error is logged and a dict with ``overall_status`` set
    to ``'error'``, the error text and an ISO timestamp is returned.
    """
    try:
        monitor = self.health_monitoring_service
        health = await monitor.check_system_health(
            db,
            self.caching_service,
            self.ai_recommendations_service
        )
    except Exception as e:
        logger.error(f"Error getting system health: {str(e)}")
        failure = {'overall_status': 'error'}
        failure['error'] = str(e)
        failure['timestamp'] = datetime.utcnow().isoformat()
        return failure
    return health
|
||||
|
||||
async def get_performance_report(self) -> Dict[str, Any]:
    """Fetch the report produced by the performance optimization service.

    On failure the error is logged and a dict holding the error text and
    an ISO timestamp is returned instead.
    """
    try:
        optimizer = self.performance_optimization_service
        report = await optimizer.get_performance_report()
    except Exception as e:
        logger.error(f"Error getting performance report: {str(e)}")
        failure = {'error': str(e)}
        failure['timestamp'] = datetime.utcnow().isoformat()
        return failure
    return report
|
||||
|
||||
async def _process_onboarding_data(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Load the user's integrated onboarding data and attach a quality score.

    The result of the data quality service is stored under the
    ``'quality_assessment'`` key of the integrated data. Any error is
    logged and an empty dict is returned.
    """
    try:
        integration = self.onboarding_data_service
        integrated_data = await integration.process_onboarding_data(user_id, db)

        # Score the data and carry the score along with it
        integrated_data['quality_assessment'] = (
            self.data_quality_service.assess_onboarding_data_quality(integrated_data)
        )
        return integrated_data

    except Exception as e:
        logger.error(f"Error processing onboarding data: {str(e)}")
        return {}
|
||||
|
||||
def _merge_strategy_with_onboarding(self, strategy_data: Dict[str, Any], field_transformations: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Merge strategy data with onboarding field transformations."""
|
||||
try:
|
||||
merged_data = strategy_data.copy()
|
||||
|
||||
# Add auto-populated fields from onboarding data
|
||||
if 'fields' in field_transformations:
|
||||
for field_name, field_value in field_transformations['fields'].items():
|
||||
if field_name not in merged_data or not merged_data[field_name]:
|
||||
merged_data[field_name] = field_value
|
||||
|
||||
# Add data sources information
|
||||
if 'sources' in field_transformations:
|
||||
merged_data['data_sources'] = field_transformations['sources']
|
||||
|
||||
return merged_data
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error merging strategy with onboarding: {str(e)}")
|
||||
return strategy_data
|
||||
|
||||
def _should_regenerate_ai_recommendations(self, update_data: Dict[str, Any]) -> bool:
|
||||
"""Determine if AI recommendations should be regenerated."""
|
||||
try:
|
||||
# Fields that would trigger AI recommendation regeneration
|
||||
ai_trigger_fields = [
|
||||
'business_objectives', 'target_metrics', 'content_budget',
|
||||
'team_size', 'implementation_timeline', 'market_share',
|
||||
'competitive_position', 'content_preferences', 'audience_pain_points',
|
||||
'top_competitors', 'industry_trends'
|
||||
]
|
||||
|
||||
return any(field in update_data for field in ai_trigger_fields)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking if AI recommendations should be regenerated: {str(e)}")
|
||||
return False
|
||||
|
||||
def get_strategic_input_fields(self) -> List[Dict[str, Any]]:
    """Expose the module-level ``STRATEGIC_INPUT_FIELDS`` definitions."""
    field_definitions = STRATEGIC_INPUT_FIELDS
    return field_definitions
|
||||
|
||||
def get_service_constants(self) -> Dict[str, Any]:
    """Expose the module-level ``SERVICE_CONSTANTS`` configuration."""
    constants = SERVICE_CONSTANTS
    return constants
|
||||
|
||||
async def validate_strategy_data(self, strategy_data: Dict[str, Any]) -> Dict[str, Any]:
    """Run ``strategy_data`` through the validation service.

    If the validation service itself raises, the error is logged and an
    invalid result carrying the error text is returned instead.
    """
    try:
        validator = self.validation_service
        outcome = validator.validate_strategy_data(strategy_data)
    except Exception as e:
        logger.error(f"Error validating strategy data: {str(e)}")
        failure = {
            'is_valid': False,
            'errors': [f"Validation error: {str(e)}"],
            'warnings': [],
            'field_validations': {},
        }
        failure['validation_timestamp'] = datetime.utcnow().isoformat()
        return failure
    return outcome
|
||||
|
||||
async def process_data_for_output(self, data: Dict[str, Any], output_format: str = 'json') -> Union[str, Dict[str, Any]]:
    """Format ``data`` via the data processor service.

    If formatting fails the error is logged and ``str(data)`` is returned.
    """
    try:
        formatter = self.data_processor_service.format_data_for_output
        formatted = formatter(data, output_format)
    except Exception as e:
        logger.error(f"Error processing data for output: {str(e)}")
        return str(data)
    return formatted
|
||||
|
||||
async def optimize_strategy_operation(self, operation_name: str, operation_func, *args, **kwargs) -> Dict[str, Any]:
    """Run ``operation_func`` through the performance optimization service.

    Positional and keyword arguments are forwarded unchanged. If the call
    fails the error is logged and a result dict with
    ``performance_status`` set to ``'error'`` is returned.
    """
    try:
        optimizer = self.performance_optimization_service
        outcome = await optimizer.optimize_response_time(
            operation_name, operation_func, *args, **kwargs
        )
    except Exception as e:
        logger.error(f"Error optimizing strategy operation: {str(e)}")
        failure = {'result': None, 'response_time': 0.0}
        failure['optimization_suggestions'] = ['Error occurred during optimization']
        failure['performance_status'] = 'error'
        return failure
    return outcome
|
||||
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
Onboarding Module
|
||||
Onboarding data integration and processing services.
|
||||
"""
|
||||
|
||||
from .data_integration import OnboardingDataIntegrationService
|
||||
from .field_transformation import FieldTransformationService
|
||||
from .data_quality import DataQualityService
|
||||
|
||||
__all__ = ['OnboardingDataIntegrationService', 'FieldTransformationService', 'DataQualityService']
|
||||
@@ -0,0 +1,381 @@
|
||||
"""
|
||||
Onboarding Data Integration Service
|
||||
Onboarding data integration and processing.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, List
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
# Import database models
|
||||
from models.enhanced_strategy_models import (
|
||||
OnboardingDataIntegration
|
||||
)
|
||||
from models.onboarding import (
|
||||
OnboardingSession,
|
||||
WebsiteAnalysis,
|
||||
ResearchPreferences,
|
||||
APIKey
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class OnboardingDataIntegrationService:
    """Service for onboarding data integration and processing.

    Gathers a user's onboarding records (website analysis, research
    preferences, API keys and session progress), scores their quality and
    stores the combined result in ``OnboardingDataIntegration``.
    """

    def __init__(self):
        # Data no older than this gets the maximum freshness score (1.0).
        self.data_freshness_threshold = timedelta(hours=24)
        # Age at which the freshness score reaches its floor of 0.5.
        self.max_analysis_age = timedelta(days=7)

    async def process_onboarding_data(self, user_id: int, db: Session) -> Dict[str, Any]:
        """Process and integrate all onboarding data for a user.

        Returns:
            A dict with the four data sources, a ``data_quality`` block and
            a ``processing_timestamp``. If processing fails, the error is
            logged and the empty fallback structure is returned.
        """
        try:
            logger.info(f"Processing onboarding data for user: {user_id}")

            # Get all onboarding data sources
            website_analysis = self._get_website_analysis(user_id, db)
            research_preferences = self._get_research_preferences(user_id, db)
            api_keys_data = self._get_api_keys_data(user_id, db)
            onboarding_session = self._get_onboarding_session(user_id, db)

            # Process and integrate data
            integrated_data = {
                'website_analysis': website_analysis,
                'research_preferences': research_preferences,
                'api_keys_data': api_keys_data,
                'onboarding_session': onboarding_session,
                'data_quality': self._assess_data_quality(website_analysis, research_preferences, api_keys_data),
                'processing_timestamp': datetime.utcnow().isoformat()
            }

            # Store integrated data
            await self._store_integrated_data(user_id, integrated_data, db)

            logger.info(f"Onboarding data processed successfully for user: {user_id}")
            return integrated_data

        except Exception as e:
            logger.error(f"Error processing onboarding data for user {user_id}: {str(e)}")
            return self._get_fallback_data()

    def _get_latest_session(self, user_id: int, db: Session):
        """Return the user's most recently updated onboarding session.

        Logs a warning and returns ``None`` when the user has no session.
        Query errors propagate to the caller, which owns the error handling.
        """
        session = db.query(OnboardingSession).filter(
            OnboardingSession.user_id == user_id
        ).order_by(OnboardingSession.updated_at.desc()).first()

        if not session:
            logger.warning(f"No onboarding session found for user {user_id}")
        return session

    def _get_website_analysis(self, user_id: int, db: Session) -> Dict[str, Any]:
        """Get website analysis data for the user.

        Returns the latest analysis of the user's latest session as a dict,
        extended with ``data_freshness`` and ``confidence_level``; an empty
        dict when nothing is found or an error occurs.
        """
        try:
            session = self._get_latest_session(user_id, db)
            if not session:
                return {}

            # Get the latest website analysis for this session
            website_analysis = db.query(WebsiteAnalysis).filter(
                WebsiteAnalysis.session_id == session.id
            ).order_by(WebsiteAnalysis.updated_at.desc()).first()

            if not website_analysis:
                logger.warning(f"No website analysis found for user {user_id}")
                return {}

            # Convert to dictionary and add metadata
            analysis_data = website_analysis.to_dict()
            analysis_data['data_freshness'] = self._calculate_freshness(website_analysis.updated_at)
            # A completed analysis is trusted more than one in any other state
            analysis_data['confidence_level'] = 0.9 if website_analysis.status == 'completed' else 0.5

            logger.info(f"Retrieved website analysis for user {user_id}: {website_analysis.website_url}")
            return analysis_data

        except Exception as e:
            logger.error(f"Error getting website analysis for user {user_id}: {str(e)}")
            return {}

    def _get_research_preferences(self, user_id: int, db: Session) -> Dict[str, Any]:
        """Get research preferences data for the user.

        Returns the preferences of the user's latest session as a dict,
        extended with ``data_freshness`` and ``confidence_level``; an empty
        dict when nothing is found or an error occurs.
        """
        try:
            session = self._get_latest_session(user_id, db)
            if not session:
                return {}

            # Get research preferences for this session
            research_prefs = db.query(ResearchPreferences).filter(
                ResearchPreferences.session_id == session.id
            ).first()

            if not research_prefs:
                logger.warning(f"No research preferences found for user {user_id}")
                return {}

            # Convert to dictionary and add metadata
            prefs_data = research_prefs.to_dict()
            prefs_data['data_freshness'] = self._calculate_freshness(research_prefs.updated_at)
            prefs_data['confidence_level'] = 0.9

            logger.info(f"Retrieved research preferences for user {user_id}")
            return prefs_data

        except Exception as e:
            logger.error(f"Error getting research preferences for user {user_id}: {str(e)}")
            return {}

    def _get_api_keys_data(self, user_id: int, db: Session) -> Dict[str, Any]:
        """Get API keys data for the user.

        Returns a summary of every API key attached to the user's latest
        session; an empty dict when nothing is found or an error occurs.
        """
        try:
            session = self._get_latest_session(user_id, db)
            if not session:
                return {}

            # Get all API keys for this session
            api_keys = db.query(APIKey).filter(
                APIKey.session_id == session.id
            ).all()

            if not api_keys:
                logger.warning(f"No API keys found for user {user_id}")
                return {}

            # Convert to dictionary format
            # NOTE(review): key.to_dict() is passed on verbatim and later
            # persisted by _store_integrated_data -- confirm it does not
            # expose secret key material.
            api_data = {
                'api_keys': [key.to_dict() for key in api_keys],
                'total_keys': len(api_keys),
                'providers': [key.provider for key in api_keys],
                # Freshness is taken from the session's timestamp
                'data_freshness': self._calculate_freshness(session.updated_at),
                'confidence_level': 0.8
            }

            logger.info(f"Retrieved {len(api_keys)} API keys for user {user_id}")
            return api_data

        except Exception as e:
            logger.error(f"Error getting API keys data for user {user_id}: {str(e)}")
            return {}

    def _get_onboarding_session(self, user_id: int, db: Session) -> Dict[str, Any]:
        """Get onboarding session data for the user.

        Returns the latest session's progress fields as a dict; an empty
        dict when the user has no session or an error occurs.
        """
        try:
            session = self._get_latest_session(user_id, db)
            if not session:
                return {}

            # Convert to dictionary
            session_data = {
                'id': session.id,
                'user_id': session.user_id,
                'current_step': session.current_step,
                'progress': session.progress,
                'started_at': session.started_at.isoformat() if session.started_at else None,
                'updated_at': session.updated_at.isoformat() if session.updated_at else None,
                'data_freshness': self._calculate_freshness(session.updated_at),
                'confidence_level': 0.9
            }

            logger.info(f"Retrieved onboarding session for user {user_id}: step {session.current_step}, progress {session.progress}%")
            return session_data

        except Exception as e:
            logger.error(f"Error getting onboarding session for user {user_id}: {str(e)}")
            return {}

    @staticmethod
    def _zero_quality_metrics() -> Dict[str, Any]:
        """Return the quality structure with every score set to 0.0."""
        return {
            'overall_score': 0.0,
            'completeness': 0.0,
            'freshness': 0.0,
            'relevance': 0.0,
            'confidence': 0.0
        }

    def _assess_data_quality(self, website_analysis: Dict, research_preferences: Dict, api_keys_data: Dict) -> Dict[str, Any]:
        """Assess the quality and completeness of onboarding data.

        Returns:
            A dict with ``completeness`` (share of the 10 tracked fields
            that are filled), ``freshness`` (mean ``data_freshness`` of the
            website and research sources), ``relevance`` (weighted presence
            of key data), ``confidence`` (mean of the three) and
            ``overall_score`` (equal to ``confidence``). All zeros when the
            assessment fails.
        """
        try:
            quality_metrics = self._zero_quality_metrics()

            # Completeness: tracked website fields + research fields + one
            # slot for "any API key data at all".
            website_fields = ['domain', 'industry', 'business_type', 'target_audience', 'content_goals']
            research_fields = ['research_topics', 'content_types', 'target_audience', 'industry_focus']

            filled_fields = sum(1 for field in website_fields if website_analysis.get(field))
            filled_fields += sum(1 for field in research_fields if research_preferences.get(field))
            if api_keys_data:
                filled_fields += 1
            total_fields = len(website_fields) + len(research_fields) + 1

            quality_metrics['completeness'] = filled_fields / total_fields

            # Freshness: average over the sources that report a score
            freshness_scores = [
                data_source['data_freshness']
                for data_source in (website_analysis, research_preferences)
                if data_source.get('data_freshness')
            ]
            quality_metrics['freshness'] = sum(freshness_scores) / len(freshness_scores) if freshness_scores else 0.0

            # Relevance (based on data presence and quality)
            relevance_score = 0.0
            if website_analysis.get('domain'):
                relevance_score += 0.4
            if research_preferences.get('research_topics'):
                relevance_score += 0.3
            if api_keys_data:
                relevance_score += 0.3
            quality_metrics['relevance'] = relevance_score

            # Confidence is the plain mean of the three scores above
            quality_metrics['confidence'] = (quality_metrics['completeness'] + quality_metrics['freshness'] + quality_metrics['relevance']) / 3

            # Overall score mirrors confidence
            quality_metrics['overall_score'] = quality_metrics['confidence']

            return quality_metrics

        except Exception as e:
            logger.error(f"Error assessing data quality: {str(e)}")
            return self._zero_quality_metrics()

    def _calculate_freshness(self, created_at: datetime) -> float:
        """Calculate data freshness score (0.5 to 1.0).

        Args:
            created_at: Timestamp of the data; naive values are compared
                against ``datetime.utcnow()``, timezone-aware values
                against the current time in their own timezone.

        Returns:
            1.0 up to ``data_freshness_threshold``, then a linear decay
            down to 0.5 at ``max_analysis_age``; 0.5 for anything older,
            for a missing timestamp, or when the calculation fails.
        """
        try:
            if created_at is None:
                # A missing timestamp is expected (callers guard for it
                # elsewhere), so score it as old data without raising.
                return 0.5

            # Naive and aware datetimes cannot be subtracted from each
            # other, so pick a "now" of the same kind as the input.
            if created_at.utcoffset() is not None:
                now = datetime.now(created_at.tzinfo)
            else:
                now = datetime.utcnow()
            age = now - created_at

            if age <= self.data_freshness_threshold:
                return 1.0
            elif age <= self.max_analysis_age:
                # Linear decay from 1.0 to 0.5
                decay_factor = 1.0 - (age - self.data_freshness_threshold) / (self.max_analysis_age - self.data_freshness_threshold) * 0.5
                return max(0.5, decay_factor)
            else:
                return 0.5  # Minimum freshness for old data

        except Exception as e:
            logger.error(f"Error calculating data freshness: {str(e)}")
            return 0.5

    def _check_api_data_availability(self, api_key_data: Dict) -> bool:
        """Check if API key has available data.

        Only a key that has a ``last_used`` value and a positive
        ``usage_count`` can report data; its ``data_available`` flag is
        then returned. Everything else, including errors, yields ``False``.
        """
        try:
            # Check if API key has been used recently and has data
            if api_key_data.get('last_used') and api_key_data.get('usage_count', 0) > 0:
                return api_key_data.get('data_available', False)
            return False

        except Exception as e:
            logger.error(f"Error checking API data availability: {str(e)}")
            return False

    async def _store_integrated_data(self, user_id: int, integrated_data: Dict[str, Any], db: Session) -> None:
        """Store integrated onboarding data.

        Updates the user's existing ``OnboardingDataIntegration`` row or
        inserts a new one. Failures are logged and rolled back, never
        raised, so storing is best-effort for the caller.
        """
        try:
            website_payload = integrated_data.get('website_analysis', {})
            research_payload = integrated_data.get('research_preferences', {})
            api_keys_payload = integrated_data.get('api_keys_data', {})

            # Create or update integrated data record
            existing_record = db.query(OnboardingDataIntegration).filter(
                OnboardingDataIntegration.user_id == user_id
            ).first()

            if existing_record:
                existing_record.website_analysis_data = website_payload
                existing_record.research_preferences_data = research_payload
                existing_record.api_keys_data = api_keys_payload
                existing_record.updated_at = datetime.utcnow()
            else:
                new_record = OnboardingDataIntegration(
                    user_id=user_id,
                    website_analysis_data=website_payload,
                    research_preferences_data=research_payload,
                    api_keys_data=api_keys_payload,
                    created_at=datetime.utcnow(),
                    updated_at=datetime.utcnow()
                )
                db.add(new_record)

            db.commit()
            logger.info(f"Integrated onboarding data stored for user: {user_id}")

        except Exception as e:
            logger.error(f"Error storing integrated data for user {user_id}: {str(e)}")
            db.rollback()

    def _get_fallback_data(self) -> Dict[str, Any]:
        """Get fallback data when processing fails.

        Same shape as a successful result, with empty sources and a
        zeroed quality block.
        """
        return {
            'website_analysis': {},
            'research_preferences': {},
            'api_keys_data': {},
            'onboarding_session': {},
            'data_quality': self._zero_quality_metrics(),
            'processing_timestamp': datetime.utcnow().isoformat()
        }

    async def get_integrated_data(self, user_id: int, db: Session) -> Optional[Dict[str, Any]]:
        """Get previously integrated onboarding data for a user.

        Returns:
            The stored data (with a freshly computed quality block) when
            the record was updated within ``data_freshness_threshold``;
            the result of reprocessing when the record is stale; ``None``
            when no record exists or an error occurs.
        """
        try:
            record = db.query(OnboardingDataIntegration).filter(
                OnboardingDataIntegration.user_id == user_id
            ).first()

            if not record:
                return None

            # Check staleness first so the stored payload is only rebuilt
            # and scored when it is actually going to be returned.
            updated_at = record.updated_at
            if datetime.utcnow() - updated_at > self.data_freshness_threshold:
                logger.info(f"Integrated data is stale for user {user_id}, reprocessing...")
                return await self.process_onboarding_data(user_id, db)

            # Reconstruct integrated data from stored fields
            website_analysis = record.website_analysis_data or {}
            research_preferences = record.research_preferences_data or {}
            api_keys_data = record.api_keys_data or {}

            return {
                'website_analysis': website_analysis,
                'research_preferences': research_preferences,
                'api_keys_data': api_keys_data,
                # Session details are not stored on the integration record
                'onboarding_session': {},
                'data_quality': self._assess_data_quality(
                    website_analysis,
                    research_preferences,
                    api_keys_data
                ),
                'processing_timestamp': updated_at.isoformat()
            }

        except Exception as e:
            logger.error(f"Error getting integrated data for user {user_id}: {str(e)}")
            return None
|
||||
@@ -0,0 +1,547 @@
|
||||
"""
|
||||
Data Quality Service
|
||||
Onboarding data quality assessment.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DataQualityService:
|
||||
"""Service for assessing data quality and validation."""
|
||||
|
||||
def __init__(self):
|
||||
self.quality_thresholds = {
|
||||
'excellent': 0.9,
|
||||
'good': 0.7,
|
||||
'fair': 0.5,
|
||||
'poor': 0.3
|
||||
}
|
||||
|
||||
self.data_freshness_threshold = timedelta(hours=24)
|
||||
self.max_data_age = timedelta(days=30)
|
||||
|
||||
def assess_onboarding_data_quality(self, integrated_data: Dict[str, Any]) -> Dict[str, Any]:
    """Produce an overall quality assessment for integrated onboarding data.

    Each of the four data sources is scored on its own, the per-source
    scores are combined into five dimensions, and their mean becomes both
    ``confidence`` and ``overall_score``. A quality level, recommendations
    and issues are derived from the result. If anything fails, the error
    is logged and the fallback assessment is returned.
    """
    try:
        logger.info("Assessing onboarding data quality")

        quality_assessment = {
            'overall_score': 0.0,
            'completeness': 0.0,
            'freshness': 0.0,
            'accuracy': 0.0,
            'relevance': 0.0,
            'consistency': 0.0,
            'confidence': 0.0,
            'quality_level': 'poor',
            'recommendations': [],
            'issues': [],
            'assessment_timestamp': datetime.utcnow().isoformat()
        }

        # Score every data source individually; a missing source is scored as {}
        source = integrated_data.get
        website_quality = self._assess_website_analysis_quality(source('website_analysis', {}))
        research_quality = self._assess_research_preferences_quality(source('research_preferences', {}))
        api_quality = self._assess_api_keys_quality(source('api_keys_data', {}))
        session_quality = self._assess_onboarding_session_quality(source('onboarding_session', {}))
        per_source = (website_quality, research_quality, api_quality, session_quality)

        # Combine the per-source scores, one dimension at a time
        quality_assessment['completeness'] = self._calculate_completeness_score(*per_source)
        quality_assessment['freshness'] = self._calculate_freshness_score(*per_source)
        quality_assessment['accuracy'] = self._calculate_accuracy_score(*per_source)
        quality_assessment['relevance'] = self._calculate_relevance_score(*per_source)
        quality_assessment['consistency'] = self._calculate_consistency_score(*per_source)

        # Confidence is the unweighted mean of the five dimensions
        dimensions = ('completeness', 'freshness', 'accuracy', 'relevance', 'consistency')
        quality_assessment['confidence'] = sum(quality_assessment[name] for name in dimensions) / 5
        quality_assessment['overall_score'] = quality_assessment['confidence']

        # Translate the score into a level, then into advice and issues
        overall = quality_assessment['overall_score']
        quality_assessment['quality_level'] = self._determine_quality_level(overall)
        quality_assessment['recommendations'] = self._generate_quality_recommendations(quality_assessment)
        quality_assessment['issues'] = self._identify_quality_issues(quality_assessment)

        logger.info(f"Data quality assessment completed. Overall score: {quality_assessment['overall_score']:.2f}")
        return quality_assessment

    except Exception as e:
        logger.error(f"Error assessing data quality: {str(e)}")
        return self._get_fallback_quality_assessment()
|
||||
|
||||
def _assess_website_analysis_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Assess quality of website analysis data."""
|
||||
try:
|
||||
quality_metrics = {
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'accuracy': 0.0,
|
||||
'relevance': 0.0,
|
||||
'consistency': 0.0
|
||||
}
|
||||
|
||||
if not website_data:
|
||||
return quality_metrics
|
||||
|
||||
# Completeness assessment
|
||||
required_fields = ['domain', 'industry', 'business_type', 'target_audience', 'content_goals']
|
||||
present_fields = sum(1 for field in required_fields if website_data.get(field))
|
||||
quality_metrics['completeness'] = present_fields / len(required_fields)
|
||||
|
||||
# Freshness assessment
|
||||
if website_data.get('created_at'):
|
||||
try:
|
||||
created_at = datetime.fromisoformat(website_data['created_at'].replace('Z', '+00:00'))
|
||||
age = datetime.utcnow() - created_at
|
||||
quality_metrics['freshness'] = self._calculate_freshness_score_from_age(age)
|
||||
except Exception:
|
||||
quality_metrics['freshness'] = 0.5
|
||||
|
||||
# Accuracy assessment (based on data presence and format)
|
||||
accuracy_score = 0.0
|
||||
if website_data.get('domain') and isinstance(website_data['domain'], str):
|
||||
accuracy_score += 0.2
|
||||
if website_data.get('industry') and isinstance(website_data['industry'], str):
|
||||
accuracy_score += 0.2
|
||||
if website_data.get('business_type') and isinstance(website_data['business_type'], str):
|
||||
accuracy_score += 0.2
|
||||
if website_data.get('target_audience') and isinstance(website_data['target_audience'], str):
|
||||
accuracy_score += 0.2
|
||||
if website_data.get('content_goals') and isinstance(website_data['content_goals'], (str, list)):
|
||||
accuracy_score += 0.2
|
||||
quality_metrics['accuracy'] = accuracy_score
|
||||
|
||||
# Relevance assessment
|
||||
relevance_score = 0.0
|
||||
if website_data.get('domain'):
|
||||
relevance_score += 0.3
|
||||
if website_data.get('industry'):
|
||||
relevance_score += 0.3
|
||||
if website_data.get('content_goals'):
|
||||
relevance_score += 0.4
|
||||
quality_metrics['relevance'] = relevance_score
|
||||
|
||||
# Consistency assessment
|
||||
consistency_score = 0.0
|
||||
if website_data.get('domain') and website_data.get('industry'):
|
||||
consistency_score += 0.5
|
||||
if website_data.get('target_audience') and website_data.get('content_goals'):
|
||||
consistency_score += 0.5
|
||||
quality_metrics['consistency'] = consistency_score
|
||||
|
||||
return quality_metrics
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error assessing website analysis quality: {str(e)}")
|
||||
return {'completeness': 0.0, 'freshness': 0.0, 'accuracy': 0.0, 'relevance': 0.0, 'consistency': 0.0}
|
||||
|
||||
def _assess_research_preferences_quality(self, research_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Assess quality of research preferences data."""
|
||||
try:
|
||||
quality_metrics = {
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'accuracy': 0.0,
|
||||
'relevance': 0.0,
|
||||
'consistency': 0.0
|
||||
}
|
||||
|
||||
if not research_data:
|
||||
return quality_metrics
|
||||
|
||||
# Completeness assessment
|
||||
required_fields = ['research_topics', 'content_types', 'target_audience', 'industry_focus']
|
||||
present_fields = sum(1 for field in required_fields if research_data.get(field))
|
||||
quality_metrics['completeness'] = present_fields / len(required_fields)
|
||||
|
||||
# Freshness assessment
|
||||
if research_data.get('created_at'):
|
||||
try:
|
||||
created_at = datetime.fromisoformat(research_data['created_at'].replace('Z', '+00:00'))
|
||||
age = datetime.utcnow() - created_at
|
||||
quality_metrics['freshness'] = self._calculate_freshness_score_from_age(age)
|
||||
except Exception:
|
||||
quality_metrics['freshness'] = 0.5
|
||||
|
||||
# Accuracy assessment
|
||||
accuracy_score = 0.0
|
||||
if research_data.get('research_topics') and isinstance(research_data['research_topics'], (str, list)):
|
||||
accuracy_score += 0.25
|
||||
if research_data.get('content_types') and isinstance(research_data['content_types'], (str, list)):
|
||||
accuracy_score += 0.25
|
||||
if research_data.get('target_audience') and isinstance(research_data['target_audience'], str):
|
||||
accuracy_score += 0.25
|
||||
if research_data.get('industry_focus') and isinstance(research_data['industry_focus'], str):
|
||||
accuracy_score += 0.25
|
||||
quality_metrics['accuracy'] = accuracy_score
|
||||
|
||||
# Relevance assessment
|
||||
relevance_score = 0.0
|
||||
if research_data.get('research_topics'):
|
||||
relevance_score += 0.4
|
||||
if research_data.get('content_types'):
|
||||
relevance_score += 0.3
|
||||
if research_data.get('target_audience'):
|
||||
relevance_score += 0.3
|
||||
quality_metrics['relevance'] = relevance_score
|
||||
|
||||
# Consistency assessment
|
||||
consistency_score = 0.0
|
||||
if research_data.get('research_topics') and research_data.get('content_types'):
|
||||
consistency_score += 0.5
|
||||
if research_data.get('target_audience') and research_data.get('industry_focus'):
|
||||
consistency_score += 0.5
|
||||
quality_metrics['consistency'] = consistency_score
|
||||
|
||||
return quality_metrics
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error assessing research preferences quality: {str(e)}")
|
||||
return {'completeness': 0.0, 'freshness': 0.0, 'accuracy': 0.0, 'relevance': 0.0, 'consistency': 0.0}
|
||||
|
||||
def _assess_api_keys_quality(self, api_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Assess quality of API keys data."""
|
||||
try:
|
||||
quality_metrics = {
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'accuracy': 0.0,
|
||||
'relevance': 0.0,
|
||||
'consistency': 0.0
|
||||
}
|
||||
|
||||
if not api_data:
|
||||
return quality_metrics
|
||||
|
||||
# Completeness assessment
|
||||
total_apis = len(api_data)
|
||||
active_apis = sum(1 for api_info in api_data.values() if api_info.get('is_active'))
|
||||
quality_metrics['completeness'] = active_apis / max(total_apis, 1)
|
||||
|
||||
# Freshness assessment
|
||||
freshness_scores = []
|
||||
for api_info in api_data.values():
|
||||
if api_info.get('last_used'):
|
||||
try:
|
||||
last_used = datetime.fromisoformat(api_info['last_used'].replace('Z', '+00:00'))
|
||||
age = datetime.utcnow() - last_used
|
||||
freshness_scores.append(self._calculate_freshness_score_from_age(age))
|
||||
except Exception:
|
||||
freshness_scores.append(0.5)
|
||||
|
||||
quality_metrics['freshness'] = sum(freshness_scores) / len(freshness_scores) if freshness_scores else 0.5
|
||||
|
||||
# Accuracy assessment
|
||||
accuracy_score = 0.0
|
||||
for api_info in api_data.values():
|
||||
if api_info.get('service_name') and api_info.get('is_active'):
|
||||
accuracy_score += 0.5
|
||||
if api_info.get('data_available'):
|
||||
accuracy_score += 0.5
|
||||
quality_metrics['accuracy'] = accuracy_score / max(len(api_data), 1)
|
||||
|
||||
# Relevance assessment
|
||||
relevant_apis = ['google_analytics', 'google_search_console', 'semrush', 'ahrefs', 'moz']
|
||||
relevant_count = sum(1 for api_name in api_data.keys() if api_name.lower() in relevant_apis)
|
||||
quality_metrics['relevance'] = relevant_count / max(len(api_data), 1)
|
||||
|
||||
# Consistency assessment
|
||||
consistency_score = 0.0
|
||||
if len(api_data) > 0:
|
||||
consistency_score = 0.5 # Basic consistency if APIs exist
|
||||
if any(api_info.get('data_available') for api_info in api_data.values()):
|
||||
consistency_score += 0.5
|
||||
quality_metrics['consistency'] = consistency_score
|
||||
|
||||
return quality_metrics
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error assessing API keys quality: {str(e)}")
|
||||
return {'completeness': 0.0, 'freshness': 0.0, 'accuracy': 0.0, 'relevance': 0.0, 'consistency': 0.0}
|
||||
|
||||
def _assess_onboarding_session_quality(self, session_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Assess quality of onboarding session data."""
|
||||
try:
|
||||
quality_metrics = {
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'accuracy': 0.0,
|
||||
'relevance': 0.0,
|
||||
'consistency': 0.0
|
||||
}
|
||||
|
||||
if not session_data:
|
||||
return quality_metrics
|
||||
|
||||
# Completeness assessment
|
||||
required_fields = ['session_id', 'completion_percentage', 'completed_steps', 'current_step']
|
||||
present_fields = sum(1 for field in required_fields if session_data.get(field))
|
||||
quality_metrics['completeness'] = present_fields / len(required_fields)
|
||||
|
||||
# Freshness assessment
|
||||
if session_data.get('updated_at'):
|
||||
try:
|
||||
updated_at = datetime.fromisoformat(session_data['updated_at'].replace('Z', '+00:00'))
|
||||
age = datetime.utcnow() - updated_at
|
||||
quality_metrics['freshness'] = self._calculate_freshness_score_from_age(age)
|
||||
except Exception:
|
||||
quality_metrics['freshness'] = 0.5
|
||||
|
||||
# Accuracy assessment
|
||||
accuracy_score = 0.0
|
||||
if session_data.get('session_id') and isinstance(session_data['session_id'], str):
|
||||
accuracy_score += 0.25
|
||||
if session_data.get('completion_percentage') and isinstance(session_data['completion_percentage'], (int, float)):
|
||||
accuracy_score += 0.25
|
||||
if session_data.get('completed_steps') and isinstance(session_data['completed_steps'], (list, int)):
|
||||
accuracy_score += 0.25
|
||||
if session_data.get('current_step') and isinstance(session_data['current_step'], (str, int)):
|
||||
accuracy_score += 0.25
|
||||
quality_metrics['accuracy'] = accuracy_score
|
||||
|
||||
# Relevance assessment
|
||||
relevance_score = 0.0
|
||||
if session_data.get('completion_percentage', 0) > 50:
|
||||
relevance_score += 0.5
|
||||
if session_data.get('session_data'):
|
||||
relevance_score += 0.5
|
||||
quality_metrics['relevance'] = relevance_score
|
||||
|
||||
# Consistency assessment
|
||||
consistency_score = 0.0
|
||||
if session_data.get('completion_percentage') and session_data.get('completed_steps'):
|
||||
consistency_score += 0.5
|
||||
if session_data.get('current_step') and session_data.get('session_id'):
|
||||
consistency_score += 0.5
|
||||
quality_metrics['consistency'] = consistency_score
|
||||
|
||||
return quality_metrics
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error assessing onboarding session quality: {str(e)}")
|
||||
return {'completeness': 0.0, 'freshness': 0.0, 'accuracy': 0.0, 'relevance': 0.0, 'consistency': 0.0}
|
||||
|
||||
def _calculate_completeness_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
|
||||
"""Calculate overall completeness score."""
|
||||
try:
|
||||
scores = [
|
||||
website_quality['completeness'],
|
||||
research_quality['completeness'],
|
||||
api_quality['completeness'],
|
||||
session_quality['completeness']
|
||||
]
|
||||
return sum(scores) / len(scores)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating completeness score: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
def _calculate_freshness_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
|
||||
"""Calculate overall freshness score."""
|
||||
try:
|
||||
scores = [
|
||||
website_quality['freshness'],
|
||||
research_quality['freshness'],
|
||||
api_quality['freshness'],
|
||||
session_quality['freshness']
|
||||
]
|
||||
return sum(scores) / len(scores)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating freshness score: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
def _calculate_accuracy_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
|
||||
"""Calculate overall accuracy score."""
|
||||
try:
|
||||
scores = [
|
||||
website_quality['accuracy'],
|
||||
research_quality['accuracy'],
|
||||
api_quality['accuracy'],
|
||||
session_quality['accuracy']
|
||||
]
|
||||
return sum(scores) / len(scores)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating accuracy score: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
def _calculate_relevance_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
|
||||
"""Calculate overall relevance score."""
|
||||
try:
|
||||
scores = [
|
||||
website_quality['relevance'],
|
||||
research_quality['relevance'],
|
||||
api_quality['relevance'],
|
||||
session_quality['relevance']
|
||||
]
|
||||
return sum(scores) / len(scores)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating relevance score: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
def _calculate_consistency_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
|
||||
"""Calculate overall consistency score."""
|
||||
try:
|
||||
scores = [
|
||||
website_quality['consistency'],
|
||||
research_quality['consistency'],
|
||||
api_quality['consistency'],
|
||||
session_quality['consistency']
|
||||
]
|
||||
return sum(scores) / len(scores)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating consistency score: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
def _calculate_freshness_score_from_age(self, age: timedelta) -> float:
|
||||
"""Calculate freshness score based on data age."""
|
||||
try:
|
||||
if age <= self.data_freshness_threshold:
|
||||
return 1.0
|
||||
elif age <= self.max_data_age:
|
||||
# Linear decay from 1.0 to 0.5
|
||||
decay_factor = 1.0 - (age - self.data_freshness_threshold) / (self.max_data_age - self.data_freshness_threshold) * 0.5
|
||||
return max(0.5, decay_factor)
|
||||
else:
|
||||
return 0.5 # Minimum freshness for old data
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating freshness score from age: {str(e)}")
|
||||
return 0.5
|
||||
|
||||
def _determine_quality_level(self, overall_score: float) -> str:
|
||||
"""Determine quality level based on overall score."""
|
||||
try:
|
||||
if overall_score >= self.quality_thresholds['excellent']:
|
||||
return 'excellent'
|
||||
elif overall_score >= self.quality_thresholds['good']:
|
||||
return 'good'
|
||||
elif overall_score >= self.quality_thresholds['fair']:
|
||||
return 'fair'
|
||||
else:
|
||||
return 'poor'
|
||||
except Exception as e:
|
||||
logger.error(f"Error determining quality level: {str(e)}")
|
||||
return 'poor'
|
||||
|
||||
def _generate_quality_recommendations(self, quality_assessment: Dict[str, Any]) -> List[str]:
|
||||
"""Generate recommendations based on quality assessment."""
|
||||
try:
|
||||
recommendations = []
|
||||
|
||||
if quality_assessment['completeness'] < 0.7:
|
||||
recommendations.append("Complete missing onboarding data to improve strategy accuracy")
|
||||
|
||||
if quality_assessment['freshness'] < 0.7:
|
||||
recommendations.append("Update stale data to ensure current market insights")
|
||||
|
||||
if quality_assessment['accuracy'] < 0.7:
|
||||
recommendations.append("Verify data accuracy for better strategy recommendations")
|
||||
|
||||
if quality_assessment['relevance'] < 0.7:
|
||||
recommendations.append("Provide more relevant data for targeted strategy development")
|
||||
|
||||
if quality_assessment['consistency'] < 0.7:
|
||||
recommendations.append("Ensure data consistency across different sources")
|
||||
|
||||
if quality_assessment['overall_score'] < 0.5:
|
||||
recommendations.append("Consider re-running onboarding process for better data quality")
|
||||
|
||||
return recommendations
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating quality recommendations: {str(e)}")
|
||||
return ["Unable to generate recommendations due to assessment error"]
|
||||
|
||||
def _identify_quality_issues(self, quality_assessment: Dict[str, Any]) -> List[str]:
|
||||
"""Identify specific quality issues."""
|
||||
try:
|
||||
issues = []
|
||||
|
||||
if quality_assessment['completeness'] < 0.5:
|
||||
issues.append("Incomplete data: Missing critical onboarding information")
|
||||
|
||||
if quality_assessment['freshness'] < 0.5:
|
||||
issues.append("Stale data: Information may be outdated")
|
||||
|
||||
if quality_assessment['accuracy'] < 0.5:
|
||||
issues.append("Data accuracy concerns: Verify information validity")
|
||||
|
||||
if quality_assessment['relevance'] < 0.5:
|
||||
issues.append("Low relevance: Data may not align with current needs")
|
||||
|
||||
if quality_assessment['consistency'] < 0.5:
|
||||
issues.append("Inconsistent data: Conflicting information detected")
|
||||
|
||||
return issues
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error identifying quality issues: {str(e)}")
|
||||
return ["Unable to identify issues due to assessment error"]
|
||||
|
||||
def _get_fallback_quality_assessment(self) -> Dict[str, Any]:
|
||||
"""Get fallback quality assessment when assessment fails."""
|
||||
return {
|
||||
'overall_score': 0.0,
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'accuracy': 0.0,
|
||||
'relevance': 0.0,
|
||||
'consistency': 0.0,
|
||||
'confidence': 0.0,
|
||||
'quality_level': 'poor',
|
||||
'recommendations': ['Unable to assess data quality'],
|
||||
'issues': ['Quality assessment failed'],
|
||||
'assessment_timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
def validate_field_data(self, field_data: Dict[str, Any]) -> Dict[str, Any]:
    """Check each field value for emptiness or suspicious brevity.

    A value of ``None`` or ``''`` is an error and marks the result
    invalid. A string shorter than three characters after stripping is a
    warning and multiplies ``confidence`` by 0.9.

    Returns:
        Dict with ``is_valid``, ``errors``, ``warnings`` and
        ``confidence``. If validation itself fails, an invalid result with
        confidence 0.0 is returned.
    """
    try:
        errors: List[str] = []
        warnings: List[str] = []
        confidence = 1.0

        for name, value in field_data.items():
            if value is None or value == '':
                errors.append(f"Field '{name}' is empty")
                continue
            if isinstance(value, str) and len(value.strip()) < 3:
                warnings.append(f"Field '{name}' may be too short")
                confidence *= 0.9

        return {
            'is_valid': not errors,
            'errors': errors,
            'warnings': warnings,
            'confidence': confidence
        }
    except Exception as e:
        logger.error(f"Error validating field data: {str(e)}")
        failure = {'is_valid': False, 'errors': ['Validation failed']}
        failure['warnings'] = []
        failure['confidence'] = 0.0
        return failure
|
||||
@@ -0,0 +1,790 @@
|
||||
"""
|
||||
Field Transformation Service
|
||||
Onboarding data to field mapping.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class FieldTransformationService:
|
||||
"""Service for transforming onboarding data to strategic input fields."""
|
||||
|
||||
def __init__(self):
|
||||
# Define field mapping configurations
|
||||
self.field_mappings = {
|
||||
# Business Context mappings
|
||||
'business_objectives': {
|
||||
'sources': ['website_analysis.content_goals', 'research_preferences.research_topics'],
|
||||
'transformation': 'extract_business_objectives'
|
||||
},
|
||||
'target_metrics': {
|
||||
'sources': ['website_analysis.performance_metrics', 'research_preferences.performance_tracking'],
|
||||
'transformation': 'extract_target_metrics'
|
||||
},
|
||||
'content_budget': {
|
||||
'sources': ['onboarding_session.session_data.budget'],
|
||||
'transformation': 'extract_budget'
|
||||
},
|
||||
'team_size': {
|
||||
'sources': ['onboarding_session.session_data.team_size'],
|
||||
'transformation': 'extract_team_size'
|
||||
},
|
||||
'implementation_timeline': {
|
||||
'sources': ['onboarding_session.session_data.timeline'],
|
||||
'transformation': 'extract_timeline'
|
||||
},
|
||||
'market_share': {
|
||||
'sources': ['website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_market_share'
|
||||
},
|
||||
'competitive_position': {
|
||||
'sources': ['website_analysis.competitors', 'research_preferences.competitor_analysis'],
|
||||
'transformation': 'extract_competitive_position'
|
||||
},
|
||||
'performance_metrics': {
|
||||
'sources': ['website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_performance_metrics'
|
||||
},
|
||||
|
||||
# Audience Intelligence mappings
|
||||
'content_preferences': {
|
||||
'sources': ['research_preferences.content_types'],
|
||||
'transformation': 'extract_content_preferences'
|
||||
},
|
||||
'consumption_patterns': {
|
||||
'sources': ['website_analysis.target_audience', 'research_preferences.target_audience'],
|
||||
'transformation': 'extract_consumption_patterns'
|
||||
},
|
||||
'audience_pain_points': {
|
||||
'sources': ['website_analysis.content_gaps', 'research_preferences.research_topics'],
|
||||
'transformation': 'extract_pain_points'
|
||||
},
|
||||
'buying_journey': {
|
||||
'sources': ['website_analysis.target_audience', 'research_preferences.target_audience'],
|
||||
'transformation': 'extract_buying_journey'
|
||||
},
|
||||
'seasonal_trends': {
|
||||
'sources': ['research_preferences.trend_analysis'],
|
||||
'transformation': 'extract_seasonal_trends'
|
||||
},
|
||||
'engagement_metrics': {
|
||||
'sources': ['website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_engagement_metrics'
|
||||
},
|
||||
|
||||
# Competitive Intelligence mappings
|
||||
'top_competitors': {
|
||||
'sources': ['website_analysis.competitors'],
|
||||
'transformation': 'extract_competitors'
|
||||
},
|
||||
'competitor_content_strategies': {
|
||||
'sources': ['website_analysis.competitors', 'research_preferences.competitor_analysis'],
|
||||
'transformation': 'extract_competitor_strategies'
|
||||
},
|
||||
'market_gaps': {
|
||||
'sources': ['website_analysis.content_gaps', 'research_preferences.research_topics'],
|
||||
'transformation': 'extract_market_gaps'
|
||||
},
|
||||
'industry_trends': {
|
||||
'sources': ['website_analysis.industry', 'research_preferences.industry_focus'],
|
||||
'transformation': 'extract_industry_trends'
|
||||
},
|
||||
'emerging_trends': {
|
||||
'sources': ['research_preferences.trend_analysis'],
|
||||
'transformation': 'extract_emerging_trends'
|
||||
},
|
||||
|
||||
# Content Strategy mappings
|
||||
'preferred_formats': {
|
||||
'sources': ['research_preferences.content_types'],
|
||||
'transformation': 'extract_preferred_formats'
|
||||
},
|
||||
'content_mix': {
|
||||
'sources': ['research_preferences.content_types', 'website_analysis.content_goals'],
|
||||
'transformation': 'extract_content_mix'
|
||||
},
|
||||
'content_frequency': {
|
||||
'sources': ['research_preferences.content_calendar'],
|
||||
'transformation': 'extract_content_frequency'
|
||||
},
|
||||
'optimal_timing': {
|
||||
'sources': ['research_preferences.content_calendar'],
|
||||
'transformation': 'extract_optimal_timing'
|
||||
},
|
||||
'quality_metrics': {
|
||||
'sources': ['website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_quality_metrics'
|
||||
},
|
||||
'editorial_guidelines': {
|
||||
'sources': ['website_analysis.business_type', 'research_preferences.content_types'],
|
||||
'transformation': 'extract_editorial_guidelines'
|
||||
},
|
||||
'brand_voice': {
|
||||
'sources': ['website_analysis.business_type', 'onboarding_session.session_data.brand_voice'],
|
||||
'transformation': 'extract_brand_voice'
|
||||
},
|
||||
|
||||
# Performance Analytics mappings
|
||||
'traffic_sources': {
|
||||
'sources': ['website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_traffic_sources'
|
||||
},
|
||||
'conversion_rates': {
|
||||
'sources': ['website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_conversion_rates'
|
||||
},
|
||||
'content_roi_targets': {
|
||||
'sources': ['onboarding_session.session_data.budget', 'website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_roi_targets'
|
||||
},
|
||||
'ab_testing_capabilities': {
|
||||
'sources': ['onboarding_session.session_data.team_size'],
|
||||
'transformation': 'extract_ab_testing_capabilities'
|
||||
}
|
||||
}
|
||||
|
||||
def transform_onboarding_data_to_fields(self, integrated_data: Dict[str, Any]) -> Dict[str, Any]:
    """Derive strategic input fields from integrated onboarding data.

    Every entry of ``self.field_mappings`` is processed independently: its
    sources are extracted, the configured transformation method is looked
    up on this service and applied, and truthy results are recorded. A
    failure in one field is logged as a warning and does not stop the rest.

    Returns:
        Dict with ``fields`` (field id -> value), ``sources`` (field id ->
        source info) and ``transformation_metadata``. On an unexpected
        failure both maps are empty and the metadata holds the error text.
    """
    try:
        logger.info("Transforming onboarding data to strategic fields")

        field_values = {}
        field_provenance = {}

        for field_id, config in self.field_mappings.items():
            try:
                # Extract data from sources; nothing to transform if empty.
                inputs = self._extract_source_data(integrated_data, config['sources'])
                if not inputs:
                    continue

                # Apply transformation
                transform = getattr(self, config['transformation'])
                value = transform(inputs, integrated_data)
                if not value:
                    continue

                field_values[field_id] = value
                field_provenance[field_id] = self._get_data_source_info(config['sources'], integrated_data)
            except Exception as e:
                logger.warning(f"Error transforming field {field_id}: {str(e)}")
                continue

        metadata = {
            'total_fields_processed': len(self.field_mappings),
            'successful_transformations': len(field_values),
            'transformation_timestamp': datetime.utcnow().isoformat()
        }

        logger.info(f"Successfully transformed {len(field_values)} fields from onboarding data")
        return {
            'fields': field_values,
            'sources': field_provenance,
            'transformation_metadata': metadata
        }

    except Exception as e:
        logger.error(f"Error transforming onboarding data to fields: {str(e)}")
        return {'fields': {}, 'sources': {}, 'transformation_metadata': {'error': str(e)}}
|
||||
|
||||
def _extract_source_data(self, integrated_data: Dict[str, Any], sources: List[str]) -> Dict[str, Any]:
|
||||
"""Extract data from specified sources."""
|
||||
source_data = {}
|
||||
|
||||
for source_path in sources:
|
||||
try:
|
||||
# Navigate nested dictionary structure
|
||||
keys = source_path.split('.')
|
||||
value = integrated_data
|
||||
|
||||
for key in keys:
|
||||
if isinstance(value, dict) and key in value:
|
||||
value = value[key]
|
||||
else:
|
||||
value = None
|
||||
break
|
||||
|
||||
if value is not None:
|
||||
source_data[source_path] = value
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Error extracting data from {source_path}: {str(e)}")
|
||||
continue
|
||||
|
||||
return source_data
|
||||
|
||||
def _get_data_source_info(self, sources: List[str], integrated_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Get information about data sources for a field."""
|
||||
source_info = {
|
||||
'sources': sources,
|
||||
'data_quality': self._assess_source_quality(sources, integrated_data),
|
||||
'last_updated': datetime.utcnow().isoformat()
|
||||
}
|
||||
return source_info
|
||||
|
||||
def _assess_source_quality(self, sources: List[str], integrated_data: Dict[str, Any]) -> float:
|
||||
"""Assess the quality of data sources."""
|
||||
try:
|
||||
quality_scores = []
|
||||
|
||||
for source in sources:
|
||||
# Check if source exists and has data
|
||||
keys = source.split('.')
|
||||
value = integrated_data
|
||||
|
||||
for key in keys:
|
||||
if isinstance(value, dict) and key in value:
|
||||
value = value[key]
|
||||
else:
|
||||
value = None
|
||||
break
|
||||
|
||||
if value:
|
||||
# Basic quality assessment
|
||||
if isinstance(value, (list, dict)) and len(value) > 0:
|
||||
quality_scores.append(1.0)
|
||||
elif isinstance(value, str) and len(value.strip()) > 0:
|
||||
quality_scores.append(0.8)
|
||||
else:
|
||||
quality_scores.append(0.5)
|
||||
else:
|
||||
quality_scores.append(0.0)
|
||||
|
||||
return sum(quality_scores) / len(quality_scores) if quality_scores else 0.0
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error assessing source quality: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
# Transformation methods for each field type
|
||||
def extract_business_objectives(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract business objectives from content goals and research topics.

    Args:
        source_data: Extracted sources keyed by dotted path. Reads
            ``website_analysis.content_goals`` and
            ``research_preferences.research_topics``; each may be a list
            or a string.
        integrated_data: Unused here; accepted so all transformation
            methods share one call signature.

    Returns:
        The collected items joined with ``', '``, or ``None`` when nothing
        was collected or an error occurred.
    """
    def _as_text_items(value: Any) -> List[str]:
        # List items are stringified (``None`` entries skipped) so one
        # non-string entry no longer makes ``str.join`` raise and the
        # whole field silently disappear.
        if isinstance(value, list):
            return [str(item) for item in value if item is not None]
        if isinstance(value, str):
            return [value]
        return []

    try:
        objectives: List[str] = []
        for key in ('website_analysis.content_goals', 'research_preferences.research_topics'):
            if key in source_data:
                objectives.extend(_as_text_items(source_data[key]))

        return ', '.join(objectives) if objectives else None

    except Exception as e:
        logger.error(f"Error extracting business objectives: {str(e)}")
        return None
|
||||
|
||||
def extract_target_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract target metrics from performance data.

    Args:
        source_data: Extracted sources keyed by dotted path. Reads
            ``website_analysis.performance_metrics`` (dict or string) and
            ``research_preferences.performance_tracking`` (list or string).
        integrated_data: Unused here; accepted so all transformation
            methods share one call signature.

    Returns:
        Metric descriptions joined with ``', '`` (dict entries rendered as
        ``"name: value"``), or ``None`` when nothing was collected or an
        error occurred.
    """
    try:
        metrics: List[str] = []

        if 'website_analysis.performance_metrics' in source_data:
            perf_metrics = source_data['website_analysis.performance_metrics']
            if isinstance(perf_metrics, dict):
                metrics.extend(f"{k}: {v}" for k, v in perf_metrics.items())
            elif isinstance(perf_metrics, str):
                metrics.append(perf_metrics)

        if 'research_preferences.performance_tracking' in source_data:
            tracking = source_data['research_preferences.performance_tracking']
            if isinstance(tracking, list):
                # Stringify items (skipping ``None``) so a non-string entry
                # no longer makes ``str.join`` raise and drop the field.
                metrics.extend(str(item) for item in tracking if item is not None)
            elif isinstance(tracking, str):
                metrics.append(tracking)

        return ', '.join(metrics) if metrics else None

    except Exception as e:
        logger.error(f"Error extracting target metrics: {str(e)}")
        return None
|
||||
|
||||
def extract_budget(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the session's content budget as a string.

    Yields ``None`` when the budget source is absent, falsy, or an error
    occurs.
    """
    try:
        budget_key = 'onboarding_session.session_data.budget'
        if budget_key not in source_data:
            return None
        amount = source_data[budget_key]
        return str(amount) if amount else None
    except Exception as e:
        logger.error(f"Error extracting budget: {str(e)}")
        return None
|
||||
|
||||
def extract_team_size(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the session's team size as a string.

    Yields ``None`` when the team-size source is absent, falsy, or an
    error occurs.
    """
    try:
        team_key = 'onboarding_session.session_data.team_size'
        if team_key not in source_data:
            return None
        headcount = source_data[team_key]
        return str(headcount) if headcount else None
    except Exception as e:
        logger.error(f"Error extracting team size: {str(e)}")
        return None
|
||||
|
||||
def extract_timeline(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the session's implementation timeline as a string.

    Yields ``None`` when the timeline source is absent, falsy, or an
    error occurs.
    """
    try:
        timeline_key = 'onboarding_session.session_data.timeline'
        if timeline_key not in source_data:
            return None
        schedule = source_data[timeline_key]
        return str(schedule) if schedule else None
    except Exception as e:
        logger.error(f"Error extracting timeline: {str(e)}")
        return None
|
||||
|
||||
def extract_market_share(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the ``market_share`` performance metric as a string.

    Yields ``None`` when the performance metrics are absent, are not a
    dict, lack a ``market_share`` entry, or an error occurs.
    """
    try:
        metrics_key = 'website_analysis.performance_metrics'
        if metrics_key not in source_data:
            return None
        perf = source_data[metrics_key]
        if not isinstance(perf, dict) or 'market_share' not in perf:
            return None
        return str(perf['market_share'])
    except Exception as e:
        logger.error(f"Error extracting market share: {str(e)}")
        return None
|
||||
|
||||
def extract_competitive_position(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Summarise competitive position from competitor sources.

    Truthy values of the competitor list and competitor analysis are
    rendered as ``"Competitors: ..."`` and ``"Analysis: ..."`` and joined
    with ``'; '``. Returns ``None`` when neither is available or an error
    occurs.
    """
    try:
        # (source path, label used in the rendered summary)
        labelled_sources = (
            ('website_analysis.competitors', 'Competitors'),
            ('research_preferences.competitor_analysis', 'Analysis'),
        )
        parts = [
            f"{label}: {source_data[path]}"
            for path, label in labelled_sources
            if path in source_data and source_data[path]
        ]
        if not parts:
            return None
        return '; '.join(parts)
    except Exception as e:
        logger.error(f"Error extracting competitive position: {str(e)}")
        return None
|
||||
|
||||
def extract_performance_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Render website performance metrics as a single string.

    A dict becomes ``"name: value"`` pairs joined with ``', '``; a string
    is returned unchanged. Any other type, a missing source, or an error
    yields ``None``.
    """
    try:
        metrics_key = 'website_analysis.performance_metrics'
        if metrics_key not in source_data:
            return None

        perf = source_data[metrics_key]
        if isinstance(perf, str):
            return perf
        if isinstance(perf, dict):
            pairs = [f"{name}: {reading}" for name, reading in perf.items()]
            return ', '.join(pairs)
        return None
    except Exception as e:
        logger.error(f"Error extracting performance metrics: {str(e)}")
        return None
|
||||
|
||||
def extract_content_preferences(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract content preferences from research preferences.

    Args:
        source_data: Extracted sources keyed by dotted path. Reads
            ``research_preferences.content_types`` (list or string).
        integrated_data: Unused here; accepted so all transformation
            methods share one call signature.

    Returns:
        List items joined with ``', '``, or the string unchanged.
        ``None`` when the source is missing, has another type, or an
        error occurred.
    """
    try:
        if 'research_preferences.content_types' in source_data:
            content_types = source_data['research_preferences.content_types']
            if isinstance(content_types, list):
                # Stringify items (skipping ``None``) so a non-string entry
                # no longer makes ``str.join`` raise and drop the field.
                return ', '.join(str(item) for item in content_types if item is not None)
            elif isinstance(content_types, str):
                return content_types
        return None

    except Exception as e:
        logger.error(f"Error extracting content preferences: {str(e)}")
        return None
|
||||
|
||||
def extract_consumption_patterns(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Summarise audience data as labelled consumption patterns.

    Truthy values found under ``website_analysis.target_audience`` and
    ``research_preferences.target_audience`` are rendered as
    ``"Website Audience: ..."`` and ``"Research Audience: ..."`` and joined
    with ``"; "``. Returns ``None`` when neither yields a value or when an
    error occurs (the error is logged).
    """
    labelled_sources = (
        ('website_analysis.target_audience', 'Website Audience'),
        ('research_preferences.target_audience', 'Research Audience'),
    )
    try:
        found = []
        for source_key, label in labelled_sources:
            if source_key not in source_data:
                continue
            value = source_data[source_key]
            if value:
                found.append(f"{label}: {value}")

        if not found:
            return None
        return '; '.join(found)

    except Exception as e:
        logger.error(f"Error extracting consumption patterns: {str(e)}")
        return None
|
||||
|
||||
def extract_pain_points(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract audience pain points from content gaps and research topics.

    Collects values from ``website_analysis.content_gaps`` followed by
    ``research_preferences.research_topics``. Lists contribute each item,
    strings contribute themselves, other types are ignored. The collected
    items are joined with ``", "``. Returns ``None`` when nothing was
    collected or an error occurred (the error is logged).
    """
    try:
        pain_points = []
        for source_key in ('website_analysis.content_gaps',
                           'research_preferences.research_topics'):
            if source_key not in source_data:
                continue
            value = source_data[source_key]
            if isinstance(value, list):
                # Stringify items so a single non-string entry cannot make the
                # final join raise and discard everything collected.
                pain_points.extend(str(item) for item in value)
            elif isinstance(value, str):
                pain_points.append(value)

        return ', '.join(pain_points) if pain_points else None

    except Exception as e:
        logger.error(f"Error extracting pain points: {str(e)}")
        return None
|
||||
|
||||
def extract_buying_journey(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe the buying journey in terms of the website's target audience.

    Returns ``"Journey based on: <audience>"`` when
    ``website_analysis.target_audience`` is present and truthy, otherwise
    ``None``. Errors are logged and also yield ``None``.
    """
    audience_key = 'website_analysis.target_audience'
    try:
        target = source_data[audience_key] if audience_key in source_data else None
        if not target:
            return None
        return f"Journey based on: {target}"

    except Exception as e:
        logger.error(f"Error extracting buying journey: {str(e)}")
        return None
|
||||
|
||||
def extract_seasonal_trends(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract seasonal trends from trend analysis.

    Reads ``research_preferences.trend_analysis``. A list is joined with
    ``", "``; a string is returned unchanged. Returns ``None`` when the key
    is absent or holds any other type. Errors are logged and yield ``None``.
    """
    try:
        if 'research_preferences.trend_analysis' in source_data:
            trends = source_data['research_preferences.trend_analysis']
            if isinstance(trends, list):
                # str.join rejects non-string members; stringify so mixed
                # lists are rendered instead of being dropped via the except.
                return ', '.join(str(trend) for trend in trends)
            elif isinstance(trends, str):
                return trends
        return None

    except Exception as e:
        logger.error(f"Error extracting seasonal trends: {str(e)}")
        return None
|
||||
|
||||
def extract_engagement_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Pick the engagement-related entries out of the performance metrics.

    When ``website_analysis.performance_metrics`` is a dict, entries whose
    key contains ``"engagement"`` (case-insensitive) are rendered as
    ``"key: value"`` and joined with ``", "``. Returns ``None`` when the key
    is absent, is not a dict, or has no matching entries. Errors are logged
    and also yield ``None``.
    """
    metrics_key = 'website_analysis.performance_metrics'
    try:
        if metrics_key not in source_data:
            return None

        metrics = source_data[metrics_key]
        if not isinstance(metrics, dict):
            return None

        rendered = [
            f"{name}: {reading}"
            for name, reading in metrics.items()
            if 'engagement' in name.lower()
        ]
        return ', '.join(rendered) if rendered else None

    except Exception as e:
        logger.error(f"Error extracting engagement metrics: {str(e)}")
        return None
|
||||
|
||||
def extract_competitors(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract top competitors from competitor data.

    Reads ``website_analysis.competitors``. A list is joined with ``", "``;
    a string is returned unchanged. Returns ``None`` when the key is absent
    or holds any other type. Errors are logged and yield ``None``.
    """
    try:
        if 'website_analysis.competitors' in source_data:
            competitors = source_data['website_analysis.competitors']
            if isinstance(competitors, list):
                # Entries are stringified: a list containing non-string items
                # would otherwise make str.join raise and return nothing.
                return ', '.join(str(competitor) for competitor in competitors)
            elif isinstance(competitors, str):
                return competitors
        return None

    except Exception as e:
        logger.error(f"Error extracting competitors: {str(e)}")
        return None
|
||||
|
||||
def extract_competitor_strategies(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Summarise competitor content strategies.

    Truthy values under ``website_analysis.competitors`` and
    ``research_preferences.competitor_analysis`` are rendered as
    ``"Competitors: ..."`` and ``"Analysis: ..."`` and joined with ``"; "``.
    Returns ``None`` when neither contributes or when an error occurs (the
    error is logged).
    """
    try:
        competitor_list = (
            source_data['website_analysis.competitors']
            if 'website_analysis.competitors' in source_data
            else None
        )
        competitor_review = (
            source_data['research_preferences.competitor_analysis']
            if 'research_preferences.competitor_analysis' in source_data
            else None
        )

        sections = []
        if competitor_list:
            sections.append(f"Competitors: {competitor_list}")
        if competitor_review:
            sections.append(f"Analysis: {competitor_review}")

        if sections:
            return '; '.join(sections)
        return None

    except Exception as e:
        logger.error(f"Error extracting competitor strategies: {str(e)}")
        return None
|
||||
|
||||
def extract_market_gaps(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract market gaps from content gaps and research topics.

    Collects values from ``website_analysis.content_gaps`` followed by
    ``research_preferences.research_topics``. Lists contribute each item,
    strings contribute themselves, other types are ignored. The collected
    items are joined with ``", "``. Returns ``None`` when nothing was
    collected or an error occurred (the error is logged).
    """
    try:
        gaps = []
        for source_key in ('website_analysis.content_gaps',
                           'research_preferences.research_topics'):
            if source_key not in source_data:
                continue
            value = source_data[source_key]
            if isinstance(value, list):
                # Stringify so one non-string entry cannot make the final
                # join raise TypeError and lose every collected gap.
                gaps.extend(str(item) for item in value)
            elif isinstance(value, str):
                gaps.append(value)

        return ', '.join(gaps) if gaps else None

    except Exception as e:
        logger.error(f"Error extracting market gaps: {str(e)}")
        return None
|
||||
|
||||
def extract_industry_trends(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Combine the detected industry and the research focus into one string.

    Truthy values under ``website_analysis.industry`` and
    ``research_preferences.industry_focus`` become ``"Industry: ..."`` and
    ``"Focus: ..."``, joined with ``"; "``. Returns ``None`` when neither is
    available or when an error occurs (the error is logged).
    """
    labelled_sources = (
        ('website_analysis.industry', 'Industry'),
        ('research_preferences.industry_focus', 'Focus'),
    )
    try:
        pieces = []
        for source_key, label in labelled_sources:
            if source_key in source_data:
                value = source_data[source_key]
                if value:
                    pieces.append(f"{label}: {value}")

        return None if not pieces else '; '.join(pieces)

    except Exception as e:
        logger.error(f"Error extracting industry trends: {str(e)}")
        return None
|
||||
|
||||
def extract_emerging_trends(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract emerging trends from trend analysis.

    Reads ``research_preferences.trend_analysis``. A list is joined with
    ``", "``; a string is returned unchanged. Returns ``None`` when the key
    is absent or holds any other type. Errors are logged and yield ``None``.
    """
    try:
        if 'research_preferences.trend_analysis' in source_data:
            trends = source_data['research_preferences.trend_analysis']
            if isinstance(trends, list):
                # Stringify members; str.join would raise on non-strings and
                # the except below would then swallow the whole list.
                return ', '.join(str(trend) for trend in trends)
            elif isinstance(trends, str):
                return trends
        return None

    except Exception as e:
        logger.error(f"Error extracting emerging trends: {str(e)}")
        return None
|
||||
|
||||
def extract_preferred_formats(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract preferred content formats.

    Reads ``research_preferences.content_types``. A list is joined with
    ``", "``; a string is returned unchanged. Returns ``None`` when the key
    is absent or holds any other type. Errors are logged and yield ``None``.
    """
    try:
        if 'research_preferences.content_types' in source_data:
            content_types = source_data['research_preferences.content_types']
            if isinstance(content_types, list):
                # Stringify members so a non-string entry does not make the
                # join raise and turn the result into None.
                return ', '.join(str(item) for item in content_types)
            elif isinstance(content_types, str):
                return content_types
        return None

    except Exception as e:
        logger.error(f"Error extracting preferred formats: {str(e)}")
        return None
|
||||
|
||||
def extract_content_mix(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe the content mix from content types and content goals.

    Truthy values under ``research_preferences.content_types`` and
    ``website_analysis.content_goals`` become ``"Types: ..."`` and
    ``"Goals: ..."``, joined with ``"; "``. Returns ``None`` when neither is
    available or when an error occurs (the error is logged).
    """
    try:
        parts = [
            f"{label}: {source_data[source_key]}"
            for source_key, label in (
                ('research_preferences.content_types', 'Types'),
                ('website_analysis.content_goals', 'Goals'),
            )
            if source_key in source_data and source_data[source_key]
        ]
        if not parts:
            return None
        return '; '.join(parts)

    except Exception as e:
        logger.error(f"Error extracting content mix: {str(e)}")
        return None
|
||||
|
||||
def extract_content_frequency(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the content calendar, stringified, as the content frequency.

    Reads ``research_preferences.content_calendar`` and returns ``str()`` of
    it when present and truthy, otherwise ``None``. Errors are logged and
    also yield ``None``.
    """
    calendar_key = 'research_preferences.content_calendar'
    try:
        if calendar_key not in source_data:
            return None
        schedule = source_data[calendar_key]
        return str(schedule) if schedule else None

    except Exception as e:
        logger.error(f"Error extracting content frequency: {str(e)}")
        return None
|
||||
|
||||
def extract_optimal_timing(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the content calendar, stringified, as the optimal timing.

    Reads ``research_preferences.content_calendar`` and returns ``str()`` of
    it when present and truthy, otherwise ``None``. Errors are logged and
    also yield ``None``.
    """
    try:
        has_calendar = 'research_preferences.content_calendar' in source_data
        schedule = source_data['research_preferences.content_calendar'] if has_calendar else None
        if schedule:
            return str(schedule)
        return None

    except Exception as e:
        logger.error(f"Error extracting optimal timing: {str(e)}")
        return None
|
||||
|
||||
def extract_quality_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Pick the quality-related entries out of the performance metrics.

    When ``website_analysis.performance_metrics`` is a dict, entries whose
    key contains ``"quality"`` (case-insensitive) are rendered as
    ``"key: value"`` and joined with ``", "``. Returns ``None`` when the key
    is absent, is not a dict, or has no matching entries. Errors are logged
    and also yield ``None``.
    """
    try:
        if 'website_analysis.performance_metrics' not in source_data:
            return None
        all_metrics = source_data['website_analysis.performance_metrics']
        if not isinstance(all_metrics, dict):
            return None

        lines = []
        for name, score in all_metrics.items():
            if 'quality' in name.lower():
                lines.append(f"{name}: {score}")

        if not lines:
            return None
        return ', '.join(lines)

    except Exception as e:
        logger.error(f"Error extracting quality metrics: {str(e)}")
        return None
|
||||
|
||||
def extract_editorial_guidelines(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Derive editorial guidelines from business type and content types.

    Truthy values under ``website_analysis.business_type`` and
    ``research_preferences.content_types`` become ``"Business Type: ..."``
    and ``"Content Types: ..."``, joined with ``"; "``. Returns ``None`` when
    neither is available or when an error occurs (the error is logged).
    """
    try:
        notes = []

        kind = source_data['website_analysis.business_type'] if 'website_analysis.business_type' in source_data else None
        if kind:
            notes.append(f"Business Type: {kind}")

        formats = source_data['research_preferences.content_types'] if 'research_preferences.content_types' in source_data else None
        if formats:
            notes.append(f"Content Types: {formats}")

        if notes:
            return '; '.join(notes)
        return None

    except Exception as e:
        logger.error(f"Error extracting editorial guidelines: {str(e)}")
        return None
|
||||
|
||||
def extract_brand_voice(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe the brand voice from business type and onboarding session data.

    Truthy values under ``website_analysis.business_type`` and
    ``onboarding_session.session_data.brand_voice`` become
    ``"Business Type: ..."`` and ``"Brand Voice: ..."``, joined with ``"; "``.
    Returns ``None`` when neither is available or when an error occurs (the
    error is logged).
    """
    labelled_sources = (
        ('website_analysis.business_type', 'Business Type'),
        ('onboarding_session.session_data.brand_voice', 'Brand Voice'),
    )
    try:
        indicators = []
        for source_key, label in labelled_sources:
            if source_key not in source_data:
                continue
            value = source_data[source_key]
            if not value:
                continue
            indicators.append(f"{label}: {value}")

        return '; '.join(indicators) if indicators else None

    except Exception as e:
        logger.error(f"Error extracting brand voice: {str(e)}")
        return None
|
||||
|
||||
def extract_traffic_sources(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Pick the traffic-related entries out of the performance metrics.

    When ``website_analysis.performance_metrics`` is a dict, entries whose
    key contains ``"traffic"`` (case-insensitive) are rendered as
    ``"key: value"`` and joined with ``", "``. Returns ``None`` when the key
    is absent, is not a dict, or has no matching entries. Errors are logged
    and also yield ``None``.
    """
    metrics_key = 'website_analysis.performance_metrics'
    try:
        candidate = source_data[metrics_key] if metrics_key in source_data else None
        if isinstance(candidate, dict):
            matches = [(name, val) for name, val in candidate.items() if 'traffic' in name.lower()]
            if matches:
                return ', '.join(f"{name}: {val}" for name, val in matches)
        return None

    except Exception as e:
        logger.error(f"Error extracting traffic sources: {str(e)}")
        return None
|
||||
|
||||
def extract_conversion_rates(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Pick the conversion-related entries out of the performance metrics.

    When ``website_analysis.performance_metrics`` is a dict, entries whose
    key contains ``"conversion"`` (case-insensitive) are rendered as
    ``"key: value"`` and joined with ``", "``. Returns ``None`` when the key
    is absent, is not a dict, or has no matching entries. Errors are logged
    and also yield ``None``.
    """
    try:
        if 'website_analysis.performance_metrics' not in source_data:
            return None

        recorded = source_data['website_analysis.performance_metrics']
        if not isinstance(recorded, dict):
            return None

        summary = ', '.join(
            f"{name}: {rate}"
            for name, rate in recorded.items()
            if 'conversion' in name.lower()
        )
        # An empty join means no key matched; entries always render non-empty.
        return summary if summary else None

    except Exception as e:
        logger.error(f"Error extracting conversion rates: {str(e)}")
        return None
|
||||
|
||||
def extract_roi_targets(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Build ROI targets from the onboarding budget and ROI-related metrics.

    A truthy ``onboarding_session.session_data.budget`` becomes
    ``"Budget: ..."``. When ``website_analysis.performance_metrics`` is a
    dict, the entries whose key contains ``"roi"`` (case-insensitive) are
    appended as ``"ROI Metrics: {...}"`` using the dict's own rendering.
    Parts are joined with ``"; "``. Returns ``None`` when nothing was
    gathered or when an error occurs (the error is logged).
    """
    budget_key = 'onboarding_session.session_data.budget'
    metrics_key = 'website_analysis.performance_metrics'
    try:
        gathered = []

        allocated = source_data[budget_key] if budget_key in source_data else None
        if allocated:
            gathered.append(f"Budget: {allocated}")

        performance = source_data[metrics_key] if metrics_key in source_data else None
        if isinstance(performance, dict):
            roi_only = {name: val for name, val in performance.items() if 'roi' in name.lower()}
            if roi_only:
                gathered.append(f"ROI Metrics: {roi_only}")

        if not gathered:
            return None
        return '; '.join(gathered)

    except Exception as e:
        logger.error(f"Error extracting ROI targets: {str(e)}")
        return None
|
||||
|
||||
def extract_ab_testing_capabilities(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract A/B testing capabilities from team size.

    Reads ``onboarding_session.session_data.team_size`` and maps it to a
    capability label: more than 5 -> advanced, more than 2 -> basic,
    otherwise limited. Returns ``None`` when the key is absent, the value is
    falsy, or it cannot be read as a number (that error is logged).
    """
    try:
        if 'onboarding_session.session_data.team_size' in source_data:
            team_size = source_data['onboarding_session.session_data.team_size']
            if team_size:
                # Parse once. Fall back to float so decimal strings such as
                # "4.0" are accepted; int("4.0") alone raises ValueError.
                try:
                    headcount = int(team_size)
                except (TypeError, ValueError):
                    headcount = int(float(team_size))

                # Simple logic based on team size
                if headcount > 5:
                    return "Advanced A/B testing capabilities"
                elif headcount > 2:
                    return "Basic A/B testing capabilities"
                else:
                    return "Limited A/B testing capabilities"
        return None

    except Exception as e:
        logger.error(f"Error extracting A/B testing capabilities: {str(e)}")
        return None
|
||||
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
Performance Module
|
||||
Caching, optimization, and health monitoring services.
|
||||
"""
|
||||
|
||||
from .caching import CachingService
|
||||
from .optimization import PerformanceOptimizationService
|
||||
from .health_monitoring import HealthMonitoringService
|
||||
|
||||
__all__ = ['CachingService', 'PerformanceOptimizationService', 'HealthMonitoringService']
|
||||
@@ -0,0 +1,469 @@
|
||||
"""
|
||||
Caching Service
|
||||
Cache management and optimization.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import hashlib
|
||||
from typing import Dict, Any, Optional, List
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Try to import Redis, fallback to in-memory if not available
|
||||
try:
|
||||
import redis
|
||||
REDIS_AVAILABLE = True
|
||||
except ImportError:
|
||||
REDIS_AVAILABLE = False
|
||||
logger.warning("Redis not available, using in-memory caching")
|
||||
|
||||
class CachingService:
|
||||
"""Service for intelligent caching of content strategy data."""
|
||||
|
||||
def __init__(self):
    """Set up per-type cache settings and choose the cache backend.

    Tries to connect to Redis on ``localhost:6379`` when the ``redis``
    package was importable; on any failure, or when the package is missing,
    the service falls back to an in-process dict.
    """
    # Cache configuration: ttl is in seconds, max_size is an entry count.
    self.cache_config = {
        'ai_analysis': {
            'ttl': 3600,  # 1 hour
            'max_size': 1000,
            'priority': 'high'
        },
        'onboarding_data': {
            'ttl': 1800,  # 30 minutes
            'max_size': 500,
            'priority': 'medium'
        },
        'strategy_cache': {
            'ttl': 7200,  # 2 hours
            'max_size': 200,
            'priority': 'high'
        },
        'field_transformations': {
            'ttl': 900,  # 15 minutes
            'max_size': 1000,
            'priority': 'low'
        }
    }

    # Define every backend attribute up front so no code path can hit an
    # AttributeError, whichever backend ends up being selected.
    self.memory_cache = {}
    self.redis_client = None
    self.redis_available = False

    if not REDIS_AVAILABLE:
        logger.info("Using in-memory cache (Redis not available)")
        return

    # Initialize Redis connection if available
    try:
        client = redis.Redis(
            host='localhost',
            port=6379,
            db=0,
            decode_responses=True,
            socket_connect_timeout=5,
            socket_timeout=5
        )
        # Test connection
        client.ping()
    except Exception as e:
        logger.warning(f"Redis connection failed: {str(e)}. Using in-memory cache.")
        return

    self.redis_client = client
    self.redis_available = True
    logger.info("Redis connection established successfully")
|
||||
|
||||
def get_cache_key(self, cache_type: str, identifier: str, **kwargs) -> str:
    """Generate a unique cache key.

    The key has the form ``content_strategy:<cache_type>:<md5 hex digest>``,
    where the digest covers the cache type, the identifier and any extra
    keyword arguments (serialised as JSON with sorted keys).

    If key generation fails, the error is logged and the un-hashed
    ``content_strategy:<cache_type>:<identifier>`` is returned instead.
    """
    try:
        # Create a hash of the identifier and additional parameters
        key_data = f"{cache_type}:{identifier}"
        if kwargs:
            # default=str keeps non-JSON values (datetimes, sets, ...) in the
            # key. Without it json.dumps raised and the fallback key below
            # ignored kwargs, so different kwargs mapped to the same entry.
            key_data += ":" + json.dumps(kwargs, sort_keys=True, default=str)

        # Create hash for consistent key length (not used for security)
        key_hash = hashlib.md5(key_data.encode()).hexdigest()
        return f"content_strategy:{cache_type}:{key_hash}"

    except Exception as e:
        logger.error(f"Error generating cache key: {str(e)}")
        return f"content_strategy:{cache_type}:{identifier}"
|
||||
|
||||
async def get_cached_data(self, cache_type: str, identifier: str, **kwargs) -> Optional[Dict[str, Any]]:
    """Retrieve cached data.

    Returns the payload previously stored for this cache type, identifier
    and kwargs, or ``None`` on a miss or on any error (errors are logged).
    Uses the in-memory cache when Redis is not available.
    """
    try:
        if not self.redis_available:
            return self._get_from_memory_cache(cache_type, identifier, **kwargs)

        cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
        cached_data = self.redis_client.get(cache_key)

        if not cached_data:
            logger.info(f"Cache miss for {cache_type}:{identifier}")
            return None

        payload = json.loads(cached_data)
        logger.info(f"Cache hit for {cache_type}:{identifier}")

        # set_cached_data stores a {'data': ..., 'metadata': ...} envelope.
        # Unwrap it so this path returns the same shape as the in-memory
        # path, which returns only the stored data.
        if isinstance(payload, dict) and 'data' in payload and 'metadata' in payload:
            return payload['data']
        return payload

    except Exception as e:
        logger.error(f"Error retrieving cached data: {str(e)}")
        return None
|
||||
|
||||
async def set_cached_data(self, cache_type: str, identifier: str, data: Dict[str, Any], **kwargs) -> bool:
    """Store a payload in the cache under its type and identifier.

    With Redis, the payload is wrapped together with metadata (timestamp,
    cache type, identifier, ttl), serialised to JSON and written with the
    TTL configured for the cache type (3600 seconds when unconfigured).
    Without Redis the in-memory cache is used. Returns ``True`` on success
    and ``False`` on failure; errors are logged.
    """
    try:
        if self.redis_available:
            key = self.get_cache_key(cache_type, identifier, **kwargs)
            lifetime = self.cache_config.get(cache_type, {}).get('ttl', 3600)

            # Envelope: the payload plus bookkeeping about how it was cached.
            metadata = {
                'cached_at': datetime.utcnow().isoformat(),
                'cache_type': cache_type,
                'identifier': identifier,
                'ttl': lifetime
            }
            serialised = json.dumps({'data': data, 'metadata': metadata}, default=str)

            # Store in Redis with TTL
            stored = self.redis_client.setex(key, lifetime, serialised)
            if not stored:
                logger.warning(f"Failed to cache data for {cache_type}:{identifier}")
                return False

            logger.info(f"Data cached successfully for {cache_type}:{identifier}")
            await self._update_cache_stats(cache_type, 'set')
            return True

        return self._set_in_memory_cache(cache_type, identifier, data, **kwargs)

    except Exception as e:
        logger.error(f"Error setting cached data: {str(e)}")
        return False
|
||||
|
||||
async def invalidate_cache(self, cache_type: str, identifier: str, **kwargs) -> bool:
    """Remove one cached entry.

    Returns ``True`` when an entry was deleted and ``False`` when there was
    nothing to delete or an error occurred (errors are logged). Uses the
    in-memory cache when Redis is not available.
    """
    try:
        if self.redis_available:
            key = self.get_cache_key(cache_type, identifier, **kwargs)
            deleted = self.redis_client.delete(key)

            if not deleted:
                logger.warning(f"No cache entry found to invalidate for {cache_type}:{identifier}")
                return False

            logger.info(f"Cache invalidated for {cache_type}:{identifier}")
            await self._update_cache_stats(cache_type, 'invalidate')
            return True

        return self._invalidate_memory_cache(cache_type, identifier, **kwargs)

    except Exception as e:
        logger.error(f"Error invalidating cache: {str(e)}")
        return False
|
||||
|
||||
async def clear_cache_type(self, cache_type: str) -> bool:
    """Clear all cached data of a specific type.

    Deletes every key matching ``content_strategy:<cache_type>:*``. Returns
    ``True`` when the clear completed (including when there was nothing to
    clear) and ``False`` on error (errors are logged). Uses the in-memory
    cache when Redis is not available.
    """
    try:
        if not self.redis_available:
            return self._clear_memory_cache_type(cache_type)

        pattern = f"content_strategy:{cache_type}:*"
        # SCAN walks the keyspace incrementally instead of blocking the
        # server like KEYS does. It may report a key more than once, hence
        # the set.
        keys = set(self.redis_client.scan_iter(match=pattern))

        if keys:
            result = self.redis_client.delete(*keys)
            logger.info(f"Cleared {result} cache entries for {cache_type}")
            await self._update_cache_stats(cache_type, 'clear')
            return True
        else:
            logger.info(f"No cache entries found for {cache_type}")
            return True

    except Exception as e:
        logger.error(f"Error clearing cache type {cache_type}: {str(e)}")
        return False
|
||||
|
||||
async def get_cache_stats(self, cache_type: Optional[str] = None) -> Dict[str, Any]:
    """Get cache statistics.

    Returns a dict keyed by cache type. Each value holds ``entries`` (number
    of keys), ``size_bytes`` (summed length of the stored values) and
    ``config`` (the configured settings for that type). When ``cache_type``
    is given only that type is reported; otherwise every configured type is.
    Returns an empty dict on error (errors are logged). Uses the in-memory
    cache when Redis is not available.
    """
    try:
        if not self.redis_available:
            return self._get_memory_cache_stats(cache_type)

        type_names = [cache_type] if cache_type else list(self.cache_config.keys())

        stats = {}
        for type_name in type_names:
            pattern = f"content_strategy:{type_name}:*"
            # SCAN instead of the blocking KEYS command; de-duplicate because
            # SCAN may return the same key more than once.
            keys = set(self.redis_client.scan_iter(match=pattern))
            stats[type_name] = {
                'entries': len(keys),
                # A key can expire between the scan and the GET, so treat a
                # missing value as empty.
                'size_bytes': sum(len(self.redis_client.get(key) or '') for key in keys),
                'config': self.cache_config.get(type_name, {})
            }

        return stats

    except Exception as e:
        logger.error(f"Error getting cache stats: {str(e)}")
        return {}
|
||||
|
||||
async def optimize_cache(self) -> Dict[str, Any]:
    """Trim each cache type back to its configured ``max_size``.

    For every configured type holding more keys than ``max_size``, the keys
    with the least remaining TTL are deleted until the overflow is removed.
    Only keys with a positive TTL are candidates for removal.

    Returns a dict mapping each trimmed cache type to
    ``{'entries_removed': <count>, 'reason': 'max_size_exceeded'}``; types
    that needed no trimming are omitted. Returns an empty dict on error
    (errors are logged). Uses the in-memory cache when Redis is not
    available.
    """
    try:
        if not self.redis_available:
            return self._optimize_memory_cache()

        optimization_results = {}

        for cache_type, config in self.cache_config.items():
            pattern = f"content_strategy:{cache_type}:*"
            max_size = config.get('max_size', 1000)
            # SCAN instead of the blocking KEYS command. De-duplicate because
            # SCAN may repeat keys, which would inflate the overflow count.
            keys = set(self.redis_client.scan_iter(match=pattern))

            if len(keys) > max_size:
                # Remove oldest entries to maintain max size
                keys_with_times = []
                for key in keys:
                    ttl = self.redis_client.ttl(key)
                    if ttl > 0:  # Key still has TTL
                        keys_with_times.append((key, ttl))

                # Least remaining TTL first. Entries of one type share a
                # TTL, so these are the ones written longest ago.
                keys_with_times.sort(key=lambda x: x[1])

                # Remove excess entries
                excess_count = len(keys) - max_size
                keys_to_remove = [key for key, _ in keys_with_times[:excess_count]]

                if keys_to_remove:
                    removed_count = self.redis_client.delete(*keys_to_remove)
                    optimization_results[cache_type] = {
                        'entries_removed': removed_count,
                        'reason': 'max_size_exceeded'
                    }
                    logger.info(f"Optimized {cache_type} cache: removed {removed_count} entries")

        return optimization_results

    except Exception as e:
        logger.error(f"Error optimizing cache: {str(e)}")
        return {}
|
||||
|
||||
async def _update_cache_stats(self, cache_type: str, operation: str) -> None:
    """Update cache statistics.

    Increments the ``<operation>_count`` field of the Redis hash
    ``cache_stats:<cache_type>`` and refreshes its ``last_updated`` field.
    Does nothing when Redis is not available. Errors are logged and never
    raised.
    """
    try:
        if not self.redis_available:
            return

        stats_key = f"cache_stats:{cache_type}"

        # HINCRBY increments on the server in one step. Reading the hash,
        # adding one locally and writing it back could lose increments when
        # two writers overlap.
        self.redis_client.hincrby(stats_key, f"{operation}_count", 1)
        self.redis_client.hset(stats_key, 'last_updated', datetime.utcnow().isoformat())

    except Exception as e:
        logger.error(f"Error updating cache stats: {str(e)}")
|
||||
|
||||
# Memory cache fallback methods
|
||||
def _get_from_memory_cache(self, cache_type: str, identifier: str, **kwargs) -> Optional[Dict[str, Any]]:
    """Look up an entry in the in-memory cache.

    Returns the stored payload while the entry is younger than its recorded
    TTL. An expired entry is deleted and ``None`` is returned. ``None`` is
    also returned on a miss or on error (errors are logged).
    """
    try:
        key = self.get_cache_key(cache_type, identifier, **kwargs)
        entry = self.memory_cache.get(key)
        if not entry:
            return None

        # Check if data is still valid
        stored_at = datetime.fromisoformat(entry['metadata']['cached_at'])
        lifetime = timedelta(seconds=entry['metadata']['ttl'])
        if datetime.utcnow() - stored_at < lifetime:
            logger.info(f"Memory cache hit for {cache_type}:{identifier}")
            return entry['data']

        # Remove expired entry
        del self.memory_cache[key]
        return None

    except Exception as e:
        logger.error(f"Error getting from memory cache: {str(e)}")
        return None
|
||||
|
||||
def _set_in_memory_cache(self, cache_type: str, identifier: str, data: Dict[str, Any], **kwargs) -> bool:
    """Set data in memory cache.

    Stores the payload with metadata (timestamp, cache type, identifier,
    ttl). Before a new key is inserted, the oldest entries of the same
    cache type are evicted so the type stays within its configured
    ``max_size`` (1000 when unconfigured). Returns ``True`` on success and
    ``False`` on error (errors are logged).
    """
    try:
        cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
        type_config = self.cache_config.get(cache_type, {})
        ttl = type_config.get('ttl', 3600)
        max_size = type_config.get('max_size', 1000)

        cached_data = {
            'data': data,
            'metadata': {
                'cached_at': datetime.utcnow().isoformat(),
                'cache_type': cache_type,
                'identifier': identifier,
                'ttl': ttl
            }
        }

        # max_size is a per-type limit, so only entries of this type count
        # towards it and only they may be evicted. Comparing it with the size
        # of the whole cache evicted entries belonging to other types.
        # Overwriting an existing key does not grow the cache, so it never
        # evicts.
        if cache_key not in self.memory_cache:
            prefix = f"content_strategy:{cache_type}:"
            same_type_keys = [key for key in self.memory_cache if key.startswith(prefix)]
            overflow = len(same_type_keys) - max_size + 1
            if overflow > 0:
                # ISO timestamps sort chronologically as strings.
                same_type_keys.sort(key=lambda key: self.memory_cache[key]['metadata']['cached_at'])
                for stale_key in same_type_keys[:overflow]:
                    del self.memory_cache[stale_key]

        self.memory_cache[cache_key] = cached_data
        logger.info(f"Data cached in memory for {cache_type}:{identifier}")
        return True

    except Exception as e:
        logger.error(f"Error setting in memory cache: {str(e)}")
        return False
|
||||
|
||||
def _invalidate_memory_cache(self, cache_type: str, identifier: str, **kwargs) -> bool:
    """Drop one entry from the in-memory cache.

    Returns ``True`` when the entry existed and was removed, ``False`` when
    it was not present or an error occurred (errors are logged).
    """
    try:
        key = self.get_cache_key(cache_type, identifier, **kwargs)
        if key not in self.memory_cache:
            return False

        del self.memory_cache[key]
        logger.info(f"Memory cache invalidated for {cache_type}:{identifier}")
        return True

    except Exception as e:
        logger.error(f"Error invalidating memory cache: {str(e)}")
        return False
|
||||
|
||||
def _clear_memory_cache_type(self, cache_type: str) -> bool:
    """Remove every in-memory entry belonging to one cache type.

    Entries are matched by the ``content_strategy:<cache_type>:`` key
    prefix. Returns ``True`` once the sweep finishes (even when nothing
    matched) and ``False`` on error (errors are logged).
    """
    try:
        prefix = f"content_strategy:{cache_type}:"
        # Snapshot the matching keys first; the dict cannot be mutated while
        # it is being iterated.
        doomed = []
        for key in self.memory_cache.keys():
            if key.startswith(prefix):
                doomed.append(key)

        for key in doomed:
            del self.memory_cache[key]

        logger.info(f"Cleared {len(doomed)} memory cache entries for {cache_type}")
        return True

    except Exception as e:
        logger.error(f"Error clearing memory cache type: {str(e)}")
        return False
|
||||
|
||||
def _get_memory_cache_stats(self, cache_type: Optional[str] = None) -> Dict[str, Any]:
    """Report statistics for the in-memory cache.

    Returns a dict keyed by cache type. Each value holds ``entries`` (number
    of keys with the type's prefix), ``size_bytes`` (summed length of each
    entry's ``str()`` rendering) and ``config`` (the configured settings).
    When ``cache_type`` is given only that type is reported; otherwise every
    configured type is. Returns an empty dict on error (errors are logged).
    """
    try:
        if cache_type:
            type_names = [cache_type]
        else:
            type_names = list(self.cache_config.keys())

        report = {}
        for type_name in type_names:
            prefix = f"content_strategy:{type_name}:"
            matching = [key for key in self.memory_cache.keys() if key.startswith(prefix)]
            total_length = 0
            for key in matching:
                total_length += len(str(self.memory_cache[key]))
            report[type_name] = {
                'entries': len(matching),
                'size_bytes': total_length,
                'config': self.cache_config.get(type_name, {})
            }

        return report

    except Exception as e:
        logger.error(f"Error getting memory cache stats: {str(e)}")
        return {}
|
||||
|
||||
def _optimize_memory_cache(self) -> Dict[str, Any]:
    """Trim each in-memory cache type back to its configured ``max_size``.

    For every configured type holding more entries than ``max_size`` (1000
    when unset), the entries with the earliest ``cached_at`` timestamp are
    deleted until the overflow is gone.

    Returns a dict mapping each trimmed type to
    ``{'entries_removed': <count>, 'reason': 'max_size_exceeded'}``; types
    within their limit are omitted. Returns an empty dict on error (errors
    are logged).
    """
    try:
        results = {}

        for cache_type, config in self.cache_config.items():
            prefix = f"content_strategy:{cache_type}:"
            typed_keys = [key for key in self.memory_cache.keys() if key.startswith(prefix)]

            limit = config.get('max_size', 1000)
            if len(typed_keys) <= limit:
                continue

            # Pair each key with its parsed timestamp, oldest first.
            timestamped = [
                (key, datetime.fromisoformat(self.memory_cache[key]['metadata']['cached_at']))
                for key in typed_keys
            ]
            oldest_first = sorted(timestamped, key=lambda pair: pair[1])

            # Remove excess entries
            overflow = len(typed_keys) - limit
            doomed = [key for key, _ in oldest_first[:overflow]]
            for key in doomed:
                del self.memory_cache[key]

            results[cache_type] = {
                'entries_removed': len(doomed),
                'reason': 'max_size_exceeded'
            }

        return results

    except Exception as e:
        logger.error(f"Error optimizing memory cache: {str(e)}")
        return {}
|
||||
|
||||
# Cache-specific methods for different data types
|
||||
async def cache_ai_analysis(self, user_id: int, analysis_type: str, analysis_data: Dict[str, Any]) -> bool:
    """Store AI analysis results in the ``ai_analysis`` cache.

    The entry is identified by ``"<user_id>:<analysis_type>"``. Returns the
    result of ``set_cached_data``.
    """
    entry_id = f"{user_id}:{analysis_type}"
    stored = await self.set_cached_data('ai_analysis', entry_id, analysis_data)
    return stored
|
||||
|
||||
async def get_cached_ai_analysis(self, user_id: int, analysis_type: str) -> Optional[Dict[str, Any]]:
    """Fetch AI analysis results from the ``ai_analysis`` cache.

    The entry is identified by ``"<user_id>:<analysis_type>"``. Returns
    whatever ``get_cached_data`` returns.
    """
    entry_id = f"{user_id}:{analysis_type}"
    cached = await self.get_cached_data('ai_analysis', entry_id)
    return cached
|
||||
|
||||
async def cache_onboarding_data(self, user_id: int, onboarding_data: Dict[str, Any]) -> bool:
    """Store onboarding data in the ``onboarding_data`` cache.

    The entry is identified by the stringified user id. Returns the result
    of ``set_cached_data``.
    """
    entry_id = str(user_id)
    stored = await self.set_cached_data('onboarding_data', entry_id, onboarding_data)
    return stored
|
||||
|
||||
async def get_cached_onboarding_data(self, user_id: int) -> Optional[Dict[str, Any]]:
    """Look up a user's cached onboarding data.

    Reads the ``onboarding_data`` cache entry keyed by the stringified
    ``user_id`` and returns the result of ``get_cached_data`` unchanged.
    """
    entry_key = str(user_id)
    cached = await self.get_cached_data('onboarding_data', entry_key)
    return cached
|
||||
|
||||
async def cache_strategy(self, strategy_id: int, strategy_data: Dict[str, Any]) -> bool:
    """Store strategy data in the ``strategy_cache`` cache.

    The entry key is the stringified ``strategy_id``. Returns whatever
    ``set_cached_data`` reports for the write.
    """
    entry_key = str(strategy_id)
    stored = await self.set_cached_data('strategy_cache', entry_key, strategy_data)
    return stored
|
||||
|
||||
async def get_cached_strategy(self, strategy_id: int) -> Optional[Dict[str, Any]]:
    """Look up cached strategy data.

    Reads the ``strategy_cache`` cache entry keyed by the stringified
    ``strategy_id`` and returns the result of ``get_cached_data`` unchanged.
    """
    entry_key = str(strategy_id)
    cached = await self.get_cached_data('strategy_cache', entry_key)
    return cached
|
||||
|
||||
async def cache_field_transformations(self, user_id: int, transformations: Dict[str, Any]) -> bool:
    """Store field transformations in the ``field_transformations`` cache.

    The entry key is the stringified ``user_id``. Returns whatever
    ``set_cached_data`` reports for the write.
    """
    entry_key = str(user_id)
    stored = await self.set_cached_data('field_transformations', entry_key, transformations)
    return stored
|
||||
|
||||
async def get_cached_field_transformations(self, user_id: int) -> Optional[Dict[str, Any]]:
    """Look up a user's cached field transformations.

    Reads the ``field_transformations`` cache entry keyed by the stringified
    ``user_id`` and returns the result of ``get_cached_data`` unchanged.
    """
    entry_key = str(user_id)
    cached = await self.get_cached_data('field_transformations', entry_key)
    return cached
|
||||
@@ -0,0 +1,503 @@
|
||||
"""
|
||||
Health Monitoring Service
|
||||
System health monitoring and performance tracking.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import asyncio
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class HealthMonitoringService:
    """Service for system health monitoring and assessment.

    Runs point-in-time checks against the database, an optional cache
    service, an optional AI service and the host's resources, then folds the
    per-component results into an overall status with alerts and
    recommendations. The most recent report is kept in ``health_status``.

    Component statuses are one of ``'healthy'``, ``'degraded'``,
    ``'unhealthy'`` or ``'not_available'``.
    """

    def __init__(self):
        """Initialise the default thresholds and an empty health report."""
        self.health_thresholds = {
            'database_response_time': 1.0,  # seconds
            'cache_response_time': 0.1,  # seconds
            'ai_service_response_time': 5.0,  # seconds
            'memory_usage_threshold': 80,  # percentage
            'cpu_usage_threshold': 80,  # percentage
            'disk_usage_threshold': 90,  # percentage
            'error_rate_threshold': 0.05  # 5%
        }

        self.health_status = {
            'timestamp': None,
            'overall_status': 'healthy',
            'components': {},
            'alerts': [],
            'recommendations': []
        }

    @staticmethod
    def _component_status(connectivity_status: str, performance_status: str) -> str:
        """Fold a connectivity result and a performance result into one status.

        A failed connectivity probe makes the component ``'unhealthy'`` so
        that the CRITICAL alert and the "unhealthy" recommendations can fire.
        Both probes healthy gives ``'healthy'``; anything else is
        ``'degraded'``.
        """
        if connectivity_status == 'unhealthy':
            return 'unhealthy'
        if connectivity_status == 'healthy' and performance_status == 'healthy':
            return 'healthy'
        return 'degraded'

    async def check_system_health(self, db: "Session", cache_service=None, ai_service=None) -> Dict[str, Any]:
        """Perform comprehensive system health check.

        Args:
            db: Database session used for the database probes.
            cache_service: Optional cache service; when ``None`` the cache
                component is reported as ``'not_available'``.
            ai_service: Optional AI service; when ``None`` the AI component is
                reported as ``'not_available'``.

        Returns:
            A report dict with ``timestamp``, ``overall_status``,
            ``components``, ``alerts`` and ``recommendations``. The report is
            also stored on ``self.health_status``. If the check itself fails,
            a report with ``overall_status == 'error'`` is returned instead.
        """
        try:
            logger.info("Starting comprehensive system health check")

            health_report = {
                'timestamp': datetime.utcnow().isoformat(),
                'overall_status': 'healthy',
                'components': {},
                'alerts': [],
                'recommendations': []
            }

            # Check database health
            db_health = await self._check_database_health(db)
            health_report['components']['database'] = db_health

            # Check cache health. Compare against None rather than relying on
            # truthiness so a service object that happens to be falsy (for
            # example one defining __len__) is still probed.
            if cache_service is not None:
                cache_health = await self._check_cache_health(cache_service)
                health_report['components']['cache'] = cache_health
            else:
                health_report['components']['cache'] = {'status': 'not_available', 'message': 'Cache service not provided'}

            # Check AI service health
            if ai_service is not None:
                ai_health = await self._check_ai_service_health(ai_service)
                health_report['components']['ai_service'] = ai_health
            else:
                health_report['components']['ai_service'] = {'status': 'not_available', 'message': 'AI service not provided'}

            # Check system resources
            system_health = await self._check_system_resources()
            health_report['components']['system'] = system_health

            # Determine overall status
            health_report['overall_status'] = self._determine_overall_health(health_report['components'])

            # Generate alerts and recommendations
            health_report['alerts'] = self._generate_health_alerts(health_report['components'])
            health_report['recommendations'] = await self._generate_health_recommendations(health_report['components'])

            # Update health status
            self.health_status = health_report

            logger.info(f"System health check completed. Overall status: {health_report['overall_status']}")
            return health_report

        except Exception as e:
            logger.error(f"Error during system health check: {str(e)}")
            return {
                'timestamp': datetime.utcnow().isoformat(),
                'overall_status': 'error',
                'components': {},
                'alerts': [f'Health check failed: {str(e)}'],
                'recommendations': ['Investigate health check system']
            }

    async def _check_database_health(self, db: "Session") -> Dict[str, Any]:
        """Check database health and performance.

        Runs a ``SELECT 1`` connectivity probe and a timed
        ``information_schema.tables`` count, then collects database
        statistics. A failed connectivity probe yields ``'unhealthy'``; a
        slow or failed timing query alone yields ``'degraded'``.
        """
        try:
            start_time = time.time()

            # Test database connection
            try:
                result = db.execute(text("SELECT 1"))
                result.fetchone()
                connection_status = 'healthy'
            except Exception as e:
                connection_status = 'unhealthy'
                logger.error(f"Database connection test failed: {str(e)}")

            # Test query performance
            try:
                query_start = time.time()
                result = db.execute(text("SELECT COUNT(*) FROM information_schema.tables"))
                result.fetchone()
                query_time = time.time() - query_start
                query_status = 'healthy' if query_time <= self.health_thresholds['database_response_time'] else 'degraded'
            except Exception as e:
                query_time = 0
                query_status = 'unhealthy'
                logger.error(f"Database query test failed: {str(e)}")

            # Check database size and performance
            try:
                # Get database statistics
                db_stats = await self._get_database_statistics(db)
            except Exception as e:
                db_stats = {'error': str(e)}

            total_time = time.time() - start_time

            return {
                'status': self._component_status(connection_status, query_status),
                'connection_status': connection_status,
                'query_status': query_status,
                'response_time': query_time,
                'total_check_time': total_time,
                'statistics': db_stats,
                'last_checked': datetime.utcnow().isoformat()
            }

        except Exception as e:
            logger.error(f"Error checking database health: {str(e)}")
            return {
                'status': 'unhealthy',
                'error': str(e),
                'last_checked': datetime.utcnow().isoformat()
            }

    async def _check_cache_health(self, cache_service) -> Dict[str, Any]:
        """Check cache health and performance.

        Calls ``get_cache_stats`` as the connectivity probe, then times a
        write/read round trip of a throw-away ``health_check`` entry and
        invalidates it afterwards.
        """
        try:
            start_time = time.time()

            # Test cache connectivity
            try:
                cache_stats = await cache_service.get_cache_stats()
                connectivity_status = 'healthy'
            except Exception as e:
                cache_stats = {}
                connectivity_status = 'unhealthy'
                logger.error(f"Cache connectivity test failed: {str(e)}")

            # Test cache performance
            try:
                test_key = f"health_check_{int(time.time())}"
                test_data = {'test': 'data', 'timestamp': datetime.utcnow().isoformat()}

                # Test write
                write_start = time.time()
                write_success = await cache_service.set_cached_data('health_check', test_key, test_data)
                write_time = time.time() - write_start

                # Test read
                read_start = time.time()
                read_data = await cache_service.get_cached_data('health_check', test_key)
                read_time = time.time() - read_start

                # Clean up
                await cache_service.invalidate_cache('health_check', test_key)

                performance_status = 'healthy' if write_success and read_data and (write_time + read_time) <= self.health_thresholds['cache_response_time'] else 'degraded'

            except Exception as e:
                write_time = 0
                read_time = 0
                performance_status = 'unhealthy'
                logger.error(f"Cache performance test failed: {str(e)}")

            total_time = time.time() - start_time

            return {
                'status': self._component_status(connectivity_status, performance_status),
                'connectivity_status': connectivity_status,
                'performance_status': performance_status,
                'write_time': write_time,
                'read_time': read_time,
                'total_check_time': total_time,
                'statistics': cache_stats,
                'last_checked': datetime.utcnow().isoformat()
            }

        except Exception as e:
            logger.error(f"Error checking cache health: {str(e)}")
            return {
                'status': 'unhealthy',
                'error': str(e),
                'last_checked': datetime.utcnow().isoformat()
            }

    async def _check_ai_service_health(self, ai_service) -> Dict[str, Any]:
        """Check AI service health and performance.

        Sends one small test prompt through ``ai_service._call_ai_service``.
        An empty response or a raised exception marks connectivity as
        ``'unhealthy'``; a response slower than the threshold marks
        performance as ``'degraded'``.
        """
        try:
            start_time = time.time()

            # Test AI service connectivity
            try:
                # Simple test call to AI service
                test_prompt = "Test health check"
                ai_start = time.time()
                ai_response = await ai_service._call_ai_service(test_prompt, 'health_check')
                ai_time = time.time() - ai_start

                connectivity_status = 'healthy' if ai_response else 'unhealthy'
                performance_status = 'healthy' if ai_time <= self.health_thresholds['ai_service_response_time'] else 'degraded'

            except Exception as e:
                ai_time = 0
                connectivity_status = 'unhealthy'
                performance_status = 'unhealthy'
                logger.error(f"AI service health check failed: {str(e)}")

            total_time = time.time() - start_time

            return {
                'status': self._component_status(connectivity_status, performance_status),
                'connectivity_status': connectivity_status,
                'performance_status': performance_status,
                'response_time': ai_time,
                'total_check_time': total_time,
                'last_checked': datetime.utcnow().isoformat()
            }

        except Exception as e:
            logger.error(f"Error checking AI service health: {str(e)}")
            return {
                'status': 'unhealthy',
                'error': str(e),
                'last_checked': datetime.utcnow().isoformat()
            }

    async def _check_system_resources(self) -> Dict[str, Any]:
        """Check system resource usage.

        Reports CPU, memory, root-disk and network status using ``psutil``.
        If ``psutil`` cannot be imported or any probe raises, the component
        is reported as ``'unhealthy'`` with the error text.
        """
        try:
            import psutil

            # CPU usage. cpu_percent(interval=1) sleeps for the sampling
            # interval, so run it in the default executor instead of
            # stalling the event loop for a full second.
            loop = asyncio.get_running_loop()
            cpu_percent = await loop.run_in_executor(None, lambda: psutil.cpu_percent(interval=1))
            cpu_status = 'healthy' if cpu_percent <= self.health_thresholds['cpu_usage_threshold'] else 'degraded'

            # Memory usage
            memory = psutil.virtual_memory()
            memory_percent = memory.percent
            memory_status = 'healthy' if memory_percent <= self.health_thresholds['memory_usage_threshold'] else 'degraded'

            # Disk usage
            disk = psutil.disk_usage('/')
            disk_percent = disk.percent
            disk_status = 'healthy' if disk_percent <= self.health_thresholds['disk_usage_threshold'] else 'degraded'

            # Network status: only checks that the counters can be read.
            try:
                psutil.net_io_counters()
                network_status = 'healthy'
            except Exception:
                network_status = 'degraded'

            return {
                'status': 'healthy' if all(s == 'healthy' for s in [cpu_status, memory_status, disk_status, network_status]) else 'degraded',
                'cpu': {
                    'usage_percent': cpu_percent,
                    'status': cpu_status
                },
                'memory': {
                    'usage_percent': memory_percent,
                    'available_gb': memory.available / (1024**3),
                    'total_gb': memory.total / (1024**3),
                    'status': memory_status
                },
                'disk': {
                    'usage_percent': disk_percent,
                    'free_gb': disk.free / (1024**3),
                    'total_gb': disk.total / (1024**3),
                    'status': disk_status
                },
                'network': {
                    'status': network_status
                },
                'last_checked': datetime.utcnow().isoformat()
            }

        except Exception as e:
            logger.error(f"Error checking system resources: {str(e)}")
            return {
                'status': 'unhealthy',
                'error': str(e),
                'last_checked': datetime.utcnow().isoformat()
            }

    async def _get_database_statistics(self, db: "Session") -> Dict[str, Any]:
        """Get database statistics.

        Returns a dict with ``table_count`` and ``database_size``. Each value
        falls back to ``'unknown'`` when its query fails.
        """
        try:
            stats = {}

            # Get table counts (simplified)
            try:
                result = db.execute(text("SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public'"))
                stats['table_count'] = result.fetchone()[0]
            except Exception:
                stats['table_count'] = 'unknown'

            # Get database size (simplified)
            try:
                result = db.execute(text("SELECT pg_size_pretty(pg_database_size(current_database()))"))
                stats['database_size'] = result.fetchone()[0]
            except Exception:
                stats['database_size'] = 'unknown'

            return stats

        except Exception as e:
            logger.error(f"Error getting database statistics: {str(e)}")
            return {'error': str(e)}

    def _determine_overall_health(self, components: Dict[str, Any]) -> str:
        """Determine overall system health based on component status.

        The worst status wins: any ``'unhealthy'`` component makes the system
        ``'unhealthy'``, otherwise any ``'degraded'`` makes it ``'degraded'``.
        ``'healthy'`` requires every reported status to be healthy, so a
        ``'not_available'`` component results in ``'unknown'``. Components
        without a ``status`` key are ignored.
        """
        try:
            statuses = []
            for component_name, component_data in components.items():
                if isinstance(component_data, dict) and 'status' in component_data:
                    statuses.append(component_data['status'])

            if not statuses:
                return 'unknown'

            if 'unhealthy' in statuses:
                return 'unhealthy'
            elif 'degraded' in statuses:
                return 'degraded'
            elif all(status == 'healthy' for status in statuses):
                return 'healthy'
            else:
                return 'unknown'

        except Exception as e:
            logger.error(f"Error determining overall health: {str(e)}")
            return 'unknown'

    def _generate_health_alerts(self, components: Dict[str, Any]) -> List[str]:
        """Generate health alerts based on component status.

        Emits one CRITICAL or WARNING line per unhealthy or degraded
        component, plus metric-specific warnings when a measured value
        exceeds its threshold.
        """
        try:
            alerts = []

            for component_name, component_data in components.items():
                if isinstance(component_data, dict) and 'status' in component_data:
                    status = component_data['status']

                    if status == 'unhealthy':
                        alerts.append(f"CRITICAL: {component_name} is unhealthy")
                    elif status == 'degraded':
                        alerts.append(f"WARNING: {component_name} performance is degraded")

                    # Component-specific alerts
                    if component_name == 'database' and component_data.get('response_time', 0) > self.health_thresholds['database_response_time']:
                        alerts.append(f"WARNING: Database response time is slow: {component_data['response_time']:.2f}s")

                    elif component_name == 'cache' and component_data.get('write_time', 0) + component_data.get('read_time', 0) > self.health_thresholds['cache_response_time']:
                        alerts.append(f"WARNING: Cache response time is slow: {component_data.get('write_time', 0) + component_data.get('read_time', 0):.2f}s")

                    elif component_name == 'ai_service' and component_data.get('response_time', 0) > self.health_thresholds['ai_service_response_time']:
                        alerts.append(f"WARNING: AI service response time is slow: {component_data['response_time']:.2f}s")

                    elif component_name == 'system':
                        cpu_data = component_data.get('cpu', {})
                        memory_data = component_data.get('memory', {})
                        disk_data = component_data.get('disk', {})

                        if cpu_data.get('usage_percent', 0) > self.health_thresholds['cpu_usage_threshold']:
                            alerts.append(f"WARNING: High CPU usage: {cpu_data['usage_percent']:.1f}%")

                        if memory_data.get('usage_percent', 0) > self.health_thresholds['memory_usage_threshold']:
                            alerts.append(f"WARNING: High memory usage: {memory_data['usage_percent']:.1f}%")

                        if disk_data.get('usage_percent', 0) > self.health_thresholds['disk_usage_threshold']:
                            alerts.append(f"WARNING: High disk usage: {disk_data['usage_percent']:.1f}%")

            return alerts

        except Exception as e:
            logger.error(f"Error generating health alerts: {str(e)}")
            return ['Error generating health alerts']

    async def _generate_health_recommendations(self, components: Dict[str, Any]) -> List[str]:
        """Generate health recommendations based on component status.

        Adds one recommendation per unhealthy or degraded component, plus
        metric-specific advice when a measured value exceeds its threshold.
        """
        try:
            recommendations = []

            for component_name, component_data in components.items():
                if isinstance(component_data, dict) and 'status' in component_data:
                    status = component_data['status']

                    if status == 'unhealthy':
                        if component_name == 'database':
                            recommendations.append("Investigate database connectivity and configuration")
                        elif component_name == 'cache':
                            recommendations.append("Check cache service configuration and connectivity")
                        elif component_name == 'ai_service':
                            recommendations.append("Verify AI service configuration and API keys")
                        elif component_name == 'system':
                            recommendations.append("Check system resources and restart if necessary")

                    elif status == 'degraded':
                        if component_name == 'database':
                            recommendations.append("Optimize database queries and add indexes")
                        elif component_name == 'cache':
                            recommendations.append("Consider cache optimization and memory allocation")
                        elif component_name == 'ai_service':
                            recommendations.append("Review AI service performance and rate limits")
                        elif component_name == 'system':
                            recommendations.append("Monitor system resources and consider scaling")

                    # Specific recommendations based on metrics
                    if component_name == 'database' and component_data.get('response_time', 0) > self.health_thresholds['database_response_time']:
                        recommendations.append("Add database indexes for frequently queried columns")
                        recommendations.append("Consider database connection pooling")

                    elif component_name == 'system':
                        cpu_data = component_data.get('cpu', {})
                        memory_data = component_data.get('memory', {})
                        disk_data = component_data.get('disk', {})

                        if cpu_data.get('usage_percent', 0) > self.health_thresholds['cpu_usage_threshold']:
                            recommendations.append("Consider scaling CPU resources or optimizing CPU-intensive operations")

                        if memory_data.get('usage_percent', 0) > self.health_thresholds['memory_usage_threshold']:
                            recommendations.append("Increase memory allocation or optimize memory usage")

                        if disk_data.get('usage_percent', 0) > self.health_thresholds['disk_usage_threshold']:
                            recommendations.append("Clean up disk space or increase storage capacity")

            return recommendations

        except Exception as e:
            logger.error(f"Error generating health recommendations: {str(e)}")
            return ['Unable to generate health recommendations']

    async def get_health_history(self, hours: int = 24) -> List[Dict[str, Any]]:
        """Get health check history.

        No history is persisted yet, so ``hours`` is currently unused. The
        result is a one-element list holding the latest report, or an empty
        list if no check has run.
        """
        try:
            # This would typically query a database for historical health data
            # For now, return the current health status
            return [self.health_status] if self.health_status.get('timestamp') else []

        except Exception as e:
            logger.error(f"Error getting health history: {str(e)}")
            return []

    async def set_health_thresholds(self, thresholds: Dict[str, float]) -> bool:
        """Update health monitoring thresholds.

        Only keys that already exist in ``health_thresholds`` are updated;
        unknown keys are ignored with a warning. Returns ``True`` unless an
        exception occurs while applying the update.
        """
        try:
            for key, value in thresholds.items():
                if key in self.health_thresholds:
                    self.health_thresholds[key] = value
                    logger.info(f"Updated health threshold {key}: {value}")
                else:
                    # Surface typos instead of dropping them silently.
                    logger.warning(f"Ignoring unknown health threshold {key}")

            return True

        except Exception as e:
            logger.error(f"Error setting health thresholds: {str(e)}")
            return False

    async def get_health_thresholds(self) -> Dict[str, float]:
        """Get current health monitoring thresholds (as a shallow copy)."""
        return self.health_thresholds.copy()

    async def start_continuous_monitoring(self, interval_seconds: int = 300) -> None:
        """Start continuous health monitoring.

        Loops forever, logging a heartbeat every ``interval_seconds``. No
        actual checks are run yet. After an error inside the loop it waits
        60 seconds before the next iteration.
        """
        try:
            logger.info(f"Starting continuous health monitoring with {interval_seconds}s interval")

            while True:
                try:
                    # This would typically use the database session and services
                    # For now, just log that monitoring is active
                    logger.info("Continuous health monitoring check")

                    await asyncio.sleep(interval_seconds)

                except Exception as e:
                    logger.error(f"Error in continuous health monitoring: {str(e)}")
                    await asyncio.sleep(60)  # Wait 1 minute before retrying

        except Exception as e:
            logger.error(f"Error starting continuous monitoring: {str(e)}")
|
||||
@@ -0,0 +1,507 @@
|
||||
"""
|
||||
Optimization Service
|
||||
Performance optimization and monitoring.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import asyncio
|
||||
from typing import Dict, Any, List, Optional, Callable
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class PerformanceOptimizationService:
|
||||
"""Service for performance optimization and monitoring."""
|
||||
|
||||
def __init__(self):
    """Create empty metric stores and the default optimization limits."""
    # One bucket per metric family; each bucket maps an operation name to
    # the list of samples recorded for it.
    metric_families = ('response_times', 'database_queries', 'memory_usage', 'cache_hit_rates')
    self.performance_metrics = {family: {} for family in metric_families}

    # Limits beyond which an operation is flagged as needing optimization.
    self.optimization_config = dict(
        max_response_time=2.0,  # seconds
        max_database_queries=10,
        max_memory_usage=512,  # MB
        min_cache_hit_rate=0.8,
    )
|
||||
|
||||
async def optimize_response_time(self, operation_name: str, operation_func: Callable, *args, **kwargs) -> Dict[str, Any]:
    """Run an operation, time it, and report whether it needs optimization.

    Args:
        operation_name: Label under which the timing sample is recorded.
        operation_func: Callable to execute. It may be a coroutine function
            or a plain synchronous callable; an awaitable result is awaited.
        *args, **kwargs: Forwarded to ``operation_func``.

    Returns:
        A dict with ``result``, ``response_time`` (seconds),
        ``optimization_suggestions`` and ``performance_status``
        (``'optimal'``, ``'needs_optimization'`` or ``'error'``). If the
        operation raises, the error is logged and an ``'error'`` result with
        ``result=None`` and ``response_time=0.0`` is returned.
    """
    import inspect

    try:
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by wall-clock adjustments.
        started = time.perf_counter()

        # Execute operation. Only await when there is something to await, so
        # synchronous callables are supported as well.
        result = operation_func(*args, **kwargs)
        if inspect.isawaitable(result):
            result = await result

        response_time = time.perf_counter() - started

        # Record performance metrics
        self._record_response_time(operation_name, response_time)

        # Check if optimization is needed
        if response_time > self.optimization_config['max_response_time']:
            optimization_suggestions = await self._suggest_response_time_optimizations(operation_name, response_time)
            logger.warning(f"Slow response time for {operation_name}: {response_time:.2f}s")
        else:
            optimization_suggestions = []

        return {
            'result': result,
            'response_time': response_time,
            'optimization_suggestions': optimization_suggestions,
            'performance_status': 'optimal' if response_time <= self.optimization_config['max_response_time'] else 'needs_optimization'
        }

    except Exception as e:
        logger.error(f"Error optimizing response time for {operation_name}: {str(e)}")
        return {
            'result': None,
            'response_time': 0.0,
            'optimization_suggestions': ['Error occurred during operation'],
            'performance_status': 'error'
        }
|
||||
|
||||
async def optimize_database_queries(self, db: "Session", query_func: Callable, *args, **kwargs) -> Dict[str, Any]:
    """Run a query function, measure it, and report whether it needs optimization.

    Args:
        db: Database session, passed as the first argument to ``query_func``.
        query_func: Callable to execute. It may be a coroutine function, a
            plain callable, or a wrapper such as ``functools.partial``; an
            awaitable result is awaited.
        *args, **kwargs: Forwarded to ``query_func`` after ``db``.

    Returns:
        A dict with ``result``, ``query_count``, ``response_time`` (seconds),
        ``optimization_suggestions`` and ``performance_status``
        (``'optimal'``, ``'needs_optimization'`` or ``'error'``). If the
        query function raises, the error is logged and an ``'error'`` result
        is returned.
    """
    import inspect

    # Resolve a label once, up front. Callables such as functools.partial
    # have no __name__, and reading it inside the except handler would raise
    # a second error that escapes this method.
    unwrapped = getattr(query_func, 'func', query_func)
    query_name = getattr(unwrapped, '__name__', None) or repr(query_func)

    try:
        started = time.perf_counter()
        query_count_before = self._get_query_count(db)

        # Execute query function, awaiting only when it returned an awaitable.
        result = query_func(db, *args, **kwargs)
        if inspect.isawaitable(result):
            result = await result

        response_time = time.perf_counter() - started
        query_count_after = self._get_query_count(db)
        query_count = query_count_after - query_count_before

        # Record database performance
        self._record_database_performance(query_name, query_count, response_time)

        # Check if optimization is needed
        if query_count > self.optimization_config['max_database_queries']:
            optimization_suggestions = await self._suggest_database_optimizations(query_name, query_count, response_time)
            logger.warning(f"High query count for {query_name}: {query_count} queries")
        else:
            optimization_suggestions = []

        return {
            'result': result,
            'query_count': query_count,
            'response_time': response_time,
            'optimization_suggestions': optimization_suggestions,
            'performance_status': 'optimal' if query_count <= self.optimization_config['max_database_queries'] else 'needs_optimization'
        }

    except Exception as e:
        logger.error(f"Error optimizing database queries for {query_name}: {str(e)}")
        return {
            'result': None,
            'query_count': 0,
            'response_time': 0.0,
            'optimization_suggestions': ['Error occurred during database operation'],
            'performance_status': 'error'
        }
|
||||
|
||||
async def optimize_memory_usage(self, operation_name: str, operation_func: Callable, *args, **kwargs) -> Dict[str, Any]:
    """Run an operation and report how much process memory it added.

    Memory is measured as the change in this process's resident set size
    (via ``psutil``) across the call, in megabytes.

    Args:
        operation_name: Label under which the memory sample is recorded.
        operation_func: Callable to execute. It may be a coroutine function
            or a plain synchronous callable; an awaitable result is awaited.
        *args, **kwargs: Forwarded to ``operation_func``.

    Returns:
        A dict with ``result``, ``memory_used_mb``,
        ``optimization_suggestions`` and ``performance_status``
        (``'optimal'``, ``'needs_optimization'`` or ``'error'``). If
        ``psutil`` is unavailable or the operation raises, the error is
        logged and an ``'error'`` result is returned.
    """
    try:
        import inspect
        import os

        import psutil

        process = psutil.Process(os.getpid())
        memory_before = process.memory_info().rss / 1024 / 1024  # MB

        # Execute operation, awaiting only when it returned an awaitable so
        # synchronous callables are supported as well.
        result = operation_func(*args, **kwargs)
        if inspect.isawaitable(result):
            result = await result

        memory_after = process.memory_info().rss / 1024 / 1024  # MB
        memory_used = memory_after - memory_before

        # Record memory usage
        self._record_memory_usage(operation_name, memory_used)

        # Check if optimization is needed
        if memory_used > self.optimization_config['max_memory_usage']:
            optimization_suggestions = await self._suggest_memory_optimizations(operation_name, memory_used)
            logger.warning(f"High memory usage for {operation_name}: {memory_used:.2f}MB")
        else:
            optimization_suggestions = []

        return {
            'result': result,
            'memory_used_mb': memory_used,
            'optimization_suggestions': optimization_suggestions,
            'performance_status': 'optimal' if memory_used <= self.optimization_config['max_memory_usage'] else 'needs_optimization'
        }

    except Exception as e:
        logger.error(f"Error optimizing memory usage for {operation_name}: {str(e)}")
        return {
            'result': None,
            'memory_used_mb': 0.0,
            'optimization_suggestions': ['Error occurred during memory optimization'],
            'performance_status': 'error'
        }
|
||||
|
||||
async def optimize_cache_performance(self, cache_service, operation_name: str) -> Dict[str, Any]:
    """Inspect cache statistics and flag cache types with a low hit rate.

    Fetches stats from ``cache_service.get_cache_stats()``, assigns a hit
    rate to every cache type that has at least one entry, records the rates
    under ``operation_name`` and lists any rate below
    ``min_cache_hit_rate``. The hit rate is currently a fixed placeholder,
    not a measurement.

    Returns a dict with ``cache_stats``, ``hit_rates``,
    ``optimization_suggestions`` and ``performance_status``. On any
    exception the error is logged and an ``'error'`` result is returned.
    """
    try:
        cache_stats = await cache_service.get_cache_stats()

        # Simplified calculation: real hit/miss tracking is not wired up, so
        # every populated cache type gets the same placeholder rate.
        hit_rates = {
            cache_type: 0.8  # Placeholder
            for cache_type, stats in cache_stats.items()
            if stats.get('entries', 0) > 0
        }

        self._record_cache_performance(operation_name, hit_rates)

        optimization_suggestions = [
            f"Low cache hit rate for {cache_type}: {hit_rate:.2%}"
            for cache_type, hit_rate in hit_rates.items()
            if hit_rate < self.optimization_config['min_cache_hit_rate']
        ]

        if optimization_suggestions:
            performance_status = 'needs_optimization'
        else:
            performance_status = 'optimal'

        return {
            'cache_stats': cache_stats,
            'hit_rates': hit_rates,
            'optimization_suggestions': optimization_suggestions,
            'performance_status': performance_status
        }

    except Exception as e:
        logger.error(f"Error optimizing cache performance: {str(e)}")
        return {
            'cache_stats': {},
            'hit_rates': {},
            'optimization_suggestions': ['Error occurred during cache optimization'],
            'performance_status': 'error'
        }
|
||||
|
||||
def _record_response_time(self, operation_name: str, response_time: float) -> None:
    """Append a response-time sample for ``operation_name``.

    Each sample stores the duration and a UTC ISO timestamp. Only the 100
    newest samples per operation are kept. Errors are logged, never raised.
    """
    try:
        bucket = self.performance_metrics['response_times']
        samples = bucket.setdefault(operation_name, [])
        samples.append({
            'response_time': response_time,
            'timestamp': datetime.utcnow().isoformat()
        })

        # Cap the history at the 100 most recent samples.
        if len(samples) > 100:
            bucket[operation_name] = samples[-100:]

    except Exception as e:
        logger.error(f"Error recording response time: {str(e)}")
|
||||
|
||||
def _record_database_performance(self, operation_name: str, query_count: int, response_time: float) -> None:
    """Append a database-performance sample for ``operation_name``.

    Each sample stores the query count, the duration and a UTC ISO
    timestamp. Only the 100 newest samples per operation are kept. Errors
    are logged, never raised.
    """
    try:
        bucket = self.performance_metrics['database_queries']
        samples = bucket.setdefault(operation_name, [])
        samples.append({
            'query_count': query_count,
            'response_time': response_time,
            'timestamp': datetime.utcnow().isoformat()
        })

        # Cap the history at the 100 most recent samples.
        if len(samples) > 100:
            bucket[operation_name] = samples[-100:]

    except Exception as e:
        logger.error(f"Error recording database performance: {str(e)}")
|
||||
|
||||
def _record_memory_usage(self, operation_name: str, memory_used: float) -> None:
    """Append a memory-usage sample (in MB) for ``operation_name``.

    Each sample stores the amount and a UTC ISO timestamp. Only the 100
    newest samples per operation are kept. Errors are logged, never raised.
    """
    try:
        bucket = self.performance_metrics['memory_usage']
        samples = bucket.setdefault(operation_name, [])
        samples.append({
            'memory_used_mb': memory_used,
            'timestamp': datetime.utcnow().isoformat()
        })

        # Cap the history at the 100 most recent samples.
        if len(samples) > 100:
            bucket[operation_name] = samples[-100:]

    except Exception as e:
        logger.error(f"Error recording memory usage: {str(e)}")
|
||||
|
||||
def _record_cache_performance(self, operation_name: str, hit_rates: Dict[str, float]) -> None:
    """Append a cache hit-rate sample for ``operation_name``.

    Each sample stores the hit-rate mapping and a UTC ISO timestamp. Only
    the 100 newest samples per operation are kept. Errors are logged, never
    raised.
    """
    try:
        bucket = self.performance_metrics['cache_hit_rates']
        samples = bucket.setdefault(operation_name, [])
        samples.append({
            'hit_rates': hit_rates,
            'timestamp': datetime.utcnow().isoformat()
        })

        # Cap the history at the 100 most recent samples.
        if len(samples) > 100:
            bucket[operation_name] = samples[-100:]

    except Exception as e:
        logger.error(f"Error recording cache performance: {str(e)}")
|
||||
|
||||
def _get_query_count(self, db: Session) -> int:
    """Return the number of queries issued on ``db`` so far.

    Placeholder: no per-session counter is wired up, so ``db`` is not
    inspected and the result is always 0.
    """
    # Simplified implementation; real counting would need database-specific
    # monitoring tools.
    placeholder_count = 0
    try:
        return placeholder_count
    except Exception as e:
        logger.error(f"Error getting query count: {str(e)}")
        return 0
|
||||
|
||||
async def _suggest_response_time_optimizations(self, operation_name: str, response_time: float) -> List[str]:
|
||||
"""Suggest optimizations for slow response times."""
|
||||
try:
|
||||
suggestions = []
|
||||
|
||||
if response_time > 5.0:
|
||||
suggestions.append("Consider implementing caching for this operation")
|
||||
suggestions.append("Review database query optimization")
|
||||
suggestions.append("Consider async processing for heavy operations")
|
||||
elif response_time > 2.0:
|
||||
suggestions.append("Optimize database queries")
|
||||
suggestions.append("Consider adding indexes for frequently accessed data")
|
||||
suggestions.append("Review data processing algorithms")
|
||||
|
||||
# Add operation-specific suggestions
|
||||
if 'ai_analysis' in operation_name.lower():
|
||||
suggestions.append("Consider implementing AI response caching")
|
||||
suggestions.append("Review AI service integration efficiency")
|
||||
elif 'onboarding' in operation_name.lower():
|
||||
suggestions.append("Optimize data transformation algorithms")
|
||||
suggestions.append("Consider batch processing for large datasets")
|
||||
|
||||
return suggestions
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error suggesting response time optimizations: {str(e)}")
|
||||
return ["Unable to generate optimization suggestions"]
|
||||
|
||||
async def _suggest_database_optimizations(self, operation_name: str, query_count: int, response_time: float) -> List[str]:
|
||||
"""Suggest optimizations for database performance."""
|
||||
try:
|
||||
suggestions = []
|
||||
|
||||
if query_count > 20:
|
||||
suggestions.append("Implement query batching to reduce database calls")
|
||||
suggestions.append("Review and optimize N+1 query patterns")
|
||||
suggestions.append("Consider implementing database connection pooling")
|
||||
elif query_count > 10:
|
||||
suggestions.append("Optimize database queries with proper indexing")
|
||||
suggestions.append("Consider implementing query result caching")
|
||||
suggestions.append("Review database schema for optimization opportunities")
|
||||
|
||||
if response_time > 1.0:
|
||||
suggestions.append("Add database indexes for frequently queried columns")
|
||||
suggestions.append("Consider read replicas for heavy read operations")
|
||||
suggestions.append("Optimize database connection settings")
|
||||
|
||||
# Add operation-specific suggestions
|
||||
if 'strategy' in operation_name.lower():
|
||||
suggestions.append("Consider implementing strategy data caching")
|
||||
suggestions.append("Optimize strategy-related database queries")
|
||||
elif 'onboarding' in operation_name.lower():
|
||||
suggestions.append("Batch onboarding data processing")
|
||||
suggestions.append("Optimize onboarding data retrieval queries")
|
||||
|
||||
return suggestions
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error suggesting database optimizations: {str(e)}")
|
||||
return ["Unable to generate database optimization suggestions"]
|
||||
|
||||
async def _suggest_memory_optimizations(self, operation_name: str, memory_used: float) -> List[str]:
|
||||
"""Suggest optimizations for memory usage."""
|
||||
try:
|
||||
suggestions = []
|
||||
|
||||
if memory_used > 100:
|
||||
suggestions.append("Implement data streaming for large datasets")
|
||||
suggestions.append("Review memory-intensive data structures")
|
||||
suggestions.append("Consider implementing pagination")
|
||||
elif memory_used > 50:
|
||||
suggestions.append("Optimize data processing algorithms")
|
||||
suggestions.append("Review object lifecycle management")
|
||||
suggestions.append("Consider implementing lazy loading")
|
||||
|
||||
# Add operation-specific suggestions
|
||||
if 'ai_analysis' in operation_name.lower():
|
||||
suggestions.append("Implement AI response streaming")
|
||||
suggestions.append("Optimize AI model memory usage")
|
||||
elif 'onboarding' in operation_name.lower():
|
||||
suggestions.append("Process onboarding data in smaller chunks")
|
||||
suggestions.append("Implement data cleanup after processing")
|
||||
|
||||
return suggestions
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error suggesting memory optimizations: {str(e)}")
|
||||
return ["Unable to generate memory optimization suggestions"]
|
||||
|
||||
async def get_performance_report(self) -> Dict[str, Any]:
    """Assemble a snapshot of all aggregated performance metrics.

    Returns:
        A dict with ``timestamp``, ``response_times``, ``database_performance``,
        ``memory_usage``, ``cache_performance`` and
        ``optimization_recommendations``. If any step raises, a dict holding
        only ``timestamp`` and ``error`` is returned instead.
    """
    try:
        generated_at = datetime.utcnow().isoformat()
        response_times = self._calculate_average_response_times()
        database_performance = self._calculate_database_performance()
        memory_usage = self._calculate_memory_usage()
        cache_performance = self._calculate_cache_performance()
        recommendations = await self._generate_optimization_recommendations()

        return {
            'timestamp': generated_at,
            'response_times': response_times,
            'database_performance': database_performance,
            'memory_usage': memory_usage,
            'cache_performance': cache_performance,
            'optimization_recommendations': recommendations
        }

    except Exception as exc:
        logger.error(f"Error generating performance report: {str(exc)}")
        return {
            'timestamp': datetime.utcnow().isoformat(),
            'error': str(exc)
        }
|
||||
|
||||
def _calculate_average_response_times(self) -> Dict[str, float]:
|
||||
"""Calculate average response times for operations."""
|
||||
try:
|
||||
averages = {}
|
||||
for operation_name, times in self.performance_metrics['response_times'].items():
|
||||
if times:
|
||||
avg_time = sum(t['response_time'] for t in times) / len(times)
|
||||
averages[operation_name] = avg_time
|
||||
|
||||
return averages
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating average response times: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _calculate_database_performance(self) -> Dict[str, Dict[str, float]]:
|
||||
"""Calculate database performance metrics."""
|
||||
try:
|
||||
performance = {}
|
||||
for operation_name, queries in self.performance_metrics['database_queries'].items():
|
||||
if queries:
|
||||
avg_queries = sum(q['query_count'] for q in queries) / len(queries)
|
||||
avg_time = sum(q['response_time'] for q in queries) / len(queries)
|
||||
performance[operation_name] = {
|
||||
'average_queries': avg_queries,
|
||||
'average_response_time': avg_time
|
||||
}
|
||||
|
||||
return performance
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating database performance: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _calculate_memory_usage(self) -> Dict[str, float]:
|
||||
"""Calculate average memory usage for operations."""
|
||||
try:
|
||||
averages = {}
|
||||
for operation_name, usage in self.performance_metrics['memory_usage'].items():
|
||||
if usage:
|
||||
avg_memory = sum(u['memory_used_mb'] for u in usage) / len(usage)
|
||||
averages[operation_name] = avg_memory
|
||||
|
||||
return averages
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating memory usage: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _calculate_cache_performance(self) -> Dict[str, float]:
|
||||
"""Calculate cache performance metrics."""
|
||||
try:
|
||||
performance = {}
|
||||
for operation_name, rates in self.performance_metrics['cache_hit_rates'].items():
|
||||
if rates:
|
||||
# Calculate average hit rate across all cache types
|
||||
all_rates = []
|
||||
for rate_data in rates:
|
||||
if rate_data['hit_rates']:
|
||||
avg_rate = sum(rate_data['hit_rates'].values()) / len(rate_data['hit_rates'])
|
||||
all_rates.append(avg_rate)
|
||||
|
||||
if all_rates:
|
||||
performance[operation_name] = sum(all_rates) / len(all_rates)
|
||||
|
||||
return performance
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating cache performance: {str(e)}")
|
||||
return {}
|
||||
|
||||
async def _generate_optimization_recommendations(self) -> List[str]:
|
||||
"""Generate optimization recommendations based on performance data."""
|
||||
try:
|
||||
recommendations = []
|
||||
|
||||
# Check response times
|
||||
avg_response_times = self._calculate_average_response_times()
|
||||
for operation, avg_time in avg_response_times.items():
|
||||
if avg_time > self.optimization_config['max_response_time']:
|
||||
recommendations.append(f"Optimize response time for {operation} (avg: {avg_time:.2f}s)")
|
||||
|
||||
# Check database performance
|
||||
db_performance = self._calculate_database_performance()
|
||||
for operation, perf in db_performance.items():
|
||||
if perf['average_queries'] > self.optimization_config['max_database_queries']:
|
||||
recommendations.append(f"Reduce database queries for {operation} (avg: {perf['average_queries']:.1f} queries)")
|
||||
|
||||
# Check memory usage
|
||||
memory_usage = self._calculate_memory_usage()
|
||||
for operation, memory in memory_usage.items():
|
||||
if memory > self.optimization_config['max_memory_usage']:
|
||||
recommendations.append(f"Optimize memory usage for {operation} (avg: {memory:.1f}MB)")
|
||||
|
||||
return recommendations
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating optimization recommendations: {str(e)}")
|
||||
return ["Unable to generate optimization recommendations"]
|
||||
|
||||
async def cleanup_old_metrics(self, days_to_keep: int = 30) -> Dict[str, int]:
    """Drop recorded metric entries older than ``days_to_keep`` days.

    Walks every metric type in ``self.performance_metrics`` and, for each
    operation whose value is a list, keeps only entries whose ISO
    ``timestamp`` is newer than the cutoff (current naive UTC time minus
    ``days_to_keep`` days).

    Returns:
        ``{'cleaned_count': n}`` with the number of removed entries, or
        ``{'cleaned_count': 0}`` if anything raised.
    """
    try:
        threshold = datetime.utcnow() - timedelta(days=days_to_keep)
        removed_total = 0

        for metric_type, per_operation in self.performance_metrics.items():
            for operation_name, entries in per_operation.items():
                if not isinstance(entries, list):
                    continue
                # Keep only the entries that are newer than the cutoff.
                survivors = [
                    entry for entry in entries
                    if datetime.fromisoformat(entry['timestamp']) > threshold
                ]
                self.performance_metrics[metric_type][operation_name] = survivors
                removed_total += len(entries) - len(survivors)

        logger.info(f"Cleaned up {removed_total} old performance metrics")
        return {'cleaned_count': removed_total}

    except Exception as exc:
        logger.error(f"Error cleaning up old metrics: {str(exc)}")
        return {'cleaned_count': 0}
|
||||
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
Utils Module
|
||||
Data processing and validation utilities.
|
||||
"""
|
||||
|
||||
from .data_processors import DataProcessorService
|
||||
from .validators import ValidationService
|
||||
|
||||
__all__ = ['DataProcessorService', 'ValidationService']
|
||||
@@ -0,0 +1,451 @@
|
||||
"""
|
||||
Data Processor Service
|
||||
Data processing utilities.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import re
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DataProcessorService:
    """Service for data processing utilities.

    Groups helpers for reshaping, cleaning, enriching, validating, merging,
    filtering, formatting and summarising dictionary-like data. The public
    methods are best-effort: unexpected errors are logged and a fallback
    value (usually the untouched input) is returned instead of raising.
    """

    def __init__(self):
        # Pre-compiled patterns used by clean_text_data. 'extra_whitespace'
        # is defined here but not used by any method of this class.
        self.cleaning_patterns = {
            'html_tags': re.compile(r'<[^>]+>'),
            'extra_whitespace': re.compile(r'\s+'),
            'special_chars': re.compile(r'[^\w\s\-.,!?;:()]'),
            'multiple_spaces': re.compile(r'\s{2,}'),
            'leading_trailing_spaces': re.compile(r'^\s+|\s+$')
        }

    def transform_data_structure(self, data: Union[Dict, List, str], target_format: str = 'dict') -> Union[Dict, List, str]:
        """Transform data between different structures.

        Args:
            data: Value to convert.
            target_format: ``'dict'``, ``'list'`` or ``'string'``.

        Returns:
            The converted value. For ``'dict'`` a list becomes an
            index-keyed dict and a string is parsed as JSON (falling back to
            ``{'value': data}`` when it is not valid JSON). An unknown
            ``target_format`` or an unexpected error returns ``data``
            unchanged.
        """
        try:
            if target_format == 'dict':
                if isinstance(data, dict):
                    return data
                elif isinstance(data, list):
                    return {str(i): item for i, item in enumerate(data)}
                elif isinstance(data, str):
                    try:
                        parsed = json.loads(data)
                    except json.JSONDecodeError:
                        return {'value': data}
                    # json.loads can yield a list or a scalar; the caller
                    # asked for a dict, so wrap those the same way the
                    # non-string branches do.
                    if isinstance(parsed, dict):
                        return parsed
                    if isinstance(parsed, list):
                        return {str(i): item for i, item in enumerate(parsed)}
                    return {'value': parsed}
                else:
                    return {'value': str(data)}

            elif target_format == 'list':
                if isinstance(data, list):
                    return data
                elif isinstance(data, dict):
                    return list(data.values())
                elif isinstance(data, str):
                    return [data]
                else:
                    return [str(data)]

            elif target_format == 'string':
                if isinstance(data, str):
                    return data
                elif isinstance(data, (dict, list)):
                    return json.dumps(data, default=str)
                else:
                    return str(data)

            else:
                logger.warning(f"Unknown target format: {target_format}")
                return data

        except Exception as e:
            logger.error(f"Error transforming data structure: {str(e)}")
            return data

    def clean_text_data(self, text: str, cleaning_level: str = 'standard') -> str:
        """Clean and normalize text data.

        Args:
            text: Text to clean; non-strings are converted with ``str()``.
            cleaning_level: ``'minimal'`` (whitespace only), ``'standard'``
                (also strips HTML tags) or ``'aggressive'`` (also strips
                characters outside the ``special_chars`` allow-list).

        Returns:
            The cleaned text. An unknown level returns ``text.strip()``.
        """
        try:
            if not isinstance(text, str):
                text = str(text)

            # Patterns removed before whitespace normalisation, per level.
            if cleaning_level == 'minimal':
                removal_steps = ()
            elif cleaning_level == 'standard':
                removal_steps = ('html_tags',)
            elif cleaning_level == 'aggressive':
                removal_steps = ('html_tags', 'special_chars')
            else:
                logger.warning(f"Unknown cleaning level: {cleaning_level}")
                return text.strip()

            cleaned = text
            for pattern_name in removal_steps:
                cleaned = self.cleaning_patterns[pattern_name].sub('', cleaned)
            cleaned = self.cleaning_patterns['leading_trailing_spaces'].sub('', cleaned)
            cleaned = self.cleaning_patterns['multiple_spaces'].sub(' ', cleaned)
            return cleaned.strip()

        except Exception as e:
            logger.error(f"Error cleaning text data: {str(e)}")
            return str(text)

    def _clean_value(self, value: Any, cleaning_level: str) -> Any:
        """Clean one value: strings directly, dicts and lists recursively."""
        if isinstance(value, str):
            return self.clean_text_data(value, cleaning_level)
        if isinstance(value, dict):
            return self.clean_dict_data(value, cleaning_level)
        if isinstance(value, list):
            return [self._clean_value(item, cleaning_level) for item in value]
        return value

    def clean_dict_data(self, data: Dict[str, Any], cleaning_level: str = 'standard') -> Dict[str, Any]:
        """Clean dictionary data recursively.

        Keys are stringified and cleaned; string values are cleaned; nested
        dicts and lists (including dicts inside lists) are cleaned
        recursively; every other value is kept as-is. Returns ``data``
        unchanged if an unexpected error occurs.
        """
        try:
            cleaned_data = {}

            for key, value in data.items():
                cleaned_key = self.clean_text_data(str(key), cleaning_level)
                cleaned_data[cleaned_key] = self._clean_value(value, cleaning_level)

            return cleaned_data

        except Exception as e:
            logger.error(f"Error cleaning dict data: {str(e)}")
            return data

    def enrich_data_with_metadata(self, data: Dict[str, Any], source: str = 'unknown') -> Dict[str, Any]:
        """Return a shallow copy of ``data`` with a ``_metadata`` entry added.

        The metadata holds the processing time (naive UTC, ISO format), the
        ``source`` label, the detected data type, the length of
        ``str(data)`` and the number of fields. Returns ``data`` unchanged
        if an unexpected error occurs.
        """
        try:
            enriched_data = data.copy()

            enriched_data['_metadata'] = {
                'processed_at': datetime.utcnow().isoformat(),
                'source': source,
                'data_type': self._determine_data_type(data),
                'size': len(str(data)),
                'field_count': len(data) if isinstance(data, dict) else 0
            }

            return enriched_data

        except Exception as e:
            logger.error(f"Error enriching data with metadata: {str(e)}")
            return data

    def _determine_data_type(self, data: Any) -> str:
        """Classify ``data`` as object/array/string/boolean/number/unknown."""
        if isinstance(data, dict):
            return 'object'
        if isinstance(data, list):
            return 'array'
        if isinstance(data, str):
            return 'string'
        # bool must be tested before int/float: bool is a subclass of int,
        # so the numeric check would otherwise swallow True/False.
        if isinstance(data, bool):
            return 'boolean'
        if isinstance(data, (int, float)):
            return 'number'
        return 'unknown'

    def validate_data_completeness(self, data: Dict[str, Any], required_fields: List[str]) -> Dict[str, Any]:
        """Validate data completeness against required fields.

        A field counts as present when it exists and is neither ``None`` nor
        ``''``. ``completeness_score`` is the fraction of present fields and
        ``is_complete`` is true when that score is at least 0.8 (or when no
        fields are required).
        """
        try:
            validation_result = {
                'is_complete': True,
                'missing_fields': [],
                'present_fields': [],
                'completeness_score': 0.0,
                'validation_timestamp': datetime.utcnow().isoformat()
            }

            for field in required_fields:
                if field in data and data[field] is not None and data[field] != '':
                    validation_result['present_fields'].append(field)
                else:
                    validation_result['missing_fields'].append(field)

            if required_fields:
                present_count = len(validation_result['present_fields'])
                validation_result['completeness_score'] = present_count / len(required_fields)
                validation_result['is_complete'] = validation_result['completeness_score'] >= 0.8

            return validation_result

        except Exception as e:
            logger.error(f"Error validating data completeness: {str(e)}")
            return {
                'is_complete': False,
                'missing_fields': required_fields,
                'present_fields': [],
                'completeness_score': 0.0,
                'validation_timestamp': datetime.utcnow().isoformat(),
                'error': str(e)
            }

    def normalize_field_values(self, data: Dict[str, Any], field_mappings: Dict[str, str]) -> Dict[str, Any]:
        """Rename fields according to ``field_mappings``.

        Only fields named in ``field_mappings`` (original name -> new name)
        and present in ``data`` are copied to the result; everything else is
        dropped. Returns ``data`` unchanged if an unexpected error occurs.
        """
        try:
            return {
                normalized_field: data[original_field]
                for original_field, normalized_field in field_mappings.items()
                if original_field in data
            }

        except Exception as e:
            logger.error(f"Error normalizing field values: {str(e)}")
            return data

    def merge_data_sources(self, data_sources: List[Dict[str, Any]], merge_strategy: str = 'prefer_first') -> Dict[str, Any]:
        """Merge multiple data sources.

        Args:
            data_sources: Dicts to merge, in priority order.
            merge_strategy: ``'prefer_first'`` (first non-empty value wins),
                ``'prefer_last'`` (last non-empty value wins), ``'combine'``
                (collect every value per key into a list) or
                ``'intersection'`` (only keys present in all sources, taking
                the first source's value).

        Returns:
            A new merged dict; ``{}`` for no sources or an unknown strategy.
            On an unexpected error the first source is returned.
        """
        try:
            if not data_sources:
                return {}

            if len(data_sources) == 1:
                # Copy so the result never aliases the caller's dict, which
                # matches what the multi-source paths return.
                return dict(data_sources[0])

            merged_data = {}

            if merge_strategy == 'prefer_first':
                for source in data_sources:
                    for key, value in source.items():
                        if key not in merged_data or merged_data[key] is None or merged_data[key] == '':
                            merged_data[key] = value

            elif merge_strategy == 'prefer_last':
                for source in data_sources:
                    for key, value in source.items():
                        if value is not None and value != '':
                            merged_data[key] = value

            elif merge_strategy == 'combine':
                for source in data_sources:
                    for key, value in source.items():
                        merged_data.setdefault(key, []).append(value)

            elif merge_strategy == 'intersection':
                first_source = data_sources[0]
                common_keys = set(first_source.keys())
                for source in data_sources[1:]:
                    common_keys &= set(source.keys())

                # Walk the first source so the key order is deterministic.
                for key, value in first_source.items():
                    if key in common_keys:
                        merged_data[key] = value

            else:
                logger.warning(f"Unknown merge strategy: {merge_strategy}")

            return merged_data

        except Exception as e:
            logger.error(f"Error merging data sources: {str(e)}")
            return data_sources[0] if data_sources else {}

    def filter_data_by_criteria(self, data: Dict[str, Any], criteria: Dict[str, Any]) -> Dict[str, Any]:
        """Filter data based on criteria.

        Supported ``criteria`` keys: ``include_fields``, ``exclude_fields``,
        ``min_length`` / ``max_length`` (applied to string values only) and
        ``required_values`` (field name -> collection of allowed values).
        Returns ``data`` unchanged if an unexpected error occurs.
        """
        try:
            filtered_data = {}

            for key, value in data.items():
                include_field = True

                if 'include_fields' in criteria and key not in criteria['include_fields']:
                    include_field = False

                if 'exclude_fields' in criteria and key in criteria['exclude_fields']:
                    include_field = False

                if 'min_length' in criteria and isinstance(value, str) and len(value) < criteria['min_length']:
                    include_field = False

                if 'max_length' in criteria and isinstance(value, str) and len(value) > criteria['max_length']:
                    include_field = False

                if 'required_values' in criteria and key in criteria['required_values']:
                    if value not in criteria['required_values'][key]:
                        include_field = False

                if include_field:
                    filtered_data[key] = value

            return filtered_data

        except Exception as e:
            logger.error(f"Error filtering data by criteria: {str(e)}")
            return data

    def format_data_for_output(self, data: Dict[str, Any], output_format: str = 'json') -> Union[str, Dict[str, Any]]:
        """Format data for different output formats.

        Args:
            data: Flat mapping to render.
            output_format: ``'json'``, ``'dict'``, ``'csv'`` (one header row
                and one value row) or ``'xml'`` (one element per key under
                ``<data>``).

        Returns:
            The rendered string, or ``data`` itself for ``'dict'`` and for an
            unknown format. On an unexpected error ``str(data)`` is returned.
        """
        try:
            if output_format == 'json':
                return json.dumps(data, indent=2, default=str)

            elif output_format == 'dict':
                return data

            elif output_format == 'csv':
                import csv
                import io

                if not data:
                    return ''

                # csv.writer quotes values containing commas, quotes or
                # newlines, which plain ','.join() would corrupt.
                buffer = io.StringIO()
                writer = csv.writer(buffer, lineterminator='\n')
                writer.writerow(list(data.keys()))
                writer.writerow([str(value) for value in data.values()])
                return buffer.getvalue()[:-1]  # drop the final row terminator

            elif output_format == 'xml':
                from xml.sax.saxutils import escape

                xml_lines = ['<?xml version="1.0" encoding="UTF-8"?>', '<data>']

                for key, value in data.items():
                    # Escape &, < and > so values cannot break the markup.
                    # Keys are used verbatim as element names.
                    xml_lines.append(f' <{key}>{escape(str(value))}</{key}>')

                xml_lines.append('</data>')
                return '\n'.join(xml_lines)

            else:
                logger.warning(f"Unknown output format: {output_format}")
                return data

        except Exception as e:
            logger.error(f"Error formatting data for output: {str(e)}")
            return str(data)

    def validate_data_types(self, data: Dict[str, Any], type_schema: Dict[str, str]) -> Dict[str, Any]:
        """Validate data types against a schema.

        ``type_schema`` maps field names to the type names produced by
        ``_determine_data_type``. Fields missing from ``data`` are ignored;
        each mismatch adds an entry to ``type_errors``.
        """
        try:
            validation_result = {
                'is_valid': True,
                'type_errors': [],
                'validation_timestamp': datetime.utcnow().isoformat()
            }

            for field, expected_type in type_schema.items():
                if field not in data:
                    continue

                value = data[field]
                actual_type = self._determine_data_type(value)

                if actual_type != expected_type:
                    validation_result['type_errors'].append({
                        'field': field,
                        'expected_type': expected_type,
                        'actual_type': actual_type,
                        'value': value
                    })
                    validation_result['is_valid'] = False

            return validation_result

        except Exception as e:
            logger.error(f"Error validating data types: {str(e)}")
            return {
                'is_valid': False,
                'type_errors': [{'error': str(e)}],
                'validation_timestamp': datetime.utcnow().isoformat()
            }

    def sanitize_sensitive_data(self, data: Dict[str, Any], sensitive_fields: List[str]) -> Dict[str, Any]:
        """Return a shallow copy of ``data`` with sensitive fields masked.

        Strings longer than four characters keep their first and last
        character with asterisks in between; any other value becomes
        ``'***'``. Returns ``data`` unchanged if an unexpected error occurs.
        """
        try:
            sanitized_data = data.copy()

            for field in sensitive_fields:
                if field not in sanitized_data:
                    continue

                value = sanitized_data[field]
                if isinstance(value, str) and len(value) > 4:
                    sanitized_data[field] = value[0] + '*' * (len(value) - 2) + value[-1]
                else:
                    sanitized_data[field] = '***'

            return sanitized_data

        except Exception as e:
            logger.error(f"Error sanitizing sensitive data: {str(e)}")
            return data

    def calculate_data_statistics(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Calculate statistics about the data.

        Counts fields per detected type plus ``None`` and empty-string
        fields; ``average_field_length`` is the mean length of the non-empty
        string values. On an unexpected error a dict with ``error`` and
        ``total_fields`` 0 is returned.
        """
        try:
            stats = {
                'total_fields': len(data),
                'string_fields': 0,
                'numeric_fields': 0,
                'boolean_fields': 0,
                'object_fields': 0,
                'array_fields': 0,
                'null_fields': 0,
                'empty_fields': 0,
                'average_field_length': 0.0
            }

            # Detected type name -> counter it increments.
            counter_for_type = {
                'string': 'string_fields',
                'number': 'numeric_fields',
                'boolean': 'boolean_fields',
                'object': 'object_fields',
                'array': 'array_fields'
            }

            total_length = 0
            string_count = 0

            for value in data.values():
                if value is None:
                    stats['null_fields'] += 1
                elif value == '':
                    stats['empty_fields'] += 1
                else:
                    data_type = self._determine_data_type(value)
                    counter = counter_for_type.get(data_type)
                    if counter is not None:
                        stats[counter] += 1
                    if data_type == 'string':
                        total_length += len(value)
                        string_count += 1

            if string_count > 0:
                stats['average_field_length'] = total_length / string_count

            return stats

        except Exception as e:
            logger.error(f"Error calculating data statistics: {str(e)}")
            return {
                'error': str(e),
                'total_fields': 0
            }
|
||||
@@ -0,0 +1,473 @@
|
||||
"""
|
||||
Validation Service
|
||||
Data validation utilities.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ValidationService:
|
||||
"""Service for data validation and business rule checking."""
|
||||
|
||||
def __init__(self):
|
||||
self.validation_patterns = {
|
||||
'email': re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'),
|
||||
'url': re.compile(r'^https?://(?:[-\w.])+(?:[:\d]+)?(?:/(?:[\w/_.])*(?:\?(?:[\w&=%.])*)?(?:#(?:[\w.])*)?)?$'),
|
||||
'phone': re.compile(r'^\+?1?\d{9,15}$'),
|
||||
'domain': re.compile(r'^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$'),
|
||||
'alphanumeric': re.compile(r'^[a-zA-Z0-9\s]+$'),
|
||||
'numeric': re.compile(r'^\d+(\.\d+)?$'),
|
||||
'integer': re.compile(r'^\d+$')
|
||||
}
|
||||
|
||||
self.business_rules = {
|
||||
'content_budget': {
|
||||
'min_value': 0,
|
||||
'max_value': 1000000,
|
||||
'required': True
|
||||
},
|
||||
'team_size': {
|
||||
'min_value': 1,
|
||||
'max_value': 100,
|
||||
'required': True
|
||||
},
|
||||
'implementation_timeline': {
|
||||
'min_days': 1,
|
||||
'max_days': 365,
|
||||
'required': True
|
||||
},
|
||||
'market_share': {
|
||||
'min_value': 0,
|
||||
'max_value': 100,
|
||||
'required': False
|
||||
}
|
||||
}
|
||||
|
||||
def validate_field(self, field_name: str, value: Any, field_type: str = 'string', **kwargs) -> Dict[str, Any]:
    """Validate a single field.

    Args:
        field_name: Name used in the result and in error messages.
        value: Value to validate.
        field_type: One of ``email``, ``url``, ``phone``, ``domain``,
            ``alphanumeric``, ``numeric``, ``integer``, ``date``, ``json``;
            anything else is validated as a plain string.
        **kwargs: Optional constraints: ``required``, ``min_length``,
            ``max_length`` (applied to ``str(value)``), and ``min_value`` /
            ``max_value`` (applied only to ``numeric`` / ``integer`` fields).

    Returns:
        A dict with ``field_name``, ``value``, ``is_valid``, ``errors``,
        ``warnings`` and ``validation_timestamp``. Empty values (``None`` or
        ``''``) are valid unless ``required`` is set. An unexpected error
        yields an invalid result carrying a ``Validation error: ...`` message.
    """
    try:
        validation_result = {
            'field_name': field_name,
            'value': value,
            'is_valid': True,
            'errors': [],
            'warnings': [],
            'validation_timestamp': datetime.utcnow().isoformat()
        }

        is_empty = value is None or value == ''

        # A missing required value is the only error worth reporting.
        if kwargs.get('required', False) and is_empty:
            validation_result['is_valid'] = False
            validation_result['errors'].append(f"Field '{field_name}' is required")
            return validation_result

        # Optional and empty: nothing further to check.
        if is_empty:
            return validation_result

        # Type-specific validation, delegated to the matching _validate_* helper.
        typed_validators = (
            'email', 'url', 'phone', 'domain', 'alphanumeric',
            'numeric', 'integer', 'date', 'json',
        )
        if field_type in typed_validators:
            validator = getattr(self, f'_validate_{field_type}')
        else:
            validator = self._validate_string
        validation_result = validator(field_name, value, validation_result)

        # Length validation
        if 'min_length' in kwargs and len(str(value)) < kwargs['min_length']:
            validation_result['is_valid'] = False
            validation_result['errors'].append(f"Field '{field_name}' must be at least {kwargs['min_length']} characters long")

        if 'max_length' in kwargs and len(str(value)) > kwargs['max_length']:
            validation_result['is_valid'] = False
            validation_result['errors'].append(f"Field '{field_name}' must be no more than {kwargs['max_length']} characters long")

        # Range validation for numeric fields
        has_bounds = 'min_value' in kwargs or 'max_value' in kwargs
        if field_type in ['numeric', 'integer'] and has_bounds:
            try:
                numeric_value = float(value)
            except (TypeError, ValueError):
                # A value that is not a number cannot be range-checked.
                # Keep the type validator's errors instead of letting the
                # conversion failure replace them with a generic message.
                numeric_value = None

            if numeric_value is None:
                validation_result['is_valid'] = False
                if not validation_result['errors']:
                    validation_result['errors'].append(f"Field '{field_name}' must be a number")
            else:
                if 'min_value' in kwargs and numeric_value < kwargs['min_value']:
                    validation_result['is_valid'] = False
                    validation_result['errors'].append(f"Field '{field_name}' must be at least {kwargs['min_value']}")

                if 'max_value' in kwargs and numeric_value > kwargs['max_value']:
                    validation_result['is_valid'] = False
                    validation_result['errors'].append(f"Field '{field_name}' must be no more than {kwargs['max_value']}")

        return validation_result

    except Exception as e:
        logger.error(f"Error validating field {field_name}: {str(e)}")
        return {
            'field_name': field_name,
            'value': value,
            'is_valid': False,
            'errors': [f"Validation error: {str(e)}"],
            'warnings': [],
            'validation_timestamp': datetime.utcnow().isoformat()
        }
|
||||
|
||||
def validate_business_rules(self, data: Dict[str, Any]) -> Dict[str, Any]:
    """Validate data against business rules.

    Every field listed in ``self.business_rules`` is checked: a field that is
    present is passed to ``validate_field`` with its rule as keyword
    constraints, and a field that is absent is an error only when its rule
    is ``required``.

    Rules carrying ``min_value`` / ``max_value`` are validated as
    ``numeric``, because ``validate_field`` applies range checks only to
    numeric field types. Rule keys that ``validate_field`` does not
    interpret (for example ``min_days`` / ``max_days``) have no effect.

    Returns:
        A dict with ``is_valid``, the aggregated ``errors`` and ``warnings``,
        the per-field results under ``field_validations`` and a
        ``validation_timestamp``.
    """
    try:
        validation_result = {
            'is_valid': True,
            'errors': [],
            'warnings': [],
            'field_validations': {},
            'validation_timestamp': datetime.utcnow().isoformat()
        }

        for field_name, rules in self.business_rules.items():
            if field_name in data:
                # With the default 'string' type the numeric bounds in the
                # rule would be ignored, so pick 'numeric' when bounds exist.
                has_bounds = 'min_value' in rules or 'max_value' in rules
                field_type = 'numeric' if has_bounds else 'string'

                field_validation = self.validate_field(
                    field_name,
                    data[field_name],
                    field_type,
                    **rules
                )
                validation_result['field_validations'][field_name] = field_validation

                if not field_validation['is_valid']:
                    validation_result['is_valid'] = False
                    validation_result['errors'].extend(field_validation['errors'])

                validation_result['warnings'].extend(field_validation['warnings'])
            elif rules.get('required', False):
                validation_result['is_valid'] = False
                validation_result['errors'].append(f"Required field '{field_name}' is missing")

        return validation_result

    except Exception as e:
        logger.error(f"Error validating business rules: {str(e)}")
        return {
            'is_valid': False,
            'errors': [f"Business rule validation error: {str(e)}"],
            'warnings': [],
            'field_validations': {},
            'validation_timestamp': datetime.utcnow().isoformat()
        }
|
||||
|
||||
def validate_strategy_data(self, strategy_data: Dict[str, Any]) -> Dict[str, Any]:
    """Validate the fields of a content strategy payload.

    Required fields must be present and neither ``None`` nor ``''``; each one
    that is present is checked through ``self.validate_field`` and a failure
    invalidates the whole result. Optional fields are only checked when they
    hold a truthy value, and their failures are downgraded to warnings.

    Returns:
        A dict with ``is_valid``, ``errors``, ``warnings``,
        ``field_validations`` and ``validation_timestamp``. An unexpected
        exception is logged and reported as a single error in a dict of the
        same shape.
    """
    try:
        report = {
            'is_valid': True,
            'errors': [],
            'warnings': [],
            'field_validations': {},
            'validation_timestamp': datetime.utcnow().isoformat()
        }

        # Required fields for content strategy: name -> (type, constraints)
        required_specs = {
            'business_objectives': ('string', {'min_length': 1}),
            'target_metrics': ('string', {'min_length': 1}),
            'content_budget': ('numeric', {'min_value': 0, 'max_value': 1000000}),
            'team_size': ('integer', {'min_value': 1, 'max_value': 100}),
            'implementation_timeline': ('string', {'min_length': 1, 'max_length': 500}),
        }

        for field, (kind, constraints) in required_specs.items():
            if field not in strategy_data or strategy_data[field] is None or strategy_data[field] == '':
                report['is_valid'] = False
                report['errors'].append(f"Required field '{field}' is missing")
                continue

            checked = self.validate_field(field, strategy_data[field], kind, **constraints)
            report['field_validations'][field] = checked

            if not checked['is_valid']:
                report['is_valid'] = False
                report['errors'].extend(checked['errors'])

            report['warnings'].extend(checked['warnings'])

        # Optional fields: name -> (type, constraints)
        optional_specs = {
            'market_share': ('numeric', {'min_value': 0, 'max_value': 100}),
            'competitive_position': ('string', {'max_length': 1000}),
            'content_preferences': ('string', {'max_length': 2000}),
            'audience_pain_points': ('string', {'max_length': 2000}),
            'top_competitors': ('string', {'max_length': 1000}),
            'industry_trends': ('string', {'max_length': 1000})
        }

        for field, (kind, constraints) in optional_specs.items():
            if not (field in strategy_data and strategy_data[field]):
                continue

            checked = self.validate_field(field, strategy_data[field], kind, **constraints)
            report['field_validations'][field] = checked

            # A bad optional value never invalidates the strategy; surface it
            # as a warning instead.
            if not checked['is_valid']:
                report['warnings'].extend(checked['errors'])

            report['warnings'].extend(checked['warnings'])

        return report

    except Exception as e:
        logger.error(f"Error validating strategy data: {str(e)}")
        return {
            'is_valid': False,
            'errors': [f"Strategy validation error: {str(e)}"],
            'warnings': [],
            'field_validations': {},
            'validation_timestamp': datetime.utcnow().isoformat()
        }
|
||||
|
||||
def _validate_email(self, field_name: str, value: str, validation_result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate email format."""
|
||||
try:
|
||||
if not self.validation_patterns['email'].match(value):
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Field '{field_name}' must be a valid email address")
|
||||
|
||||
return validation_result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating email: {str(e)}")
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Email validation error: {str(e)}")
|
||||
return validation_result
|
||||
|
||||
def _validate_url(self, field_name: str, value: str, validation_result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate URL format."""
|
||||
try:
|
||||
if not self.validation_patterns['url'].match(value):
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Field '{field_name}' must be a valid URL")
|
||||
|
||||
return validation_result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating URL: {str(e)}")
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"URL validation error: {str(e)}")
|
||||
return validation_result
|
||||
|
||||
def _validate_phone(self, field_name: str, value: str, validation_result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate phone number format."""
|
||||
try:
|
||||
if not self.validation_patterns['phone'].match(value):
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Field '{field_name}' must be a valid phone number")
|
||||
|
||||
return validation_result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating phone: {str(e)}")
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Phone validation error: {str(e)}")
|
||||
return validation_result
|
||||
|
||||
def _validate_domain(self, field_name: str, value: str, validation_result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate domain format."""
|
||||
try:
|
||||
if not self.validation_patterns['domain'].match(value):
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Field '{field_name}' must be a valid domain")
|
||||
|
||||
return validation_result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating domain: {str(e)}")
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Domain validation error: {str(e)}")
|
||||
return validation_result
|
||||
|
||||
def _validate_alphanumeric(self, field_name: str, value: str, validation_result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate alphanumeric format."""
|
||||
try:
|
||||
if not self.validation_patterns['alphanumeric'].match(value):
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Field '{field_name}' must contain only letters, numbers, and spaces")
|
||||
|
||||
return validation_result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating alphanumeric: {str(e)}")
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Alphanumeric validation error: {str(e)}")
|
||||
return validation_result
|
||||
|
||||
def _validate_numeric(self, field_name: str, value: Union[str, int, float], validation_result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate numeric format."""
|
||||
try:
|
||||
if isinstance(value, (int, float)):
|
||||
return validation_result
|
||||
|
||||
if not self.validation_patterns['numeric'].match(str(value)):
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Field '{field_name}' must be a valid number")
|
||||
|
||||
return validation_result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating numeric: {str(e)}")
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Numeric validation error: {str(e)}")
|
||||
return validation_result
|
||||
|
||||
def _validate_integer(self, field_name: str, value: Union[str, int], validation_result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate integer format."""
|
||||
try:
|
||||
if isinstance(value, int):
|
||||
return validation_result
|
||||
|
||||
if not self.validation_patterns['integer'].match(str(value)):
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Field '{field_name}' must be a valid integer")
|
||||
|
||||
return validation_result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating integer: {str(e)}")
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Integer validation error: {str(e)}")
|
||||
return validation_result
|
||||
|
||||
def _validate_date(self, field_name: str, value: Union[str, datetime], validation_result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate date format."""
|
||||
try:
|
||||
if isinstance(value, datetime):
|
||||
return validation_result
|
||||
|
||||
# Try to parse date string
|
||||
try:
|
||||
datetime.fromisoformat(str(value).replace('Z', '+00:00'))
|
||||
except ValueError:
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Field '{field_name}' must be a valid date")
|
||||
|
||||
return validation_result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating date: {str(e)}")
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Date validation error: {str(e)}")
|
||||
return validation_result
|
||||
|
||||
def _validate_json(self, field_name: str, value: Union[str, dict, list], validation_result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate JSON format."""
|
||||
try:
|
||||
if isinstance(value, (dict, list)):
|
||||
return validation_result
|
||||
|
||||
import json
|
||||
try:
|
||||
json.loads(str(value))
|
||||
except json.JSONDecodeError:
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Field '{field_name}' must be valid JSON")
|
||||
|
||||
return validation_result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating JSON: {str(e)}")
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"JSON validation error: {str(e)}")
|
||||
return validation_result
|
||||
|
||||
def _validate_string(self, field_name: str, value: str, validation_result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate string format."""
|
||||
try:
|
||||
if not isinstance(value, str):
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"Field '{field_name}' must be a string")
|
||||
|
||||
return validation_result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating string: {str(e)}")
|
||||
validation_result['is_valid'] = False
|
||||
validation_result['errors'].append(f"String validation error: {str(e)}")
|
||||
return validation_result
|
||||
|
||||
def generate_validation_error_message(self, validation_result: Dict[str, Any]) -> str:
    """Turn a validation result into a single user-facing sentence.

    Returns the success message when ``is_valid`` is truthy, otherwise a
    message built from the ``errors`` list (singular or plural wording), or a
    generic failure message when no errors are listed. Any exception while
    reading the result is logged and yields a fixed fallback message.
    """
    try:
        if validation_result['is_valid']:
            return "Validation passed successfully"

        problems = validation_result['errors'] if 'errors' in validation_result else None
        if not problems:
            return "Validation failed with unknown errors"

        error_count = len(problems)
        if error_count == 1:
            return f"Validation error: {problems[0]}"
        return f"Validation failed with {error_count} errors: {'; '.join(problems)}"

    except Exception as e:
        logger.error(f"Error generating validation error message: {str(e)}")
        return "Error generating validation message"
|
||||
|
||||
def get_validation_summary(self, validation_results: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Aggregate several validation results into overall and per-field counts.

    Each result contributes to the passed/failed tally (by ``is_valid``), to
    the error and warning totals, and to a per-field bucket keyed by its
    ``field_name`` (``'unknown'`` when absent).

    Returns:
        The summary dict. If aggregation raises, the exception is logged and
        a zeroed summary carrying an extra ``error`` key is returned.
    """
    try:
        summary = {
            'total_validations': len(validation_results),
            'passed_validations': 0,
            'failed_validations': 0,
            'total_errors': 0,
            'total_warnings': 0,
            'field_summary': {},
            'validation_timestamp': datetime.utcnow().isoformat()
        }
        per_field = summary['field_summary']

        for entry in validation_results:
            tally_key = 'passed_validations' if entry.get('is_valid', False) else 'failed_validations'
            summary[tally_key] += 1

            error_total = len(entry.get('errors', []))
            warning_total = len(entry.get('warnings', []))
            summary['total_errors'] += error_total
            summary['total_warnings'] += warning_total

            # Create the per-field bucket on first sight of the field.
            bucket = per_field.setdefault(
                entry.get('field_name', 'unknown'),
                {'validations': 0, 'errors': 0, 'warnings': 0}
            )
            bucket['validations'] += 1
            bucket['errors'] += error_total
            bucket['warnings'] += warning_total

        return summary

    except Exception as e:
        logger.error(f"Error generating validation summary: {str(e)}")
        return {
            'total_validations': 0,
            'passed_validations': 0,
            'failed_validations': 0,
            'total_errors': 0,
            'total_warnings': 0,
            'field_summary': {},
            'validation_timestamp': datetime.utcnow().isoformat(),
            'error': str(e)
        }
|
||||
@@ -0,0 +1,232 @@
|
||||
"""
|
||||
Enhanced Strategy Database Service
|
||||
Handles database operations for enhanced content strategy functionality.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, List, Any, Optional
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import and_, or_
|
||||
|
||||
# Import database models
|
||||
from models.enhanced_strategy_models import EnhancedContentStrategy, EnhancedAIAnalysisResult, OnboardingDataIntegration
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class EnhancedStrategyDBService:
    """Database service for enhanced content strategy operations.

    Wraps a SQLAlchemy ``Session`` supplied by the caller. Every method
    catches exceptions, logs them, and returns a neutral fallback
    (``None``, ``[]``, ``False`` or zeroed statistics) instead of raising;
    write operations roll the session back on failure.

    NOTE(review): the methods are ``async`` but issue synchronous session
    calls, so they run inline on the event loop.
    """

    # Escape character used in LIKE patterns built from caller-supplied text.
    _LIKE_ESCAPE = "/"

    def __init__(self, db: Session):
        """Store the session used by all queries of this service."""
        self.db = db

    @classmethod
    def _escape_like(cls, term: Any) -> str:
        """Return ``term`` as text with LIKE wildcards escaped for a literal match."""
        esc = cls._LIKE_ESCAPE
        text = str(term)
        # Escape the escape character first so later replacements stay unambiguous.
        return text.replace(esc, esc + esc).replace("%", esc + "%").replace("_", esc + "_")

    @staticmethod
    def _strategy_to_dict(strategy: Any) -> Dict[str, Any]:
        """Serialize a strategy via ``to_dict()`` when available, else a minimal field set."""
        if hasattr(strategy, 'to_dict'):
            return strategy.to_dict()
        return {
            'id': strategy.id,
            'name': strategy.name,
            'industry': strategy.industry,
            'user_id': strategy.user_id,
            'created_at': strategy.created_at.isoformat() if strategy.created_at else None,
            'updated_at': strategy.updated_at.isoformat() if strategy.updated_at else None
        }

    async def get_enhanced_strategy(self, strategy_id: int) -> Optional[EnhancedContentStrategy]:
        """Get an enhanced strategy by ID, or ``None`` if absent or on error."""
        try:
            return self.db.query(EnhancedContentStrategy).filter(EnhancedContentStrategy.id == strategy_id).first()
        except Exception as e:
            logger.error(f"Error getting enhanced strategy {strategy_id}: {str(e)}")
            return None

    async def get_enhanced_strategies(self, user_id: Optional[int] = None, strategy_id: Optional[int] = None) -> List[EnhancedContentStrategy]:
        """Get enhanced strategies, optionally filtered by user and/or strategy ID.

        A falsy ``user_id`` or ``strategy_id`` (``None`` or ``0``) applies no
        filter for that argument. Returns ``[]`` on error.
        """
        try:
            query = self.db.query(EnhancedContentStrategy)

            if user_id:
                query = query.filter(EnhancedContentStrategy.user_id == user_id)

            if strategy_id:
                query = query.filter(EnhancedContentStrategy.id == strategy_id)

            return query.all()
        except Exception as e:
            logger.error(f"Error getting enhanced strategies: {str(e)}")
            return []

    async def create_enhanced_strategy(self, strategy_data: Dict[str, Any]) -> Optional[EnhancedContentStrategy]:
        """Create, commit and refresh a new enhanced strategy.

        ``strategy_data`` is expanded as keyword arguments to the model
        constructor. Returns ``None`` (after rollback) on error.
        """
        try:
            strategy = EnhancedContentStrategy(**strategy_data)
            self.db.add(strategy)
            self.db.commit()
            self.db.refresh(strategy)
            return strategy
        except Exception as e:
            logger.error(f"Error creating enhanced strategy: {str(e)}")
            self.db.rollback()
            return None

    async def update_enhanced_strategy(self, strategy_id: int, update_data: Dict[str, Any]) -> Optional[EnhancedContentStrategy]:
        """Update an enhanced strategy.

        Only keys of ``update_data`` that already exist as attributes on the
        strategy are applied; ``updated_at`` is then set to the current UTC
        time. Returns ``None`` when the strategy is not found or on error
        (after rollback).
        """
        try:
            strategy = await self.get_enhanced_strategy(strategy_id)
            if not strategy:
                return None

            for key, value in update_data.items():
                if hasattr(strategy, key):
                    setattr(strategy, key, value)

            strategy.updated_at = datetime.utcnow()
            self.db.commit()
            self.db.refresh(strategy)
            return strategy
        except Exception as e:
            logger.error(f"Error updating enhanced strategy {strategy_id}: {str(e)}")
            self.db.rollback()
            return None

    async def delete_enhanced_strategy(self, strategy_id: int) -> bool:
        """Delete an enhanced strategy.

        Returns ``True`` when a row was deleted, ``False`` when the strategy
        is not found or on error (after rollback).
        """
        try:
            strategy = await self.get_enhanced_strategy(strategy_id)
            if not strategy:
                return False

            self.db.delete(strategy)
            self.db.commit()
            return True
        except Exception as e:
            logger.error(f"Error deleting enhanced strategy {strategy_id}: {str(e)}")
            self.db.rollback()
            return False

    async def get_enhanced_strategies_with_analytics(self, strategy_id: Optional[int] = None) -> List[Dict[str, Any]]:
        """Get serialized strategies, each with its 5 latest AI analyses under ``analytics``."""
        try:
            strategies = await self.get_enhanced_strategies(strategy_id=strategy_id)
            result = []

            for strategy in strategies:
                strategy_dict = self._strategy_to_dict(strategy)

                # Add analytics data (one history query per strategy)
                strategy_dict['analytics'] = await self.get_ai_analysis_history(strategy.id, limit=5)

                result.append(strategy_dict)

            return result
        except Exception as e:
            logger.error(f"Error getting enhanced strategies with analytics: {str(e)}")
            return []

    async def get_ai_analysis_history(self, strategy_id: int, limit: int = 10) -> List[Dict[str, Any]]:
        """Get up to ``limit`` AI analysis results for a strategy, newest first."""
        try:
            analyses = self.db.query(EnhancedAIAnalysisResult).filter(
                EnhancedAIAnalysisResult.strategy_id == strategy_id
            ).order_by(EnhancedAIAnalysisResult.created_at.desc()).limit(limit).all()

            return [analysis.to_dict() if hasattr(analysis, 'to_dict') else {
                'id': analysis.id,
                'analysis_type': analysis.analysis_type,
                'insights': analysis.insights,
                'recommendations': analysis.recommendations,
                'created_at': analysis.created_at.isoformat() if analysis.created_at else None
            } for analysis in analyses]
        except Exception as e:
            logger.error(f"Error getting AI analysis history for strategy {strategy_id}: {str(e)}")
            return []

    async def get_onboarding_integration(self, strategy_id: int) -> Optional[Dict[str, Any]]:
        """Get the first onboarding integration record for a strategy as a dict, or ``None``."""
        try:
            integration = self.db.query(OnboardingDataIntegration).filter(
                OnboardingDataIntegration.strategy_id == strategy_id
            ).first()

            if integration:
                return integration.to_dict() if hasattr(integration, 'to_dict') else {
                    'id': integration.id,
                    'strategy_id': integration.strategy_id,
                    'data_sources': integration.data_sources,
                    'confidence_scores': integration.confidence_scores,
                    'created_at': integration.created_at.isoformat() if integration.created_at else None
                }
            return None
        except Exception as e:
            logger.error(f"Error getting onboarding integration for strategy {strategy_id}: {str(e)}")
            return None

    async def get_strategy_completion_stats(self, user_id: int) -> Dict[str, Any]:
        """Get completion statistics for all strategies of a user.

        A strategy counts as completed at 80% or more. A missing (``None``)
        ``completion_percentage`` is treated as 0 so that one unset row does
        not zero out the statistics of every other strategy.
        """
        try:
            strategies = await self.get_enhanced_strategies(user_id=user_id)

            percentages = [s.completion_percentage or 0 for s in strategies]
            total_strategies = len(percentages)
            completed_strategies = sum(1 for pct in percentages if pct >= 80)
            avg_completion = sum(percentages) / total_strategies if total_strategies > 0 else 0

            return {
                'total_strategies': total_strategies,
                'completed_strategies': completed_strategies,
                'avg_completion_percentage': avg_completion,
                'user_id': user_id
            }
        except Exception as e:
            logger.error(f"Error getting strategy completion stats for user {user_id}: {str(e)}")
            return {
                'total_strategies': 0,
                'completed_strategies': 0,
                'avg_completion_percentage': 0,
                'user_id': user_id
            }

    async def search_enhanced_strategies(self, user_id: int, search_term: str) -> List[EnhancedContentStrategy]:
        """Search a user's strategies by case-insensitive substring of name or industry.

        ``%`` and ``_`` in ``search_term`` are escaped so they match literally
        instead of acting as LIKE wildcards. Returns ``[]`` on error.
        """
        try:
            pattern = f"%{self._escape_like(search_term)}%"
            return self.db.query(EnhancedContentStrategy).filter(
                and_(
                    EnhancedContentStrategy.user_id == user_id,
                    or_(
                        EnhancedContentStrategy.name.ilike(pattern, escape=self._LIKE_ESCAPE),
                        EnhancedContentStrategy.industry.ilike(pattern, escape=self._LIKE_ESCAPE)
                    )
                )
            ).all()
        except Exception as e:
            logger.error(f"Error searching enhanced strategies: {str(e)}")
            return []

    async def get_strategy_export_data(self, strategy_id: int) -> Optional[Dict[str, Any]]:
        """Get comprehensive export data for a strategy.

        Returns a dict with the serialized ``strategy``, its 10 latest
        ``analytics`` entries, its ``onboarding_integration`` and an
        ``exported_at`` UTC timestamp, or ``None`` when the strategy is not
        found or on error.
        """
        try:
            strategy = await self.get_enhanced_strategy(strategy_id)
            if not strategy:
                return None

            # Get strategy data
            strategy_data = self._strategy_to_dict(strategy)

            # Get analytics data
            analytics = await self.get_ai_analysis_history(strategy_id, limit=10)

            # Get onboarding integration
            onboarding = await self.get_onboarding_integration(strategy_id)

            return {
                'strategy': strategy_data,
                'analytics': analytics,
                'onboarding_integration': onboarding,
                'exported_at': datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"Error getting strategy export data for strategy {strategy_id}: {str(e)}")
            return None
|
||||
2437
backend/api/content_planning/services/enhanced_strategy_service.py
Normal file
2437
backend/api/content_planning/services/enhanced_strategy_service.py
Normal file
File diff suppressed because it is too large
Load Diff
268
backend/api/content_planning/services/gap_analysis_service.py
Normal file
268
backend/api/content_planning/services/gap_analysis_service.py
Normal file
@@ -0,0 +1,268 @@
|
||||
"""
|
||||
Gap Analysis Service for Content Planning API
|
||||
Extracted business logic from the gap analysis route for better separation of concerns.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
# Import database services
|
||||
from services.content_planning_db import ContentPlanningDBService
|
||||
from services.ai_analysis_db_service import AIAnalysisDBService
|
||||
from services.onboarding_data_service import OnboardingDataService
|
||||
|
||||
# Import migrated content gap analysis services
|
||||
from services.content_gap_analyzer.content_gap_analyzer import ContentGapAnalyzer
|
||||
from services.content_gap_analyzer.competitor_analyzer import CompetitorAnalyzer
|
||||
from services.content_gap_analyzer.keyword_researcher import KeywordResearcher
|
||||
from services.content_gap_analyzer.ai_engine_service import AIEngineService
|
||||
from services.content_gap_analyzer.website_analyzer import WebsiteAnalyzer
|
||||
|
||||
# Import utilities
|
||||
from ..utils.error_handlers import ContentPlanningErrorHandler
|
||||
from ..utils.response_builders import ResponseBuilder
|
||||
from ..utils.constants import ERROR_MESSAGES, SUCCESS_MESSAGES
|
||||
|
||||
class GapAnalysisService:
|
||||
"""Service class for content gap analysis operations."""
|
||||
|
||||
def __init__(self):
    """Instantiate the database, onboarding and analysis collaborators."""
    # Persistence and onboarding helpers
    self.ai_analysis_db_service = AIAnalysisDBService()
    self.onboarding_service = OnboardingDataService()

    # Migrated content gap analysis services
    self.content_gap_analyzer = ContentGapAnalyzer()
    self.competitor_analyzer = CompetitorAnalyzer()
    self.keyword_researcher = KeywordResearcher()
    self.ai_engine_service = AIEngineService()
    self.website_analyzer = WebsiteAnalyzer()
|
||||
|
||||
async def create_gap_analysis(self, analysis_data: Dict[str, Any], db: Session) -> Dict[str, Any]:
    """Persist a new content gap analysis and return it as a dict.

    Any failure, including the DB service returning a falsy result, is
    logged and re-raised through
    ``ContentPlanningErrorHandler.handle_general_error``.
    """
    try:
        logger.info(f"Creating content gap analysis for: {analysis_data.get('website_url', 'Unknown')}")

        planning_db = ContentPlanningDBService(db)
        record = await planning_db.create_content_gap_analysis(analysis_data)

        if not record:
            raise Exception("Failed to create gap analysis")

        logger.info(f"Content gap analysis created successfully: {record.id}")
        return record.to_dict()

    except Exception as e:
        logger.error(f"Error creating content gap analysis: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "create_gap_analysis")
|
||||
|
||||
async def get_gap_analyses(self, user_id: Optional[int] = None, strategy_id: Optional[int] = None, force_refresh: bool = False) -> Dict[str, Any]:
    """Get content gap analysis with real AI insights - Database first approach.

    Unless ``force_refresh`` is set, the latest stored "gap_analysis" result
    (at most 24 hours old) is returned when one exists. Otherwise a new
    analysis is generated from the user's onboarding data, stored on a
    best-effort basis (a storage failure is only logged), and returned.

    Args:
        user_id: User to analyse; a falsy value falls back to user 1.
        strategy_id: Optional strategy to scope the cached lookup and the
            stored result.
        force_refresh: Skip the cache lookup and always run a new analysis.

    Returns:
        A dict with ``gap_analyses``, ``total_gaps``, ``generated_at``,
        ``ai_service_status``, ``personalized_data_used`` and
        ``data_source``; cached responses also carry ``cache_age_hours``.

    Raises:
        Whatever ``ContentPlanningErrorHandler.handle_general_error`` builds
        for any failure in the lookup or the analysis.
    """
    try:
        logger.info(f"🚀 Starting content gap analysis for user: {user_id}, strategy: {strategy_id}, force_refresh: {force_refresh}")

        # Use user_id or default to 1
        current_user_id = user_id or 1

        # Skip database check if force_refresh is True
        if not force_refresh:
            # First, try to get existing gap analysis from database
            logger.info(f"🔍 Checking database for existing gap analysis for user {current_user_id}")
            existing_analysis = await self.ai_analysis_db_service.get_latest_ai_analysis(
                user_id=current_user_id,
                analysis_type="gap_analysis",
                strategy_id=strategy_id,
                max_age_hours=24  # Use cached results up to 24 hours old
            )

            if existing_analysis:
                logger.info(f"✅ Found existing gap analysis in database: {existing_analysis.get('id', 'unknown')}")

                now = datetime.utcnow()

                # A stored null must not break len() below.
                cached_recommendations = existing_analysis.get('recommendations') or []

                # The stored timestamp may be a datetime, an ISO-8601 string
                # (e.g. from a serialized row) or missing; normalize it to a
                # naive UTC datetime so the age computation cannot fail.
                created_at = existing_analysis.get('created_at')
                if isinstance(created_at, str):
                    try:
                        created_at = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
                    except ValueError:
                        created_at = None
                if not isinstance(created_at, datetime):
                    created_at = now
                elif created_at.tzinfo is not None:
                    offset = created_at.utcoffset()
                    created_at = created_at.replace(tzinfo=None)
                    if offset is not None:
                        created_at -= offset

                # Return cached results
                return {
                    "gap_analyses": [{"recommendations": cached_recommendations}],
                    "total_gaps": len(cached_recommendations),
                    "generated_at": created_at.isoformat(),
                    "ai_service_status": existing_analysis.get('ai_service_status', 'operational'),
                    "personalized_data_used": bool(existing_analysis.get('personalized_data_used')),
                    "data_source": "database_cache",
                    "cache_age_hours": (now - created_at).total_seconds() / 3600
                }

        # No recent analysis found or force refresh requested, run new AI analysis
        logger.info(f"🔄 Running new gap analysis for user {current_user_id} (force_refresh: {force_refresh})")

        # Get personalized inputs from onboarding data
        personalized_inputs = self.onboarding_service.get_personalized_ai_inputs(current_user_id)

        logger.info(f"📊 Using personalized inputs: {len(personalized_inputs)} data points")

        # Generate real AI-powered gap analysis
        gap_analysis = await self.ai_engine_service.generate_content_recommendations(personalized_inputs)

        logger.info(f"✅ AI gap analysis completed: {len(gap_analysis)} recommendations")

        # Store results in database; a storage failure must not lose the
        # freshly generated analysis, so it is only logged.
        try:
            await self.ai_analysis_db_service.store_ai_analysis_result(
                user_id=current_user_id,
                analysis_type="gap_analysis",
                insights=[],
                recommendations=gap_analysis,
                personalized_data=personalized_inputs,
                strategy_id=strategy_id,
                ai_service_status="operational"
            )
            logger.info(f"💾 Gap analysis results stored in database for user {current_user_id}")
        except Exception as e:
            logger.error(f"❌ Failed to store gap analysis in database: {str(e)}")

        return {
            "gap_analyses": [{"recommendations": gap_analysis}],
            "total_gaps": len(gap_analysis),
            "generated_at": datetime.utcnow().isoformat(),
            "ai_service_status": "operational",
            "personalized_data_used": True,
            "data_source": "ai_analysis"
        }

    except Exception as e:
        logger.error(f"❌ Error generating content gap analysis: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "get_gap_analyses")
|
||||
|
||||
async def get_gap_analysis_by_id(self, analysis_id: int, db: Session) -> Dict[str, Any]:
    """Get a specific content gap analysis by ID.

    Raises:
        The error built by ``ContentPlanningErrorHandler.handle_not_found_error``
        when no analysis has this ID, or the one built by
        ``handle_general_error`` when the lookup or serialization fails.
    """
    try:
        logger.info(f"Fetching content gap analysis: {analysis_id}")

        db_service = ContentPlanningDBService(db)
        analysis = await db_service.get_content_gap_analysis(analysis_id)
        payload = analysis.to_dict() if analysis else None

    except Exception as e:
        logger.error(f"Error getting content gap analysis: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "get_gap_analysis_by_id")

    # Raised outside the try block on purpose: inside it, the not-found error
    # would be caught by ``except Exception`` and re-wrapped as a general error.
    if payload is None:
        logger.warning(f"Content gap analysis not found: {analysis_id}")
        raise ContentPlanningErrorHandler.handle_not_found_error("Content gap analysis", analysis_id)

    return payload
|
||||
|
||||
async def analyze_content_gaps(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
    """Analyze content gaps between your website and competitors.

    Runs six steps in sequence: website analysis, competitor analysis,
    keyword research, gap identification, AI recommendations and strategic
    opportunities. Every step except keyword research is returned under its
    own key, along with a ``created_at`` datetime; the keyword research
    output only feeds the later steps.

    Any failure is logged and re-raised through
    ``ContentPlanningErrorHandler.handle_general_error``.
    """
    try:
        logger.info(f"Starting content gap analysis for: {request_data.get('website_url', 'Unknown')}")

        # 1. Website Analysis
        logger.info("Performing website analysis...")
        site_report = await self.website_analyzer.analyze_website_content(request_data.get('website_url'))

        # 2. Competitor Analysis
        logger.info("Performing competitor analysis...")
        competitor_report = await self.competitor_analyzer.analyze_competitors(request_data.get('competitor_urls', []))

        # 3. Keyword Research
        logger.info("Performing keyword research...")
        keyword_report = await self.keyword_researcher.research_keywords(
            industry=request_data.get('industry'),
            target_keywords=request_data.get('target_keywords')
        )

        # 4. Content Gap Analysis
        logger.info("Performing content gap analysis...")
        gap_report = await self.content_gap_analyzer.identify_content_gaps(
            website_url=request_data.get('website_url'),
            competitor_urls=request_data.get('competitor_urls', []),
            keyword_data=keyword_report
        )

        # 5. AI-Powered Recommendations
        logger.info("Generating AI recommendations...")
        ai_recommendations = await self.ai_engine_service.generate_recommendations(
            website_analysis=site_report,
            competitor_analysis=competitor_report,
            gap_analysis=gap_report,
            keyword_analysis=keyword_report
        )

        # 6. Strategic Opportunities
        logger.info("Identifying strategic opportunities...")
        strategic_opportunities = await self.ai_engine_service.identify_strategic_opportunities(
            gap_analysis=gap_report,
            competitor_analysis=competitor_report,
            keyword_analysis=keyword_report
        )

        # Prepare response (keyword research is intentionally not included)
        response_data = {
            'website_analysis': site_report,
            'competitor_analysis': competitor_report,
            'gap_analysis': gap_report,
            'recommendations': ai_recommendations,
            'opportunities': strategic_opportunities,
            'created_at': datetime.utcnow()
        }

        logger.info("Content gap analysis completed successfully")
        return response_data

    except Exception as e:
        logger.error(f"Error analyzing content gaps: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "analyze_content_gaps")
|
||||
|
||||
async def get_user_gap_analyses(self, user_id: int, db: Session) -> List[Dict[str, Any]]:
    """Return every stored content gap analysis for one user as plain dicts.

    Args:
        user_id: Identifier of the user whose analyses are fetched.
        db: Database session handed to ``ContentPlanningDBService``.

    Returns:
        One ``to_dict()`` result per analysis returned by the DB service,
        in the order the service returned them.

    Raises:
        The error built by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised while fetching or serialising.
    """
    try:
        logger.info(f"Fetching gap analyses for user: {user_id}")

        records = await ContentPlanningDBService(db).get_user_content_gap_analyses(user_id)

        # Serialise each record so callers receive plain dictionaries.
        serialized = []
        for record in records:
            serialized.append(record.to_dict())
        return serialized

    except Exception as exc:
        logger.error(f"Error getting user gap analyses: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "get_user_gap_analyses")
|
||||
|
||||
async def update_gap_analysis(self, analysis_id: int, update_data: Dict[str, Any], db: Session) -> Dict[str, Any]:
    """Update a content gap analysis.

    Args:
        analysis_id: Identifier of the analysis to update.
        update_data: Field values forwarded unchanged to the DB service.
        db: Database session handed to ``ContentPlanningDBService``.

    Returns:
        ``to_dict()`` of the updated analysis.

    Raises:
        The error built by ``ContentPlanningErrorHandler.handle_not_found_error``
        when the DB service returns a falsy result, or the one built by
        ``handle_general_error`` for any exception raised during the update.
    """
    try:
        logger.info(f"Updating content gap analysis: {analysis_id}")

        db_service = ContentPlanningDBService(db)
        updated_analysis = await db_service.update_content_gap_analysis(analysis_id, update_data)

        if updated_analysis:
            return updated_analysis.to_dict()

    except Exception as e:
        logger.error(f"Error updating content gap analysis: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "update_gap_analysis")

    # Raised outside the try block on purpose: inside it, the blanket
    # ``except Exception`` above would catch the not-found error, log it as an
    # update failure and re-wrap it as a general error.
    # NOTE(review): presumably the handlers build HTTP errors with different
    # status codes - confirm against ContentPlanningErrorHandler.
    raise ContentPlanningErrorHandler.handle_not_found_error("Content gap analysis", analysis_id)
|
||||
|
||||
async def delete_gap_analysis(self, analysis_id: int, db: Session) -> bool:
    """Delete a content gap analysis.

    Args:
        analysis_id: Identifier of the analysis to delete.
        db: Database session handed to ``ContentPlanningDBService``.

    Returns:
        ``True`` when the DB service reports a truthy deletion result.

    Raises:
        The error built by ``ContentPlanningErrorHandler.handle_not_found_error``
        when the DB service returns a falsy result, or the one built by
        ``handle_general_error`` for any exception raised during the deletion.
    """
    try:
        logger.info(f"Deleting content gap analysis: {analysis_id}")

        db_service = ContentPlanningDBService(db)
        deleted = await db_service.delete_content_gap_analysis(analysis_id)

        if deleted:
            return True

    except Exception as e:
        logger.error(f"Error deleting content gap analysis: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "delete_gap_analysis")

    # Raised outside the try block on purpose: inside it, the blanket
    # ``except Exception`` above would catch the not-found error, log it as a
    # deletion failure and re-wrap it as a general error.
    # NOTE(review): presumably the handlers build HTTP errors with different
    # status codes - confirm against ContentPlanningErrorHandler.
    raise ContentPlanningErrorHandler.handle_not_found_error("Content gap analysis", analysis_id)
|
||||
Reference in New Issue
Block a user