ALwrity Version 0.5.0 (FastAPI + React)

This commit is contained in:
ajaysi
2025-08-06 12:48:02 +05:30
parent f28a919caa
commit 32f97fa6b3
476 changed files with 115544 additions and 28747 deletions

View File

@@ -0,0 +1,342 @@
"""
AI Analytics Service for Content Planning API
Extracted business logic from the AI analytics route for better separation of concerns.
"""
from typing import Dict, Any, List, Optional
from datetime import datetime
from loguru import logger
from sqlalchemy.orm import Session
import time
# Import database services
from services.content_planning_db import ContentPlanningDBService
from services.ai_analysis_db_service import AIAnalysisDBService
from services.ai_analytics_service import AIAnalyticsService
from services.onboarding_data_service import OnboardingDataService
# Import utilities
from ..utils.error_handlers import ContentPlanningErrorHandler
from ..utils.response_builders import ResponseBuilder
from ..utils.constants import ERROR_MESSAGES, SUCCESS_MESSAGES
class ContentPlanningAIAnalyticsService:
    """Business-logic layer for the AI analytics endpoints of the Content Planning API.

    The service delegates analysis work to ``AIAnalyticsService``, reads and
    writes stored analyses through ``AIAnalysisDBService`` and pulls
    personalization inputs from ``OnboardingDataService``.
    """

    def __init__(self):
        """Create the three collaborating services using their default constructors."""
        # Storage/retrieval of previously generated AI analyses.
        self.ai_analysis_db_service = AIAnalysisDBService()
        # Engine that runs the individual analyses.
        self.ai_analytics_service = AIAnalyticsService()
        # Source of per-user personalization inputs.
        self.onboarding_service = OnboardingDataService()
async def analyze_content_evolution(self, strategy_id: int, time_period: str = "30d") -> Dict[str, Any]:
    """Run the content-evolution analysis for one strategy and wrap it in a response dict.

    Args:
        strategy_id: Strategy whose content evolution is analyzed.
        time_period: Period forwarded unchanged to the analytics service.

    Returns:
        Dict with ``analysis_type``, ``strategy_id``, ``results``,
        ``recommendations`` (taken from the results, ``[]`` when absent) and
        ``analysis_date`` (naive ``datetime.utcnow()``).

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised while analyzing.
    """
    try:
        logger.info(f"Starting content evolution analysis for strategy {strategy_id}")

        analysis = await self.ai_analytics_service.analyze_content_evolution(
            strategy_id=strategy_id,
            time_period=time_period,
        )

        payload = dict(
            analysis_type='content_evolution',
            strategy_id=strategy_id,
            results=analysis,
            recommendations=analysis.get('recommendations', []),
            analysis_date=datetime.utcnow(),
        )

        logger.info(f"Content evolution analysis completed for strategy {strategy_id}")
        return payload
    except Exception as exc:
        logger.error(f"Error analyzing content evolution: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "analyze_content_evolution")
async def analyze_performance_trends(self, strategy_id: int, metrics: Optional[List[str]] = None) -> Dict[str, Any]:
    """Run the performance-trends analysis for one strategy and wrap it in a response dict.

    Args:
        strategy_id: Strategy whose performance trends are analyzed.
        metrics: Optional metric names, forwarded unchanged to the analytics service.

    Returns:
        Dict with ``analysis_type``, ``strategy_id``, ``results``,
        ``recommendations`` (taken from the results, ``[]`` when absent) and
        ``analysis_date`` (naive ``datetime.utcnow()``).

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised while analyzing.
    """
    try:
        logger.info(f"Starting performance trends analysis for strategy {strategy_id}")

        analysis = await self.ai_analytics_service.analyze_performance_trends(
            strategy_id=strategy_id,
            metrics=metrics,
        )

        payload = dict(
            analysis_type='performance_trends',
            strategy_id=strategy_id,
            results=analysis,
            recommendations=analysis.get('recommendations', []),
            analysis_date=datetime.utcnow(),
        )

        logger.info(f"Performance trends analysis completed for strategy {strategy_id}")
        return payload
    except Exception as exc:
        logger.error(f"Error analyzing performance trends: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "analyze_performance_trends")
async def predict_content_performance(self, strategy_id: int, content_data: Dict[str, Any]) -> Dict[str, Any]:
    """Ask the analytics service for a performance prediction and wrap it in a response dict.

    Args:
        strategy_id: Strategy the content belongs to.
        content_data: Content description, forwarded unchanged to the analytics service.

    Returns:
        Dict with ``analysis_type``, ``strategy_id``, ``results``,
        ``recommendations`` (the results' ``optimization_recommendations``,
        ``[]`` when absent) and ``analysis_date`` (naive ``datetime.utcnow()``).

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised while predicting.
    """
    try:
        logger.info(f"Starting content performance prediction for strategy {strategy_id}")

        prediction = await self.ai_analytics_service.predict_content_performance(
            content_data=content_data,
            strategy_id=strategy_id,
        )

        payload = dict(
            analysis_type='content_performance_prediction',
            strategy_id=strategy_id,
            results=prediction,
            recommendations=prediction.get('optimization_recommendations', []),
            analysis_date=datetime.utcnow(),
        )

        logger.info(f"Content performance prediction completed for strategy {strategy_id}")
        return payload
    except Exception as exc:
        logger.error(f"Error predicting content performance: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "predict_content_performance")
async def generate_strategic_intelligence(self, strategy_id: int, market_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Generate strategic intelligence for one strategy and wrap it in a response dict.

    Args:
        strategy_id: Strategy to generate intelligence for.
        market_data: Optional market data, forwarded unchanged to the analytics service.

    Returns:
        Dict with ``analysis_type``, ``strategy_id``, ``results``, an always-empty
        ``recommendations`` list and ``analysis_date`` (naive ``datetime.utcnow()``).

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised while generating.
    """
    try:
        logger.info(f"Starting strategic intelligence generation for strategy {strategy_id}")

        intelligence = await self.ai_analytics_service.generate_strategic_intelligence(
            strategy_id=strategy_id,
            market_data=market_data,
        )

        payload = dict(
            analysis_type='strategic_intelligence',
            strategy_id=strategy_id,
            results=intelligence,
            # Left empty on purpose: the intelligence results carry their own recommendations.
            recommendations=[],
            analysis_date=datetime.utcnow(),
        )

        logger.info(f"Strategic intelligence generation completed for strategy {strategy_id}")
        return payload
    except Exception as exc:
        logger.error(f"Error generating strategic intelligence: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "generate_strategic_intelligence")
async def get_ai_analytics(self, user_id: Optional[int] = None, strategy_id: Optional[int] = None, force_refresh: bool = False) -> Dict[str, Any]:
    """Return AI insights for a user, preferring a stored analysis over a fresh run.

    Unless ``force_refresh`` is set, the latest stored ``comprehensive_analysis``
    (requested with ``max_age_hours=24``) is returned when one exists. Otherwise
    the performance-trends, strategic-intelligence and content-evolution analyses
    are run, their insights and recommendations are merged, the result is stored
    (best effort) and returned.

    Args:
        user_id: User to analyze; a falsy value falls back to user ``1``.
        strategy_id: Strategy to analyze; a falsy value falls back to ``1`` for
            the analysis calls, while the original value is used for cache
            lookup and storage.
        force_refresh: Skip the database lookup and always run a new analysis.

    Returns:
        Dict with ``insights``, ``recommendations``, their totals, ``generated_at``
        (ISO string), ``ai_service_status``, ``processing_time``,
        ``personalized_data_used``, ``data_source`` and ``user_profile``; cached
        responses additionally carry ``cache_age_hours``.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception other than a failure to store the result.
    """
    # Function-scope import: the module only imports ``datetime`` from ``datetime``.
    from datetime import timezone

    try:
        logger.info(f"🚀 Starting AI analytics for user: {user_id}, strategy: {strategy_id}, force_refresh: {force_refresh}")
        start_time = time.time()

        # Use user_id or default to 1
        current_user_id = user_id or 1

        if not force_refresh:
            logger.info(f"🔍 Checking database for existing AI analysis for user {current_user_id}")
            existing_analysis = await self.ai_analysis_db_service.get_latest_ai_analysis(
                user_id=current_user_id,
                analysis_type="comprehensive_analysis",
                strategy_id=strategy_id,
                max_age_hours=24  # Use cached results up to 24 hours old
            )

            if existing_analysis:
                logger.info(f"✅ Found existing AI analysis in database: {existing_analysis.get('id', 'unknown')}")

                now = datetime.utcnow()
                # ``created_at`` may be absent, None, a datetime or (if the record
                # was serialized) an ISO-8601 string; normalize to naive UTC so the
                # age arithmetic below cannot fail on a type mismatch.
                created_at = existing_analysis.get('created_at') or now
                if isinstance(created_at, str):
                    created_at = datetime.fromisoformat(created_at)
                if created_at.tzinfo is not None:
                    created_at = created_at.astimezone(timezone.utc).replace(tzinfo=None)

                cached_insights = existing_analysis.get('insights') or []
                cached_recommendations = existing_analysis.get('recommendations') or []
                cached_processing_time = existing_analysis.get('processing_time')

                return {
                    "insights": cached_insights,
                    "recommendations": cached_recommendations,
                    "total_insights": len(cached_insights),
                    "total_recommendations": len(cached_recommendations),
                    "generated_at": created_at.isoformat(),
                    "ai_service_status": existing_analysis.get('ai_service_status', 'operational'),
                    "processing_time": f"{cached_processing_time:.2f}s" if cached_processing_time else "cached",
                    "personalized_data_used": bool(existing_analysis.get('personalized_data_used')),
                    "data_source": "database_cache",
                    "cache_age_hours": (now - created_at).total_seconds() / 3600,
                    # NOTE(review): the profile is read from 'personalized_data_used',
                    # exactly as before — confirm that is the intended field.
                    "user_profile": existing_analysis.get('personalized_data_used', {})
                }

        # No recent analysis found or force refresh requested, run new AI analysis
        logger.info(f"🔄 Running new AI analysis for user {current_user_id} (force_refresh: {force_refresh})")

        # A missing onboarding record must not abort the whole analysis.
        personalized_inputs = self.onboarding_service.get_personalized_ai_inputs(current_user_id) or {}
        logger.info(f"📊 Using personalized inputs: {len(personalized_inputs)} data points")

        analysis_strategy_id = strategy_id or 1

        logger.info("🔍 Generating performance analysis...")
        performance_analysis = await self.ai_analytics_service.analyze_performance_trends(
            strategy_id=analysis_strategy_id
        )

        logger.info("🧠 Generating strategic intelligence...")
        strategic_intelligence = await self.ai_analytics_service.generate_strategic_intelligence(
            strategy_id=analysis_strategy_id
        )

        logger.info("📈 Analyzing content evolution...")
        evolution_analysis = await self.ai_analytics_service.analyze_content_evolution(
            strategy_id=analysis_strategy_id
        )

        # Merge insights AND recommendations. Previously only insights were
        # collected, so the returned and stored recommendations were always empty.
        insights: List[Any] = []
        recommendations: List[Any] = []
        for analysis in (performance_analysis, strategic_intelligence, evolution_analysis):
            if analysis:
                insights.extend(analysis.get('insights') or [])
                recommendations.extend(analysis.get('recommendations') or [])

        total_time = time.time() - start_time
        logger.info(f"🎉 AI analytics completed in {total_time:.2f}s: {len(insights)} insights, {len(recommendations)} recommendations")

        ai_service_status = "operational" if insights else "fallback"

        # Storing is best effort: a persistence failure is logged, not raised.
        try:
            await self.ai_analysis_db_service.store_ai_analysis_result(
                user_id=current_user_id,
                analysis_type="comprehensive_analysis",
                insights=insights,
                recommendations=recommendations,
                performance_metrics=performance_analysis,
                personalized_data=personalized_inputs,
                processing_time=total_time,
                strategy_id=strategy_id,
                ai_service_status=ai_service_status
            )
            logger.info(f"💾 AI analysis results stored in database for user {current_user_id}")
        except Exception as e:
            logger.error(f"❌ Failed to store AI analysis in database: {str(e)}")

        website_analysis = personalized_inputs.get('website_analysis') or {}
        return {
            "insights": insights,
            "recommendations": recommendations,
            "total_insights": len(insights),
            "total_recommendations": len(recommendations),
            "generated_at": datetime.utcnow().isoformat(),
            "ai_service_status": ai_service_status,
            "processing_time": f"{total_time:.2f}s",
            "personalized_data_used": True,
            "data_source": "ai_analysis",
            "user_profile": {
                "website_url": website_analysis.get('website_url', ''),
                "content_types": website_analysis.get('content_types', []),
                "target_audience": website_analysis.get('target_audience', []),
                "industry_focus": website_analysis.get('industry_focus', 'general')
            }
        }
    except Exception as e:
        logger.error(f"❌ Error generating AI analytics: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "get_ai_analytics")
async def get_user_ai_analysis_results(self, user_id: int, analysis_type: Optional[str] = None, limit: int = 10) -> Dict[str, Any]:
    """Fetch stored AI analyses for a user, optionally restricted to one analysis type.

    Args:
        user_id: Owner of the analyses.
        analysis_type: When truthy, passed to the DB service as a one-element
            list; otherwise no type filter (``None``) is passed.
        limit: Maximum number of records, forwarded to the DB service.

    Returns:
        Dict with ``user_id``, ``results`` (each record's ``to_dict()``) and
        ``total_results``.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised while fetching or serializing.
    """
    try:
        logger.info(f"Fetching AI analysis results for user {user_id}")

        type_filter = None
        if analysis_type:
            type_filter = [analysis_type]

        records = await self.ai_analysis_db_service.get_user_ai_analyses(
            user_id=user_id,
            analysis_types=type_filter,
            limit=limit,
        )

        serialized = [record.to_dict() for record in records]
        return {
            "user_id": user_id,
            "results": serialized,
            "total_results": len(records),
        }
    except Exception as exc:
        logger.error(f"Error fetching AI analysis results: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "get_user_ai_analysis_results")
async def refresh_ai_analysis(self, user_id: int, analysis_type: str, strategy_id: Optional[int] = None) -> Dict[str, Any]:
    """Drop stored analyses so that the next request for ``analysis_type`` is recomputed.

    The analysis type is validated BEFORE anything is deleted; previously an
    unknown type was only rejected after the stored analyses had already been
    removed.

    Args:
        user_id: User requesting the refresh (used in log and response messages).
        analysis_type: One of ``comprehensive_analysis``, ``gap_analysis`` or
            ``strategic_intelligence``.
        strategy_id: Accepted for interface compatibility; not used here.

    Returns:
        Dict with a single ``message`` key confirming the refresh.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for an unknown analysis type or any failure while deleting.
    """
    try:
        logger.info(f"Force refreshing AI analysis for user {user_id}, type: {analysis_type}")

        refresh_messages = {
            "comprehensive_analysis": f"AI analysis refresh initiated for user {user_id}",
            "gap_analysis": f"Gap analysis refresh initiated for user {user_id}",
            "strategic_intelligence": f"Strategic intelligence refresh initiated for user {user_id}",
        }
        if analysis_type not in refresh_messages:
            raise ValueError(f"Unknown analysis type: {analysis_type}")

        # NOTE(review): the delete is called without a user or type argument —
        # confirm delete_old_ai_analyses(days_old=0) is scoped as intended.
        await self.ai_analysis_db_service.delete_old_ai_analyses(days_old=0)

        return {"message": refresh_messages[analysis_type]}
    except Exception as e:
        logger.error(f"Error refreshing AI analysis: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "refresh_ai_analysis")
async def clear_ai_analysis_cache(self, user_id: int, analysis_type: Optional[str] = None) -> Dict[str, Any]:
    """Delete stored AI analyses and report how many were removed.

    Args:
        user_id: User on whose behalf the cache is cleared (used in messages).
        analysis_type: Accepted, but the same delete call is issued whether or
            not it is provided.

    Returns:
        Dict with a ``message`` containing the count returned by the DB service.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised while deleting.
    """
    try:
        logger.info(f"Clearing AI analysis cache for user {user_id}")

        # Both the "specific type" and the "all types" paths issue this call.
        removed = await self.ai_analysis_db_service.delete_old_ai_analyses(days_old=0)
        return {"message": f"Cleared {removed} cached results for user {user_id}"}
    except Exception as exc:
        logger.error(f"Error clearing AI analysis cache: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "clear_ai_analysis_cache")
async def get_ai_analysis_statistics(self, user_id: Optional[int] = None) -> Dict[str, Any]:
    """Return AI analysis statistics, per user when ``user_id`` is truthy, else global.

    Args:
        user_id: User to report on; a falsy value requests global statistics.

    Returns:
        Dict with ``statistics`` and ``message``; the user-specific variant also
        carries ``user_id``.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised while querying.
    """
    try:
        logger.info(f"📊 Getting AI analysis statistics for user: {user_id}")

        if not user_id:
            # No user given: ask the DB service for statistics without a filter.
            stats = await self.ai_analysis_db_service.get_analysis_statistics()
            return {
                "statistics": stats,
                "message": "Global AI analysis statistics retrieved successfully",
            }

        stats = await self.ai_analysis_db_service.get_analysis_statistics(user_id)
        return {
            "user_id": user_id,
            "statistics": stats,
            "message": "User-specific AI analysis statistics retrieved successfully",
        }
    except Exception as exc:
        logger.error(f"❌ Error getting AI analysis statistics: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "get_ai_analysis_statistics")

View File

@@ -0,0 +1,407 @@
"""
Calendar Generation Service for Content Planning API
Extracted business logic from the calendar generation route for better separation of concerns.
"""
from typing import Dict, Any, List, Optional
from datetime import datetime
from loguru import logger
from sqlalchemy.orm import Session
import time
# Import database service
from services.content_planning_db import ContentPlanningDBService
# Import calendar generator service
from services.calendar_generator_service import CalendarGeneratorService
# Import validation service
from services.validation import check_all_api_keys
# Import utilities
from ..utils.error_handlers import ContentPlanningErrorHandler
from ..utils.response_builders import ResponseBuilder
from ..utils.constants import ERROR_MESSAGES, SUCCESS_MESSAGES
class CalendarGenerationService:
    """Business-logic layer for the calendar generation endpoints.

    All work is delegated to a ``CalendarGeneratorService`` instance.
    """

    def __init__(self):
        """Create the underlying calendar generator with its default constructor."""
        self.calendar_generator_service = CalendarGeneratorService()
async def generate_comprehensive_calendar(
    self,
    user_id: int,
    strategy_id: Optional[int] = None,
    calendar_type: str = "monthly",
    industry: Optional[str] = None,
    business_size: str = "sme",
) -> Dict[str, Any]:
    """Generate a content calendar through the calendar generator service.

    Args:
        user_id: User the calendar is generated for.
        strategy_id: Optional strategy, forwarded unchanged.
        calendar_type: Calendar granularity, forwarded unchanged.
        industry: Optional industry, forwarded unchanged.
        business_size: Business size label, forwarded unchanged.

    Returns:
        Whatever ``generate_ai_powered_calendar`` returns.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised during generation.
    """
    try:
        logger.info(f"🎯 Generating comprehensive calendar for user {user_id}")
        started = time.time()

        generator = self.calendar_generator_service
        calendar_data = await generator.generate_ai_powered_calendar(
            user_id=user_id,
            strategy_id=strategy_id,
            calendar_type=calendar_type,
            industry=industry,
            business_size=business_size,
        )

        processing_time = time.time() - started
        logger.info(f"✅ Calendar generated successfully in {processing_time:.2f}s")
        return calendar_data
    except Exception as e:
        import traceback

        # Log message, exception type and full traceback before converting the error.
        logger.error(f"❌ Error generating comprehensive calendar: {str(e)}")
        logger.error(f"Exception type: {type(e)}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "generate_comprehensive_calendar")
async def optimize_content_for_platform(self, user_id: int, title: str, description: str,
                                        content_type: str, target_platform: str, event_id: Optional[int] = None) -> Dict[str, Any]:
    """Build a platform optimization response for one piece of content.

    The AI engine is asked for content recommendations; the descriptions of the
    first three become ``platform_adaptations``. Every other optimization field
    in the response is a fixed placeholder value, and ``optimized_content``
    mirrors the original content.

    Args:
        user_id: User the content belongs to.
        title: Content title.
        description: Content description.
        content_type: Content type label.
        target_platform: Platform to optimize for.
        event_id: Optional calendar event id, echoed back in the response.

    Returns:
        Dict with the original and optimized content, ``platform_adaptations``,
        placeholder recommendation fields, ``optimization_score`` and
        ``created_at`` (naive ``datetime.utcnow()``).

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised during optimization.
    """
    try:
        logger.info(f"🔧 Starting content optimization for user {user_id}")

        # TODO: re-enable API key validation. It was disabled for testing; the
        # intended check is check_all_api_keys(APIKeyManager()) with a failure
        # when its "all_valid" flag is false.

        user_data = await self.calendar_generator_service._get_comprehensive_user_data(
            user_id,
            None  # No strategy_id for content optimization
        )

        # Single description of the content, copied wherever it is needed so the
        # request and the two response sections never share one mutable dict.
        content_snapshot = {
            "title": title,
            "description": description,
            "content_type": content_type,
            "target_platform": target_platform
        }

        optimization_result = await self.calendar_generator_service.ai_engine.generate_content_recommendations(
            analysis_data={
                "original_content": dict(content_snapshot),
                "user_context": {
                    "industry": user_data.get('industry', 'technology'),
                    "target_audience": user_data.get('target_audience', {}),
                    "performance_data": user_data.get('performance_data', {}),
                    "gap_analysis": user_data.get('gap_analysis', {})
                }
            }
        )

        # An empty/None AI result yields no adaptations instead of a TypeError.
        top_recommendations = (optimization_result or [])[:3]

        response_data = {
            "user_id": user_id,
            "event_id": event_id,
            "original_content": dict(content_snapshot),
            "optimized_content": dict(content_snapshot),
            "platform_adaptations": [rec.get('description', '') for rec in top_recommendations],
            "visual_recommendations": ["Use engaging visuals", "Include relevant images", "Optimize for mobile"],
            "hashtag_suggestions": ["#content", "#marketing", "#digital"],
            "keyword_optimization": {"primary": "content", "secondary": ["marketing", "digital"]},
            "tone_adjustments": {"tone": "professional", "style": "informative"},
            "length_optimization": {"optimal_length": "150-300 words", "format": "paragraphs"},
            "performance_prediction": {"engagement_rate": 0.05, "reach": 1000},
            "optimization_score": 0.8,
            "created_at": datetime.utcnow()
        }

        logger.info(f"✅ Content optimization completed for user {user_id}")
        return response_data
    except Exception as e:
        logger.error(f"❌ Error optimizing content: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "optimize_content_for_platform")
async def predict_content_performance(
    self,
    user_id: int,
    content_type: str,
    platform: str,
    content_data: Dict[str, Any],
    strategy_id: Optional[int] = None,
) -> Dict[str, Any]:
    """Predict performance for a piece of content via the AI engine's structured response.

    Args:
        user_id: User the content belongs to.
        content_type: Content type label, embedded in the prompt.
        platform: Target platform, embedded in the prompt.
        content_data: Content details, embedded in the prompt.
        strategy_id: Optional strategy used when loading user data.

    Returns:
        Dict echoing the identifiers plus the predicted metrics; each metric
        falls back to a fixed default when the AI response omits it.
        ``created_at`` is a naive ``datetime.utcnow()``.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised during prediction.
    """
    try:
        logger.info(f"📊 Starting performance prediction for user {user_id}")

        generator = self.calendar_generator_service
        user_data = await generator._get_comprehensive_user_data(user_id, strategy_id)

        # The prompt text starts at column 0 so the literal matches the original exactly.
        prediction_prompt = f"""
Predict performance for the following content:
Content Data:
- Content Type: {content_type}
- Platform: {platform}
- Content Data: {content_data}
User Context:
- Industry: {user_data.get('industry', 'technology')}
- Performance Data: {user_data.get('performance_data', {})}
- Gap Analysis: {user_data.get('gap_analysis', {})}
- Audience Insights: {user_data.get('onboarding_data', {}).get('target_audience', {})}
Provide performance predictions including:
1. Engagement rate
2. Reach estimates
3. Conversion predictions
4. ROI estimates
5. Confidence score
6. Recommendations
"""

        response_schema = {
            "type": "object",
            "properties": {
                "predicted_engagement_rate": {"type": "number"},
                "predicted_reach": {"type": "integer"},
                "predicted_conversions": {"type": "integer"},
                "predicted_roi": {"type": "number"},
                "confidence_score": {"type": "number"},
                "recommendations": {"type": "array", "items": {"type": "string"}}
            }
        }
        prediction_result = await generator.ai_engine.generate_structured_response(
            prompt=prediction_prompt,
            schema=response_schema,
        )

        # Fallback used for each metric the AI response does not provide.
        metric_defaults = {
            "predicted_engagement_rate": 0.05,
            "predicted_reach": 1000,
            "predicted_conversions": 10,
            "predicted_roi": 2.5,
            "confidence_score": 0.75,
            "recommendations": [],
        }

        response_data = {
            "user_id": user_id,
            "strategy_id": strategy_id,
            "content_type": content_type,
            "platform": platform,
        }
        for metric, fallback in metric_defaults.items():
            response_data[metric] = prediction_result.get(metric, fallback)
        response_data["created_at"] = datetime.utcnow()

        logger.info(f"✅ Performance prediction completed for user {user_id}")
        return response_data
    except Exception as exc:
        logger.error(f"❌ Error predicting content performance: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "predict_content_performance")
async def repurpose_content_across_platforms(
    self,
    user_id: int,
    original_content: Dict[str, Any],
    target_platforms: List[str],
    strategy_id: Optional[int] = None,
) -> Dict[str, Any]:
    """Ask the AI engine how to repurpose content for several platforms.

    Args:
        user_id: User the content belongs to.
        original_content: Content to repurpose, embedded in the prompt and echoed back.
        target_platforms: Platforms to adapt to, embedded in the prompt.
        strategy_id: Optional strategy used when loading user data.

    Returns:
        Dict echoing the identifiers and original content plus the AI's
        ``platform_adaptations``, ``transformations``, ``implementation_tips``
        and ``gap_addresses`` (each ``[]`` when absent) and ``created_at``
        (naive ``datetime.utcnow()``).

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised during repurposing.
    """
    try:
        logger.info(f"🔄 Starting content repurposing for user {user_id}")

        generator = self.calendar_generator_service
        user_data = await generator._get_comprehensive_user_data(user_id, strategy_id)

        # The prompt text starts at column 0 so the literal matches the original exactly.
        repurposing_prompt = f"""
Repurpose the following content for multiple platforms:
Original Content:
{original_content}
Target Platforms:
{target_platforms}
User Context:
- Gap Analysis: {user_data.get('gap_analysis', {})}
- Strategy Data: {user_data.get('strategy_data', {})}
- Recommendations: {user_data.get('recommendations_data', [])}
Provide repurposing suggestions including:
1. Platform-specific adaptations
2. Content transformations
3. Implementation tips
4. Gap addressing opportunities
"""

        response_schema = {
            "type": "object",
            "properties": {
                "platform_adaptations": {"type": "array", "items": {"type": "object"}},
                "transformations": {"type": "array", "items": {"type": "object"}},
                "implementation_tips": {"type": "array", "items": {"type": "string"}},
                "gap_addresses": {"type": "array", "items": {"type": "string"}}
            }
        }
        repurposing_result = await generator.ai_engine.generate_structured_response(
            prompt=repurposing_prompt,
            schema=response_schema,
        )

        response_data = {
            "user_id": user_id,
            "strategy_id": strategy_id,
            "original_content": original_content,
        }
        # Copy each list-valued section from the AI result, defaulting to [].
        for section in ("platform_adaptations", "transformations", "implementation_tips", "gap_addresses"):
            response_data[section] = repurposing_result.get(section, [])
        response_data["created_at"] = datetime.utcnow()

        logger.info(f"✅ Content repurposing completed for user {user_id}")
        return response_data
    except Exception as exc:
        logger.error(f"❌ Error repurposing content: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "repurpose_content_across_platforms")
async def get_trending_topics(self, user_id: int, industry: str, limit: int = 10) -> Dict[str, Any]:
    """Return up to ``limit`` trending topics with gap-relevance and audience-alignment scores.

    Args:
        user_id: User whose data is used for scoring.
        industry: Industry passed to the trending-topic lookup.
        limit: Number of topics kept (applied as a list slice).

    Returns:
        Dict with ``user_id``, ``industry``, ``trending_topics`` and two score
        maps keyed by each topic's ``keyword`` (``""`` when a topic has none),
        plus ``created_at`` (naive ``datetime.utcnow()``).

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised while collecting topics.
    """
    try:
        logger.info(f"📈 Getting trending topics for user {user_id} in {industry}")

        generator = self.calendar_generator_service
        user_data = await generator._get_comprehensive_user_data(user_id, None)
        all_topics = await generator._get_trending_topics_from_db(industry, user_data)
        selected = all_topics[:limit]

        gap_scores = {}
        audience_scores = {}
        for entry in selected:
            keyword = entry.get("keyword", "")
            # Scores are computed per topic, gap relevance first, as in the original loop.
            gap_scores[keyword] = generator._assess_gap_relevance(entry, user_data.get("gap_analysis", {}))
            audience_scores[keyword] = generator._assess_audience_alignment(entry, user_data.get("onboarding_data", {}))

        response_data = {
            "user_id": user_id,
            "industry": industry,
            "trending_topics": selected,
            "gap_relevance_scores": gap_scores,
            "audience_alignment_scores": audience_scores,
            "created_at": datetime.utcnow(),
        }

        logger.info(f"✅ Trending topics retrieved for user {user_id}")
        return response_data
    except Exception as exc:
        logger.error(f"❌ Error getting trending topics: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "get_trending_topics")
async def get_comprehensive_user_data(self, user_id: int) -> Dict[str, Any]:
    """Load the calendar generator's comprehensive user data and wrap it in a status envelope.

    Args:
        user_id: User whose data is loaded (no strategy id is passed).

    Returns:
        Dict with ``status``, ``data``, ``message`` and ``timestamp``. The
        timestamp comes from ``datetime.now()`` (local time), unlike the
        ``utcnow()`` used by the sibling methods.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised while loading.
    """
    try:
        logger.info(f"Getting comprehensive user data for user_id: {user_id}")
        logger.info("Calling calendar generator service...")

        generator = self.calendar_generator_service
        comprehensive_data = await generator._get_comprehensive_user_data(user_id, None)

        logger.info(f"Calendar generator service returned: {type(comprehensive_data)}")
        logger.info(f"Successfully retrieved comprehensive user data for user_id: {user_id}")

        envelope = {
            "status": "success",
            "data": comprehensive_data,
            "message": "Comprehensive user data retrieved successfully",
            "timestamp": datetime.now().isoformat(),
        }
        return envelope
    except Exception as e:
        import traceback

        # Log message, exception type and full traceback before converting the error.
        logger.error(f"Error getting comprehensive user data for user_id {user_id}: {str(e)}")
        logger.error(f"Exception type: {type(e)}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "get_comprehensive_user_data")
async def health_check(self) -> Dict[str, Any]:
    """Report the health of the calendar generation stack.

    API keys are checked with ``check_all_api_keys`` and the database is probed
    by fetching the content gap analyses of user ``1`` when the calendar
    generator exposes a non-None ``content_planning_db_service``.

    Returns:
        Dict with ``service``, overall ``status``, ``timestamp`` and, on the
        normal path, ``components`` and ``api_keys``. If the check itself
        fails, an ``unhealthy`` dict with an ``error`` string is returned
        instead of raising.
    """
    try:
        logger.info("🏥 Performing calendar generation health check")

        # Imported here rather than at module level, as in the original.
        from services.api_key_manager import APIKeyManager
        api_key_status = check_all_api_keys(APIKeyManager())

        db_status = "healthy"
        try:
            db_service = getattr(self.calendar_generator_service, 'content_planning_db_service', None)
            if db_service is None:
                db_status = "not_initialized"
            else:
                await db_service.get_user_content_gap_analyses(1)
        except Exception as db_error:
            db_status = f"error: {str(db_error)}"

        keys_valid = api_key_status.get("all_valid", False)
        overall = "healthy" if keys_valid and db_status == "healthy" else "unhealthy"

        health_status = {
            "service": "calendar_generation",
            "status": overall,
            "timestamp": datetime.utcnow().isoformat(),
            "components": {
                "ai_services": "healthy" if keys_valid else "unhealthy",
                "database": db_status,
                "calendar_generator": "healthy",
            },
            "api_keys": api_key_status,
        }

        logger.info("✅ Calendar generation health check completed")
        return health_status
    except Exception as exc:
        logger.error(f"❌ Calendar generation health check failed: {str(exc)}")
        return {
            "service": "calendar_generation",
            "status": "unhealthy",
            "timestamp": datetime.utcnow().isoformat(),
            "error": str(exc),
        }

View File

@@ -0,0 +1,184 @@
"""
Calendar Service for Content Planning API
Extracted business logic from the calendar events route for better separation of concerns.
"""
from typing import Dict, Any, List, Optional
from datetime import datetime
from loguru import logger
from sqlalchemy.orm import Session
# Import database service
from services.content_planning_db import ContentPlanningDBService
# Import utilities
from ..utils.error_handlers import ContentPlanningErrorHandler
from ..utils.response_builders import ResponseBuilder
from ..utils.constants import ERROR_MESSAGES, SUCCESS_MESSAGES
class CalendarService:
    """Business-logic layer for calendar event operations.

    The service keeps no state of its own; every method receives the database
    session it should work with.
    """

    def __init__(self):
        """Nothing to initialize."""
        return None
async def create_calendar_event(self, event_data: Dict[str, Any], db: Session) -> Dict[str, Any]:
    """Persist a new calendar event and return it as a dict.

    Args:
        event_data: Event fields, passed unchanged to the DB service.
        db: Session used to build the ``ContentPlanningDBService``.

    Returns:
        The created event's ``to_dict()``.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        when the DB service returns a falsy result or anything else fails.
    """
    try:
        logger.info(f"Creating calendar event: {event_data.get('title', 'Unknown')}")

        created_event = await ContentPlanningDBService(db).create_calendar_event(event_data)
        if not created_event:
            raise Exception("Failed to create calendar event")

        logger.info(f"Calendar event created successfully: {created_event.id}")
        return created_event.to_dict()
    except Exception as exc:
        logger.error(f"Error creating calendar event: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "create_calendar_event")
async def get_calendar_events(self, strategy_id: Optional[int] = None, db: Session = None) -> List[Dict[str, Any]]:
    """List calendar events for a strategy.

    Args:
        strategy_id: Strategy to filter by. When falsy, an empty list is
            returned because listing all events is not implemented yet.
        db: Session used to build the ``ContentPlanningDBService``.

    Returns:
        The ``to_dict()`` of every matching event.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised while fetching.
    """
    try:
        logger.info("Fetching calendar events")
        db_service = ContentPlanningDBService(db)

        # TODO: Implement get_all_calendar_events method
        found = await db_service.get_strategy_calendar_events(strategy_id) if strategy_id else []

        return [item.to_dict() for item in found]
    except Exception as exc:
        logger.error(f"Error getting calendar events: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "get_calendar_events")
async def get_calendar_event_by_id(self, event_id: int, db: Session) -> Dict[str, Any]:
    """Fetch one calendar event by id.

    The not-found error is raised outside the ``try`` block. Previously it was
    raised inside it, so the blanket ``except Exception`` caught it and
    re-wrapped it as a general error.

    Args:
        event_id: Id of the event to fetch.
        db: Session used to build the ``ContentPlanningDBService``.

    Returns:
        The event's ``to_dict()``.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_not_found_error``
        when no event is found, or by ``handle_general_error`` when the lookup
        or serialization fails.
    """
    try:
        logger.info(f"Fetching calendar event: {event_id}")
        db_service = ContentPlanningDBService(db)
        event = await db_service.get_calendar_event(event_id)
        # Serialize inside the try so a to_dict() failure is still reported as a general error.
        event_dict = event.to_dict() if event else None
    except Exception as e:
        logger.error(f"Error getting calendar event: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "get_calendar_event_by_id")

    if not event:
        raise ContentPlanningErrorHandler.handle_not_found_error("Calendar event", event_id)
    return event_dict
async def update_calendar_event(self, event_id: int, update_data: Dict[str, Any], db: Session) -> Dict[str, Any]:
    """Update one calendar event and return its new state.

    The not-found error is raised outside the ``try`` block. Previously it was
    raised inside it, so the blanket ``except Exception`` caught it and
    re-wrapped it as a general error.

    Args:
        event_id: Id of the event to update.
        update_data: Fields to change, passed unchanged to the DB service.
        db: Session used to build the ``ContentPlanningDBService``.

    Returns:
        The updated event's ``to_dict()``.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_not_found_error``
        when the DB service returns a falsy result, or by
        ``handle_general_error`` when the update or serialization fails.
    """
    try:
        logger.info(f"Updating calendar event: {event_id}")
        db_service = ContentPlanningDBService(db)
        updated_event = await db_service.update_calendar_event(event_id, update_data)
        # Serialize inside the try so a to_dict() failure is still reported as a general error.
        updated_dict = updated_event.to_dict() if updated_event else None
    except Exception as e:
        logger.error(f"Error updating calendar event: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "update_calendar_event")

    if not updated_event:
        raise ContentPlanningErrorHandler.handle_not_found_error("Calendar event", event_id)
    return updated_dict
async def delete_calendar_event(self, event_id: int, db: Session) -> bool:
    """Delete one calendar event.

    The not-found error is raised outside the ``try`` block. Previously it was
    raised inside it, so the blanket ``except Exception`` caught it and
    re-wrapped it as a general error.

    Args:
        event_id: Id of the event to delete.
        db: Session used to build the ``ContentPlanningDBService``.

    Returns:
        ``True`` when the DB service reports a successful delete.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_not_found_error``
        when the DB service returns a falsy result, or by
        ``handle_general_error`` when the delete itself fails.
    """
    try:
        logger.info(f"Deleting calendar event: {event_id}")
        db_service = ContentPlanningDBService(db)
        deleted = await db_service.delete_calendar_event(event_id)
    except Exception as e:
        logger.error(f"Error deleting calendar event: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "delete_calendar_event")

    if not deleted:
        raise ContentPlanningErrorHandler.handle_not_found_error("Calendar event", event_id)
    return True
async def get_events_by_status(self, strategy_id: int, status: str, db: Session) -> List[Dict[str, Any]]:
    """List a strategy's calendar events that have the given status.

    Args:
        strategy_id: Strategy whose events are queried.
        status: Status value, passed unchanged to the DB service.
        db: Session used to build the ``ContentPlanningDBService``.

    Returns:
        The ``to_dict()`` of every matching event.

    Raises:
        The object returned by ``ContentPlanningErrorHandler.handle_general_error``
        for any exception raised while fetching.
    """
    try:
        logger.info(f"Fetching events for strategy {strategy_id} with status {status}")

        matching = await ContentPlanningDBService(db).get_events_by_status(strategy_id, status)

        serialized = []
        for item in matching:
            serialized.append(item.to_dict())
        return serialized
    except Exception as exc:
        logger.error(f"Error getting events by status: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "get_events_by_status")
async def get_strategy_events(self, strategy_id: int, db: Session) -> Dict[str, Any]:
    """Return all calendar events of a strategy together with their count.

    The result carries the keys ``strategy_id``, ``events_count`` and
    ``events`` (each event converted with ``to_dict()``). Any failure is
    logged and re-raised through
    ``ContentPlanningErrorHandler.handle_general_error``.
    """
    try:
        logger.info(f"Fetching events for strategy: {strategy_id}")
        planning_db = ContentPlanningDBService(db)
        strategy_events = await planning_db.get_strategy_calendar_events(strategy_id)

        # Count first, then serialize, mirroring the evaluation order of the
        # response fields.
        total_events = len(strategy_events)
        serialized = []
        for calendar_event in strategy_events:
            serialized.append(calendar_event.to_dict())

        return {
            'strategy_id': strategy_id,
            'events_count': total_events,
            'events': serialized
        }
    except Exception as exc:
        logger.error(f"Error getting strategy events: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "get_strategy_events")
async def schedule_event(self, event_data: Dict[str, Any], db: Session) -> Dict[str, Any]:
    """Create a calendar event unless a scheduling conflict is reported.

    Returns a dict whose ``status`` is ``"conflict"`` (with the detected
    conflicts and the untouched ``event_data``) or ``"success"`` (with the
    created event). Any failure is logged and re-raised through
    ``ContentPlanningErrorHandler.handle_general_error``.
    """
    try:
        logger.info(f"Scheduling calendar event: {event_data.get('title', 'Unknown')}")

        # Conflict check runs before anything is written.
        found_conflicts = await self._check_scheduling_conflicts(event_data, db)

        if not found_conflicts:
            # No conflicts: create the event and report success.
            new_event = await self.create_calendar_event(event_data, db)
            success_response = {
                "status": "success",
                "message": "Calendar event scheduled successfully",
                "event": new_event
            }
            return success_response

        logger.warning(f"Scheduling conflicts found: {found_conflicts}")
        conflict_response = {
            "status": "conflict",
            "message": "Scheduling conflicts detected",
            "conflicts": found_conflicts,
            "event_data": event_data
        }
        return conflict_response
    except Exception as exc:
        logger.error(f"Error scheduling calendar event: {str(exc)}")
        raise ContentPlanningErrorHandler.handle_general_error(exc, "schedule_event")
async def _check_scheduling_conflicts(self, event_data: Dict[str, Any], db: Session) -> List[Dict[str, Any]]:
    """Return the scheduling conflicts for ``event_data``.

    Placeholder: no conflict detection is implemented yet, so the result is
    always an empty list and neither ``event_data`` nor ``db`` is read.
    """
    try:
        # Placeholder for the conflict checking logic.
        # A real implementation would check for overlapping times, etc.
        no_conflicts = []
        return no_conflicts
    except Exception as exc:
        # Best effort: a failing check is logged and treated as "no conflicts".
        logger.error(f"Error checking scheduling conflicts: {str(exc)}")
        return []

View File

@@ -0,0 +1,346 @@
# Content Strategy Implementation Status & Next Steps
## 📊 **Current Implementation Status**
### **✅ Completed (Phase 1 - Foundation)**
#### **1. Backend Cleanup & Reorganization** ✅
- **✅ Deleted**: Old `strategy_service.py` (superseded by enhanced version)
- **✅ Created**: Modular structure with 12 focused modules
- **✅ Organized**: Related functionality into logical groups
- **✅ Tested**: All imports and routes working correctly
#### **2. AI Analysis Module** ✅ **COMPLETE**
- **✅ AI Recommendations Service**: 180 lines of comprehensive AI analysis
- **✅ Prompt Engineering Service**: 150 lines of specialized prompt creation
- **✅ Quality Validation Service**: 120 lines of quality assessment
- **✅ 5 Analysis Types**: Comprehensive, Audience, Competitive, Performance, Calendar
- **✅ Fallback System**: Robust error handling with fallback recommendations
- **✅ Database Integration**: AI analysis result storage and retrieval
#### **3. Core Infrastructure** ✅
- **✅ Core Strategy Service**: Main orchestration (188 lines)
- **✅ Field Mappings**: Strategic input field definitions (50 lines)
- **✅ Service Constants**: Configuration management (30 lines)
- **✅ API Integration**: Enhanced strategy routes working
### **🔄 In Progress (Phase 2 - Core Modules)**
#### **1. Onboarding Module** 🔄 **HIGH PRIORITY**
**Status**: Placeholder services created, needs implementation
- **❌ Data Integration Service**: Needs real functionality
- **❌ Field Transformation**: Needs logic implementation
- **❌ Data Quality Assessment**: Needs quality scoring
- **❌ Auto-Population**: Needs real data integration
**Next Steps**:
```python
# Priority 1: Implement data_integration.py
- Extract onboarding data processing from monolithic file
- Implement website analysis integration
- Add research preferences processing
- Create API keys data utilization
# Priority 2: Implement field_transformation.py
- Create data to field mapping logic
- Implement field transformation algorithms
- Add validation and error handling
- Test with real onboarding data
# Priority 3: Implement data_quality.py
- Add completeness scoring
- Implement confidence calculation
- Create freshness evaluation
- Add source attribution
```
#### **2. Performance Module** 🔄 **HIGH PRIORITY**
**Status**: Placeholder services created, needs implementation
- **❌ Caching Service**: Needs Redis integration
- **❌ Optimization Service**: Needs performance algorithms
- **❌ Health Monitoring**: Needs system health checks
- **❌ Metrics Collection**: Needs performance tracking
**Next Steps**:
```python
# Priority 1: Implement caching.py
- Add Redis integration for AI analysis cache
- Implement onboarding data cache (30 min TTL)
- Add strategy cache (2 hours TTL)
- Create intelligent cache eviction
# Priority 2: Implement optimization.py
- Add response time optimization
- Implement database query optimization
- Create resource management
- Add performance monitoring
# Priority 3: Implement health_monitoring.py
- Add database health checks
- Implement cache performance monitoring
- Create AI service health assessment
- Add response time tracking
```
#### **3. Utils Module** 🔄 **HIGH PRIORITY**
**Status**: Placeholder services created, needs implementation
- **❌ Data Processors**: Needs utility functions
- **❌ Validators**: Needs validation logic
- **❌ Helper Methods**: Needs common utilities
**Next Steps**:
```python
# Priority 1: Implement data_processors.py
- Add data transformation utilities
- Create data cleaning functions
- Implement data enrichment
- Add data validation helpers
# Priority 2: Implement validators.py
- Add field validation logic
- Implement data type checking
- Create business rule validation
- Add error message generation
```
### **📋 Pending (Phase 3 - Advanced Features)**
#### **1. Real AI Integration** 📋
- **❌ OpenAI Integration**: Connect to actual AI services
- **❌ Advanced Prompts**: Implement sophisticated prompt engineering
- **❌ Machine Learning**: Add ML capabilities
- **❌ Predictive Analytics**: Create predictive insights
#### **2. Enhanced Analytics** 📋
- **❌ Real-time Tracking**: Implement live performance monitoring
- **❌ Advanced Reporting**: Create comprehensive reports
- **❌ Custom Dashboards**: Build user dashboards
- **❌ Export Capabilities**: Add data export features
#### **3. User Experience** 📋
- **❌ Progressive Disclosure**: Implement guided interface
- **❌ Template Strategies**: Add pre-built strategy templates
- **❌ Interactive Tutorials**: Create user onboarding
- **❌ Smart Defaults**: Implement intelligent defaults
## 🎯 **Immediate Next Steps (Next 2-4 Weeks)**
### **Week 1-2: Complete Core Modules**
#### **1. Onboarding Integration** 🔥 **CRITICAL**
```python
# Day 1-2: Implement data_integration.py
- Extract onboarding data processing from monolithic file
- Implement website analysis integration
- Add research preferences processing
- Create API keys data utilization
# Day 3-4: Implement field_transformation.py
- Create data to field mapping logic
- Implement field transformation algorithms
- Add validation and error handling
- Test with real onboarding data
# Day 5-7: Implement data_quality.py
- Add completeness scoring
- Implement confidence calculation
- Create freshness evaluation
- Add source attribution
```
#### **2. Performance Optimization** 🔥 **CRITICAL**
```python
# Day 1-2: Implement caching.py
- Add Redis integration for AI analysis cache
- Implement onboarding data cache (30 min TTL)
- Add strategy cache (2 hours TTL)
- Create intelligent cache eviction
# Day 3-4: Implement optimization.py
- Add response time optimization
- Implement database query optimization
- Create resource management
- Add performance monitoring
# Day 5-7: Implement health_monitoring.py
- Add database health checks
- Implement cache performance monitoring
- Create AI service health assessment
- Add response time tracking
```
#### **3. Utils Implementation** 🔥 **CRITICAL**
```python
# Day 1-2: Implement data_processors.py
- Add data transformation utilities
- Create data cleaning functions
- Implement data enrichment
- Add data validation helpers
# Day 3-4: Implement validators.py
- Add field validation logic
- Implement data type checking
- Create business rule validation
- Add error message generation
```
### **Week 3-4: Testing & Integration**
#### **1. Comprehensive Testing**
```python
# Unit Tests
- Test each service independently
- Add comprehensive test coverage
- Implement mock services for testing
- Create test data fixtures
# Integration Tests
- Test service interactions
- Verify API endpoints
- Test database operations
- Validate error handling
# End-to-End Tests
- Test complete workflows
- Verify user scenarios
- Test performance under load
- Validate real-world usage
```
#### **2. Performance Optimization**
```python
# Performance Testing
- Measure response times
- Optimize database queries
- Implement caching strategies
- Monitor resource usage
# Load Testing
- Test with multiple users
- Verify scalability
- Monitor memory usage
- Optimize for production
```
## 🚀 **Medium-term Goals (Next 2-3 Months)**
### **Phase 2: Enhanced Features**
#### **1. Real AI Integration**
- [ ] Integrate with OpenAI API
- [ ] Add Claude API integration
- [ ] Implement advanced prompt engineering
- [ ] Create machine learning capabilities
#### **2. Advanced Analytics**
- [ ] Real-time performance tracking
- [ ] Advanced reporting system
- [ ] Custom dashboard creation
- [ ] Data export capabilities
#### **3. User Experience Improvements**
- [ ] Progressive disclosure implementation
- [ ] Guided wizard interface
- [ ] Template-based strategies
- [ ] Interactive tutorials
### **Phase 3: Enterprise Features**
#### **1. Advanced AI Capabilities**
- [ ] Multi-model AI integration
- [ ] Custom model training
- [ ] Advanced analytics
- [ ] Predictive insights
#### **2. Collaboration Features**
- [ ] Team collaboration tools
- [ ] Strategy sharing
- [ ] Version control
- [ ] Approval workflows
#### **3. Enterprise Integration**
- [ ] CRM integration
- [ ] Marketing automation
- [ ] Analytics platforms
- [ ] Custom API endpoints
## 📈 **Success Metrics & KPIs**
### **Technical Metrics**
- **Response Time**: < 2 seconds for strategy creation
- **Cache Hit Rate**: > 80% for frequently accessed data
- **Error Rate**: < 1% for all operations
- **Uptime**: > 99.9% availability
### **Quality Metrics**
- **AI Response Quality**: > 85% confidence scores
- **Data Completeness**: > 90% field completion
- **User Satisfaction**: > 4.5/5 rating
- **Strategy Effectiveness**: Measurable ROI improvements
### **Business Metrics**
- **User Adoption**: Growing user base
- **Feature Usage**: High engagement with AI features
- **Customer Retention**: > 90% monthly retention
- **Revenue Impact**: Measurable business value
## 🔧 **Development Guidelines**
### **1. Code Quality Standards**
- **Type Hints**: Use comprehensive type annotations
- **Documentation**: Document all public methods
- **Error Handling**: Implement robust error handling
- **Logging**: Add comprehensive logging
### **2. Testing Strategy**
- **Unit Tests**: Test each service independently
- **Integration Tests**: Test service interactions
- **End-to-End Tests**: Test complete workflows
- **Performance Tests**: Monitor response times
### **3. Performance Considerations**
- **Caching**: Implement intelligent caching strategies
- **Database Optimization**: Use efficient queries
- **Async Operations**: Use async/await for I/O operations
- **Resource Management**: Properly manage memory and connections
## 🎯 **Risk Assessment & Mitigation**
### **High Risk Items**
1. **Onboarding Integration Complexity**: Mitigation - Start with simple implementations
2. **Performance Optimization**: Mitigation - Implement caching first
3. **AI Service Integration**: Mitigation - Use fallback systems
4. **Database Performance**: Mitigation - Optimize queries and add indexing
### **Medium Risk Items**
1. **User Experience**: Mitigation - Implement progressive disclosure
2. **Data Quality**: Mitigation - Add comprehensive validation
3. **Scalability**: Mitigation - Design for horizontal scaling
4. **Maintenance**: Mitigation - Comprehensive documentation and testing
## 📋 **Resource Requirements**
### **Development Team**
- **Backend Developer**: 1-2 developers for core modules
- **AI Specialist**: 1 developer for AI integration
- **DevOps Engineer**: 1 engineer for deployment and monitoring
- **QA Engineer**: 1 engineer for testing and quality assurance
### **Infrastructure**
- **Database**: PostgreSQL with proper indexing
- **Cache**: Redis for performance optimization
- **AI Services**: OpenAI/Claude API integration
- **Monitoring**: Application performance monitoring
### **Timeline**
- **Phase 1 (Core Modules)**: 2-4 weeks
- **Phase 2 (Enhanced Features)**: 2-3 months
- **Phase 3 (Enterprise Features)**: 6-12 months
## 🎉 **Conclusion**
The Content Strategy Services have a solid foundation with the AI Analysis module complete and the core infrastructure in place. The immediate priority is to complete the Onboarding, Performance, and Utils modules to create a fully functional system. With proper implementation of the next steps, the system will provide enterprise-level content strategy capabilities to solopreneurs and small businesses.
**Current Status**: 40% Complete (Foundation + AI Analysis)
**Next Milestone**: 70% Complete (Core Modules)
**Target Completion**: 100% Complete (All Features)

View File

@@ -0,0 +1,363 @@
# Content Strategy Services
## 🎯 **Overview**
The Content Strategy Services module provides comprehensive content strategy management with 30+ strategic inputs, AI-powered recommendations, and enterprise-level analysis capabilities. This modular architecture enables solopreneurs, small business owners, and startups to access expert-level content strategy without requiring expensive digital marketing teams.
## 🏗️ **Architecture**
```
content_strategy/
├── core/ # Main orchestration & configuration
│ ├── strategy_service.py # Main service orchestration
│ ├── field_mappings.py # Strategic input field definitions
│ └── constants.py # Service configuration
├── ai_analysis/ # AI recommendation generation
│ ├── ai_recommendations.py # Comprehensive AI analysis
│ ├── prompt_engineering.py # Specialized prompt creation
│ └── quality_validation.py # Quality assessment & scoring
├── onboarding/ # Onboarding data integration
│ ├── data_integration.py # Onboarding data processing
│ ├── field_transformation.py # Data to field mapping
│ └── data_quality.py # Quality assessment
├── performance/ # Performance optimization
│ ├── caching.py # Cache management
│ ├── optimization.py # Performance optimization
│ └── health_monitoring.py # System health checks
└── utils/ # Data processing utilities
├── data_processors.py # Data processing utilities
└── validators.py # Data validation
```
## 🚀 **Key Features**
### **1. Comprehensive Strategic Inputs (30+ Fields)**
#### **Business Context**
- Business Objectives & Target Metrics
- Content Budget & Team Size
- Implementation Timeline & Market Share
- Competitive Position & Performance Metrics
#### **Audience Intelligence**
- Content Preferences & Consumption Patterns
- Audience Pain Points & Buying Journey
- Seasonal Trends & Engagement Metrics
#### **Competitive Intelligence**
- Top Competitors & Competitor Strategies
- Market Gaps & Industry Trends
- Emerging Trends Analysis
#### **Content Strategy**
- Preferred Formats & Content Mix
- Content Frequency & Optimal Timing
- Quality Metrics & Editorial Guidelines
- Brand Voice Definition
#### **Performance Analytics**
- Traffic Sources & Conversion Rates
- Content ROI Targets & A/B Testing
### **2. AI-Powered Recommendations**
#### **Comprehensive Analysis Types**
- **Comprehensive Strategy**: Full strategic positioning and market analysis
- **Audience Intelligence**: Detailed audience persona development
- **Competitive Intelligence**: Competitor analysis and market positioning
- **Performance Optimization**: Traffic and conversion optimization
- **Content Calendar Optimization**: Scheduling and timing optimization
#### **Quality Assessment**
- AI Response Quality Validation
- Strategic Score Calculation
- Market Positioning Analysis
- Competitive Advantage Extraction
- Risk Assessment & Opportunity Analysis
### **3. Onboarding Data Integration**
#### **Smart Auto-Population**
- Website Analysis Integration
- Research Preferences Processing
- API Keys Data Utilization
- Field Transformation & Mapping
#### **Data Quality Assessment**
- Completeness Scoring
- Confidence Level Calculation
- Data Freshness Evaluation
- Source Attribution
### **4. Performance Optimization**
#### **Caching System**
- AI Analysis Cache (1 hour TTL)
- Onboarding Data Cache (30 minutes TTL)
- Strategy Cache (2 hours TTL)
- Intelligent Cache Eviction
#### **Health Monitoring**
- Database Health Checks
- Cache Performance Monitoring
- AI Service Health Assessment
- Response Time Optimization
## 📊 **Current Implementation Status**
### **✅ Completed Features**
#### **1. Core Infrastructure**
- [x] Modular service architecture
- [x] Core strategy service orchestration
- [x] Strategic input field definitions
- [x] Service configuration management
#### **2. AI Analysis Module**
- [x] AI recommendations service (180 lines)
- [x] Prompt engineering service (150 lines)
- [x] Quality validation service (120 lines)
- [x] 5 specialized analysis types
- [x] Fallback recommendation system
- [x] Quality assessment capabilities
#### **3. Database Integration**
- [x] Enhanced strategy models
- [x] AI analysis result storage
- [x] Onboarding data integration
- [x] Performance metrics tracking
#### **4. API Integration**
- [x] Enhanced strategy routes
- [x] Onboarding data endpoints
- [x] AI analytics endpoints
- [x] Performance monitoring endpoints
### **🔄 In Progress**
#### **1. Onboarding Module**
- [ ] Data integration service implementation
- [ ] Field transformation logic
- [ ] Data quality assessment
- [ ] Auto-population functionality
#### **2. Performance Module**
- [ ] Caching service implementation
- [ ] Optimization algorithms
- [ ] Health monitoring system
- [ ] Performance metrics collection
#### **3. Utils Module**
- [ ] Data processing utilities
- [ ] Validation functions
- [ ] Helper methods
### **📋 Pending Implementation**
#### **1. Advanced AI Features**
- [ ] Real AI service integration
- [ ] Advanced prompt engineering
- [ ] Machine learning models
- [ ] Predictive analytics
#### **2. Enhanced Analytics**
- [ ] Real-time performance tracking
- [ ] Advanced reporting
- [ ] Custom dashboards
- [ ] Export capabilities
#### **3. User Experience**
- [ ] Progressive disclosure
- [ ] Guided wizard interface
- [ ] Template-based strategies
- [ ] Interactive tutorials
## 🎯 **Next Steps Priority**
### **Phase 1: Complete Core Modules (Immediate)**
#### **1. Onboarding Integration** 🔥 **HIGH PRIORITY**
```python
# Priority: Complete onboarding data integration
- Implement data_integration.py with real functionality
- Add field_transformation.py logic
- Implement data_quality.py assessment
- Test auto-population with real data
```
#### **2. Performance Optimization** 🔥 **HIGH PRIORITY**
```python
# Priority: Implement caching and optimization
- Complete caching.py with Redis integration
- Add optimization.py algorithms
- Implement health_monitoring.py
- Add performance metrics collection
```
#### **3. Utils Implementation** 🔥 **HIGH PRIORITY**
```python
# Priority: Add utility functions
- Implement data_processors.py
- Add validators.py functions
- Create helper methods
- Add comprehensive error handling
```
### **Phase 2: Enhanced Features (Short-term)**
#### **1. Real AI Integration**
- [ ] Integrate with actual AI services (OpenAI, Claude, etc.)
- [ ] Implement advanced prompt engineering
- [ ] Add machine learning capabilities
- [ ] Create predictive analytics
#### **2. Advanced Analytics**
- [ ] Real-time performance tracking
- [ ] Advanced reporting system
- [ ] Custom dashboard creation
- [ ] Data export capabilities
#### **3. User Experience Improvements**
- [ ] Progressive disclosure implementation
- [ ] Guided wizard interface
- [ ] Template-based strategies
- [ ] Interactive tutorials
### **Phase 3: Enterprise Features (Long-term)**
#### **1. Advanced AI Capabilities**
- [ ] Multi-model AI integration
- [ ] Custom model training
- [ ] Advanced analytics
- [ ] Predictive insights
#### **2. Collaboration Features**
- [ ] Team collaboration tools
- [ ] Strategy sharing
- [ ] Version control
- [ ] Approval workflows
#### **3. Enterprise Integration**
- [ ] CRM integration
- [ ] Marketing automation
- [ ] Analytics platforms
- [ ] Custom API endpoints
## 🔧 **Development Guidelines**
### **1. Module Boundaries**
- **Respect service responsibilities**: Each module has clear boundaries
- **Use dependency injection**: Services should be loosely coupled
- **Follow single responsibility**: Each service has one primary purpose
- **Maintain clear interfaces**: Well-defined method signatures
### **2. Testing Strategy**
- **Unit tests**: Test each service independently
- **Integration tests**: Test service interactions
- **End-to-end tests**: Test complete workflows
- **Performance tests**: Monitor response times
### **3. Code Quality**
- **Type hints**: Use comprehensive type annotations
- **Documentation**: Document all public methods
- **Error handling**: Implement robust error handling
- **Logging**: Add comprehensive logging
### **4. Performance Considerations**
- **Caching**: Implement intelligent caching strategies
- **Database optimization**: Use efficient queries
- **Async operations**: Use async/await for I/O operations
- **Resource management**: Properly manage memory and connections
## 📈 **Success Metrics**
### **1. Performance Metrics**
- **Response Time**: < 2 seconds for strategy creation
- **Cache Hit Rate**: > 80% for frequently accessed data
- **Error Rate**: < 1% for all operations
- **Uptime**: > 99.9% availability
### **2. Quality Metrics**
- **AI Response Quality**: > 85% confidence scores
- **Data Completeness**: > 90% field completion
- **User Satisfaction**: > 4.5/5 rating
- **Strategy Effectiveness**: Measurable ROI improvements
### **3. Business Metrics**
- **User Adoption**: Growing user base
- **Feature Usage**: High engagement with AI features
- **Customer Retention**: > 90% monthly retention
- **Revenue Impact**: Measurable business value
## 🚀 **Getting Started**
### **1. Setup Development Environment**
```bash
# Install dependencies
pip install -r requirements.txt
# Set up database
python manage.py migrate
# Run tests
python -m pytest tests/
```
### **2. Run the Service**
```bash
# Start the development server
uvicorn main:app --reload
# Access the API
curl http://localhost:8000/api/content-planning/strategies/
```
### **3. Test AI Features**
```python
# Create a strategy with AI recommendations
from api.content_planning.services.content_strategy import ModularEnhancedStrategyService
service = ModularEnhancedStrategyService()
strategy = await service.create_enhanced_strategy(strategy_data, db)
```
## 📚 **Documentation**
- **API Documentation**: `/docs` endpoint for interactive API docs
- **Code Documentation**: Comprehensive docstrings in all modules
- **Architecture Guide**: Detailed system architecture documentation
- **User Guide**: Step-by-step user instructions
## 🤝 **Contributing**
### **1. Development Workflow**
- Create feature branches from `main`
- Write comprehensive tests
- Update documentation
- Submit pull requests
### **2. Code Review Process**
- All changes require code review
- Automated testing must pass
- Documentation must be updated
- Performance impact must be assessed
### **3. Release Process**
- Semantic versioning
- Changelog maintenance
- Automated deployment
- Rollback procedures
## 📞 **Support**
For questions, issues, or contributions:
- **Issues**: Create GitHub issues for bugs or feature requests
- **Discussions**: Use GitHub discussions for questions
- **Documentation**: Check the comprehensive documentation
- **Community**: Join our developer community
---
**Last Updated**: August 2025
**Version**: 1.0.0
**Status**: Active Development

View File

@@ -0,0 +1,8 @@
"""
Content Strategy Module
Modular implementation of enhanced content strategy services.
"""
# Re-export the core strategy service from the package root under the alias
# ModularEnhancedStrategyService; the original class name is not bound here.
from .core.strategy_service import EnhancedStrategyService as ModularEnhancedStrategyService
# Names exported by ``from <package> import *``.
__all__ = ['ModularEnhancedStrategyService']

View File

@@ -0,0 +1,10 @@
"""
AI Analysis Module
AI recommendation generation and analysis services.
"""
# Re-export the three AI analysis services so they can be imported from the
# package root.
from .ai_recommendations import AIRecommendationsService
from .prompt_engineering import PromptEngineeringService
from .quality_validation import QualityValidationService
# Names exported by ``from <package> import *``.
__all__ = ['AIRecommendationsService', 'PromptEngineeringService', 'QualityValidationService']

View File

@@ -0,0 +1,182 @@
"""
AI Recommendations Service
AI recommendation generation and analysis.
"""
import logging
from typing import Dict, Any, Optional, List
from datetime import datetime
from sqlalchemy.orm import Session
# Import database models
from models.enhanced_strategy_models import EnhancedContentStrategy, EnhancedAIAnalysisResult
# Import modular components
from .prompt_engineering import PromptEngineeringService
from .quality_validation import QualityValidationService
logger = logging.getLogger(__name__)
class AIRecommendationsService:
    """Service for AI recommendation generation.

    Runs one specialized analysis per entry in ``self.analysis_types``, stores
    each result as an ``EnhancedAIAnalysisResult`` row and copies the
    aggregated output onto the strategy record.
    """

    def __init__(self):
        self.prompt_engineering_service = PromptEngineeringService()
        self.quality_validation_service = QualityValidationService()

        # Analysis types for comprehensive recommendations
        self.analysis_types = [
            'comprehensive_strategy',
            'audience_intelligence',
            'competitive_intelligence',
            'performance_optimization',
            'content_calendar_optimization'
        ]

    async def generate_comprehensive_recommendations(self, strategy: EnhancedContentStrategy, db: Session) -> None:
        """Generate comprehensive AI recommendations using 5 specialized prompts.

        For every analysis type a result row is added to ``db``; afterwards the
        strategy is updated with the aggregated recommendations and the values
        derived from them by the quality validation service.

        Errors are logged and never raised, because recommendations are an
        enhancement rather than core functionality. On failure the session is
        rolled back so the caller can keep using it.

        Args:
            strategy: Strategy record to analyse and update in place.
            db: Session used to persist the analysis results and the strategy.
        """
        try:
            logger.info(f"Generating comprehensive AI recommendations for strategy: {strategy.id}")

            start_time = datetime.utcnow()

            # Generate recommendations for each analysis type
            ai_recommendations = {}

            for analysis_type in self.analysis_types:
                # Timed per analysis so each stored row reports its own
                # duration instead of the time elapsed since the first one.
                analysis_start = datetime.utcnow()
                try:
                    recommendations = await self._generate_specialized_recommendations(
                        strategy, analysis_type, db
                    )
                    ai_recommendations[analysis_type] = recommendations

                    # Store individual analysis result.
                    # NOTE(review): none of the five insight keys read below is
                    # produced by _parse_ai_response or the fallback data, so
                    # these columns are currently stored as None - confirm the
                    # intended mapping.
                    analysis_result = EnhancedAIAnalysisResult(
                        user_id=strategy.user_id,
                        strategy_id=strategy.id,
                        analysis_type=analysis_type,
                        comprehensive_insights=recommendations.get('comprehensive_insights'),
                        audience_intelligence=recommendations.get('audience_intelligence'),
                        competitive_intelligence=recommendations.get('competitive_intelligence'),
                        performance_optimization=recommendations.get('performance_optimization'),
                        content_calendar_optimization=recommendations.get('content_calendar_optimization'),
                        onboarding_data_used=strategy.onboarding_data_used,
                        processing_time=(datetime.utcnow() - analysis_start).total_seconds(),
                        ai_service_status="operational"
                    )

                    db.add(analysis_result)

                except Exception as e:
                    logger.error(f"Error generating {analysis_type} recommendations: {str(e)}")
                    # Continue with other analysis types

            db.commit()

            # Update strategy with comprehensive AI analysis
            validator = self.quality_validation_service
            strategy.comprehensive_ai_analysis = ai_recommendations
            strategy.strategic_scores = validator.calculate_strategic_scores(ai_recommendations)
            strategy.market_positioning = validator.extract_market_positioning(ai_recommendations)
            strategy.competitive_advantages = validator.extract_competitive_advantages(ai_recommendations)
            strategy.strategic_risks = validator.extract_strategic_risks(ai_recommendations)
            strategy.opportunity_analysis = validator.extract_opportunity_analysis(ai_recommendations)

            db.commit()

            processing_time = (datetime.utcnow() - start_time).total_seconds()
            logger.info(f"Comprehensive AI recommendations generated in {processing_time:.2f} seconds")

        except Exception as e:
            logger.error(f"Error generating comprehensive AI recommendations: {str(e)}")
            # Don't raise error, just log it as this is enhancement, not core functionality.
            # A failed flush/commit leaves the session unusable until it is
            # rolled back, so clean up before returning to the caller.
            try:
                db.rollback()
            except Exception as rollback_error:
                logger.error(f"Error rolling back AI recommendation changes: {str(rollback_error)}")

    async def _generate_specialized_recommendations(self, strategy: EnhancedContentStrategy, analysis_type: str, db: Session) -> Dict[str, Any]:
        """Generate specialized recommendations using specific AI prompts.

        Builds the prompt for ``analysis_type``, calls the AI service and
        structures the response. Any failure is logged and answered with the
        fallback recommendations for that analysis type. ``db`` is accepted
        for interface compatibility and is not used here.
        """
        try:
            # Create prompt based on analysis type
            prompt = self.prompt_engineering_service.create_specialized_prompt(strategy, analysis_type)

            # Generate AI response
            ai_response = await self._call_ai_service(prompt, analysis_type)

            # Parse and structure the response
            return self._parse_ai_response(ai_response, analysis_type)

        except Exception as e:
            logger.error(f"Error generating {analysis_type} recommendations: {str(e)}")
            return self._get_fallback_recommendations(analysis_type)

    async def _call_ai_service(self, prompt: str, analysis_type: str) -> Dict[str, Any]:
        """Call AI service to generate recommendations.

        Placeholder: ``prompt`` is not sent anywhere yet; structured mock data
        for ``analysis_type`` is returned instead.
        """
        # Placeholder implementation - integrate with actual AI service
        # For now, return structured mock data
        return {
            'analysis_type': analysis_type,
            'recommendations': f"AI recommendations for {analysis_type}",
            'insights': f"Key insights for {analysis_type}",
            'metrics': {'score': 85, 'confidence': 0.9}
        }

    def _parse_ai_response(self, ai_response: Dict[str, Any], analysis_type: str) -> Dict[str, Any]:
        """Parse and structure AI response.

        Returns a dict with the keys ``analysis_type``, ``recommendations``,
        ``insights``, ``metrics`` and ``confidence_score``. Missing entries
        default to empty containers, and the confidence defaults to 0.8.
        """
        # Treat an explicit ``None`` like a missing value so the confidence
        # lookup below cannot fail on it.
        metrics = ai_response.get('metrics') or {}
        return {
            'analysis_type': analysis_type,
            'recommendations': ai_response.get('recommendations', []),
            'insights': ai_response.get('insights', []),
            'metrics': metrics,
            'confidence_score': metrics.get('confidence', 0.8)
        }

    def _get_fallback_recommendations(self, analysis_type: str) -> Dict[str, Any]:
        """Get fallback recommendations when AI service fails.

        The result has the same keys as ``_parse_ai_response`` output
        (including ``analysis_type`` and ``confidence_score``), so consumers
        can handle AI and fallback results uniformly. Unknown analysis types
        get a generic fallback entry.
        """
        fallback_data = {
            'comprehensive_strategy': {
                'recommendations': ['Focus on core content pillars', 'Develop audience personas'],
                'insights': ['Strategy needs more specific objectives', 'Consider expanding content mix'],
                'metrics': {'score': 70, 'confidence': 0.6}
            },
            'audience_intelligence': {
                'recommendations': ['Conduct audience research', 'Analyze content preferences'],
                'insights': ['Limited audience data available', 'Need more engagement metrics'],
                'metrics': {'score': 65, 'confidence': 0.5}
            },
            'competitive_intelligence': {
                'recommendations': ['Analyze competitor content', 'Identify market gaps'],
                'insights': ['Competitive analysis needed', 'Market positioning unclear'],
                'metrics': {'score': 60, 'confidence': 0.4}
            },
            'performance_optimization': {
                'recommendations': ['Set up analytics tracking', 'Implement A/B testing'],
                'insights': ['Performance data limited', 'Need baseline metrics'],
                'metrics': {'score': 55, 'confidence': 0.3}
            },
            'content_calendar_optimization': {
                'recommendations': ['Create publishing schedule', 'Optimize content mix'],
                'insights': ['Calendar optimization needed', 'Frequency planning required'],
                'metrics': {'score': 50, 'confidence': 0.2}
            }
        }

        fallback = fallback_data.get(analysis_type, {
            'recommendations': ['General strategy improvement needed'],
            'insights': ['Limited data available for analysis'],
            'metrics': {'score': 50, 'confidence': 0.3}
        })

        return {
            'analysis_type': analysis_type,
            **fallback,
            'confidence_score': fallback['metrics']['confidence']
        }

    async def get_latest_ai_analysis(self, strategy_id: int, db: Session) -> Optional[Dict[str, Any]]:
        """Get latest AI analysis for a strategy.

        Returns the most recently created analysis row for ``strategy_id`` as a
        dict, or ``None`` when no row exists or the query fails (the failure is
        logged).
        """
        try:
            analysis = db.query(EnhancedAIAnalysisResult).filter(
                EnhancedAIAnalysisResult.strategy_id == strategy_id
            ).order_by(EnhancedAIAnalysisResult.created_at.desc()).first()

            return analysis.to_dict() if analysis else None

        except Exception as e:
            logger.error(f"Error getting latest AI analysis: {str(e)}")
            return None

View File

@@ -0,0 +1,169 @@
"""
Prompt Engineering Service
AI prompt creation and management.
"""
import logging
from typing import Dict, Any
# Import database models
from models.enhanced_strategy_models import EnhancedContentStrategy
logger = logging.getLogger(__name__)
class PromptEngineeringService:
    """Service for prompt engineering.

    Builds the text prompt for each supported analysis type from the
    attributes of a content strategy object.
    """

    # (section heading, ((label, strategy attribute name), ...)) in the exact
    # order the lines appear in the generated context.
    _CONTEXT_SECTIONS = (
        ("Business Context:", (
            ("Industry", "industry"),
            ("Business Objectives", "business_objectives"),
            ("Target Metrics", "target_metrics"),
            ("Content Budget", "content_budget"),
            ("Team Size", "team_size"),
            ("Implementation Timeline", "implementation_timeline"),
            ("Market Share", "market_share"),
            ("Competitive Position", "competitive_position"),
            ("Performance Metrics", "performance_metrics"),
        )),
        ("Audience Intelligence:", (
            ("Content Preferences", "content_preferences"),
            ("Consumption Patterns", "consumption_patterns"),
            ("Audience Pain Points", "audience_pain_points"),
            ("Buying Journey", "buying_journey"),
            ("Seasonal Trends", "seasonal_trends"),
            ("Engagement Metrics", "engagement_metrics"),
        )),
        ("Competitive Intelligence:", (
            ("Top Competitors", "top_competitors"),
            ("Competitor Content Strategies", "competitor_content_strategies"),
            ("Market Gaps", "market_gaps"),
            ("Industry Trends", "industry_trends"),
            ("Emerging Trends", "emerging_trends"),
        )),
        ("Content Strategy:", (
            ("Preferred Formats", "preferred_formats"),
            ("Content Mix", "content_mix"),
            ("Content Frequency", "content_frequency"),
            ("Optimal Timing", "optimal_timing"),
            ("Quality Metrics", "quality_metrics"),
            ("Editorial Guidelines", "editorial_guidelines"),
            ("Brand Voice", "brand_voice"),
        )),
        ("Performance & Analytics:", (
            ("Traffic Sources", "traffic_sources"),
            ("Conversion Rates", "conversion_rates"),
            ("Content ROI Targets", "content_roi_targets"),
            ("A/B Testing Capabilities", "ab_testing_capabilities"),
        )),
    )

    # Task/requirements text appended after the context, keyed by analysis type.
    # Each tuple holds the lines of that section in order.
    _TASK_LINES = {
        'comprehensive_strategy': (
            "TASK: Generate a comprehensive content strategy analysis that provides:",
            "1. Strategic positioning and market analysis",
            "2. Audience targeting and persona development",
            "3. Content pillar recommendations with rationale",
            "4. Competitive advantage identification",
            "5. Performance optimization strategies",
            "6. Risk assessment and mitigation plans",
            "7. Implementation roadmap with milestones",
            "8. Success metrics and KPIs",
            "REQUIREMENTS:",
            "- Provide actionable, specific recommendations",
            "- Include data-driven insights",
            "- Consider industry best practices",
            "- Address both short-term and long-term goals",
            "- Provide confidence levels for each recommendation",
        ),
        'audience_intelligence': (
            "TASK: Generate detailed audience intelligence analysis including:",
            "1. Comprehensive audience persona development",
            "2. Content preference analysis and recommendations",
            "3. Consumption pattern insights and optimization",
            "4. Pain point identification and content solutions",
            "5. Buying journey mapping and content alignment",
            "6. Seasonal trend analysis and content planning",
            "7. Engagement pattern analysis and optimization",
            "8. Audience segmentation strategies",
            "REQUIREMENTS:",
            "- Use data-driven insights from provided metrics",
            "- Provide specific content recommendations for each audience segment",
            "- Include engagement optimization strategies",
            "- Consider cultural and behavioral factors",
        ),
        'competitive_intelligence': (
            "TASK: Generate comprehensive competitive intelligence analysis including:",
            "1. Competitor content strategy analysis",
            "2. Market gap identification and opportunities",
            "3. Competitive advantage development strategies",
            "4. Industry trend analysis and implications",
            "5. Emerging trend identification and early adoption strategies",
            "6. Competitive positioning recommendations",
            "7. Market opportunity assessment",
            "8. Competitive response strategies",
            "REQUIREMENTS:",
            "- Analyze provided competitor data thoroughly",
            "- Identify unique market opportunities",
            "- Provide actionable competitive strategies",
            "- Consider both direct and indirect competitors",
        ),
        'performance_optimization': (
            "TASK: Generate performance optimization analysis including:",
            "1. Current performance analysis and benchmarking",
            "2. Traffic source optimization strategies",
            "3. Conversion rate improvement recommendations",
            "4. Content ROI optimization strategies",
            "5. A/B testing framework and recommendations",
            "6. Performance monitoring and analytics setup",
            "7. Optimization roadmap and priorities",
            "8. Success metrics and tracking implementation",
            "REQUIREMENTS:",
            "- Provide specific, measurable optimization strategies",
            "- Include data-driven recommendations",
            "- Consider both technical and content optimizations",
            "- Provide implementation timelines and priorities",
        ),
        'content_calendar_optimization': (
            "TASK: Generate content calendar optimization analysis including:",
            "1. Optimal content frequency and timing analysis",
            "2. Content mix optimization and balance",
            "3. Seasonal content planning and scheduling",
            "4. Content pillar integration and scheduling",
            "5. Platform-specific content adaptation",
            "6. Content repurposing and amplification strategies",
            "7. Editorial calendar optimization",
            "8. Content performance tracking and adjustment",
            "REQUIREMENTS:",
            "- Provide specific scheduling recommendations",
            "- Include content mix optimization strategies",
            "- Consider platform-specific requirements",
            "- Provide seasonal and trend-based planning",
        ),
    }

    def __init__(self):
        """No state is needed; the service only formats text."""
        pass

    def create_specialized_prompt(self, strategy: EnhancedContentStrategy, analysis_type: str) -> str:
        """Create specialized AI prompts for each analysis type.

        Args:
            strategy: Object whose attributes (industry, business_objectives,
                ...) are interpolated into the shared context block.
            analysis_type: One of the keys of ``_TASK_LINES``.

        Returns:
            The context block followed by the task/requirements section for
            ``analysis_type``. For an unrecognised type only the context
            block is returned.
        """
        # The context is always built in full, even for unknown analysis types.
        context_lines = []
        for heading, entries in self._CONTEXT_SECTIONS:
            context_lines.append(heading)
            for label, attribute in entries:
                context_lines.append(f"- {label}: {getattr(strategy, attribute)}")
        base_context = "\n" + "\n".join(context_lines) + "\n"

        task_lines = self._TASK_LINES.get(analysis_type)
        if task_lines is None:
            return base_context

        task_text = "\n".join(task_lines) + "\n"
        return f"\n{base_context}\n{task_text}"

View File

@@ -0,0 +1,166 @@
"""
Quality Validation Service
AI response quality assessment and strategic analysis.
"""
import logging
from typing import Dict, Any, List
logger = logging.getLogger(__name__)
class QualityValidationService:
    """Service for quality validation and strategic analysis.

    Scores AI responses and strategy payloads with simple heuristics. The
    ``extract_*`` methods currently return fixed placeholder data and ignore
    their argument.
    """

    def __init__(self):
        """The service is stateless."""
        pass

    def calculate_strategic_scores(self, ai_recommendations: Dict[str, Any]) -> Dict[str, float]:
        """Calculate strategic performance scores from AI recommendations.

        Args:
            ai_recommendations: Mapping of analysis type to a result dict that
                may carry ``metrics`` with ``score`` and ``confidence``.

        Returns:
            Dict with ``overall_score`` (confidence-weighted mean of the
            individual scores) and four scores derived from it by fixed
            multipliers. All values stay 0.0 when no usable metrics exist.
        """
        scores = {
            'overall_score': 0.0,
            'content_quality_score': 0.0,
            'engagement_score': 0.0,
            'conversion_score': 0.0,
            'innovation_score': 0.0
        }

        total_confidence = 0
        total_score = 0
        for recommendations in ai_recommendations.values():
            if not isinstance(recommendations, dict):
                continue
            metrics = recommendations.get('metrics')
            # Skip entries without a metrics mapping instead of crashing on .get().
            if not isinstance(metrics, dict):
                continue
            confidence = metrics.get('confidence', 0.5)
            total_score += metrics.get('score', 50) * confidence
            total_confidence += confidence

        if total_confidence > 0:
            overall = total_score / total_confidence
            scores['overall_score'] = overall
            # Derived scores are fixed multiples of the overall score.
            # NOTE(review): the 1.1 / 1.05 multipliers are not clamped, so these
            # can exceed the maximum input score — confirm whether that is intended.
            scores['content_quality_score'] = overall * 1.1
            scores['engagement_score'] = overall * 0.9
            scores['conversion_score'] = overall * 0.95
            scores['innovation_score'] = overall * 1.05
        return scores

    def extract_market_positioning(self, ai_recommendations: Dict[str, Any]) -> Dict[str, Any]:
        """Extract market positioning from AI recommendations.

        Currently returns a fixed placeholder; ``ai_recommendations`` is unused.
        """
        return {
            'industry_position': 'emerging',
            'competitive_advantage': 'AI-powered content',
            'market_share': '2.5%',
            'positioning_score': 4
        }

    def extract_competitive_advantages(self, ai_recommendations: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract competitive advantages from AI recommendations.

        Currently returns a fixed placeholder; ``ai_recommendations`` is unused.
        """
        return [
            {
                'advantage': 'AI-powered content creation',
                'impact': 'High',
                'implementation': 'In Progress'
            },
            {
                'advantage': 'Data-driven strategy',
                'impact': 'Medium',
                'implementation': 'Complete'
            }
        ]

    def extract_strategic_risks(self, ai_recommendations: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract strategic risks from AI recommendations.

        Currently returns a fixed placeholder; ``ai_recommendations`` is unused.
        """
        return [
            {
                'risk': 'Content saturation in market',
                'probability': 'Medium',
                'impact': 'High'
            },
            {
                'risk': 'Algorithm changes affecting reach',
                'probability': 'High',
                'impact': 'Medium'
            }
        ]

    def extract_opportunity_analysis(self, ai_recommendations: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract opportunity analysis from AI recommendations.

        Currently returns a fixed placeholder; ``ai_recommendations`` is unused.
        """
        return [
            {
                'opportunity': 'Video content expansion',
                'potential_impact': 'High',
                'implementation_ease': 'Medium'
            },
            {
                'opportunity': 'Social media engagement',
                'potential_impact': 'Medium',
                'implementation_ease': 'High'
            }
        ]

    def validate_ai_response_quality(self, ai_response: Dict[str, Any]) -> Dict[str, Any]:
        """Validate the quality of AI response.

        Args:
            ai_response: Response dict; ``recommendations``, ``insights`` and
                ``metrics`` are the fields counted for completeness.

        Returns:
            Dict with ``completeness``, ``relevance``, ``actionability``,
            ``confidence`` and ``overall_quality``, the mean of those four.
        """
        required_fields = ['recommendations', 'insights', 'metrics']
        present_fields = sum(1 for field in required_fields if field in ai_response)

        # A present-but-None value is treated like a missing one.
        recommendations = ai_response.get('recommendations') or []
        metrics = ai_response.get('metrics') or {}

        components = {
            'completeness': present_fields / len(required_fields),
            # Placeholder logic: relevance only checks that a type is present.
            'relevance': 0.8 if ai_response.get('analysis_type') else 0.5,
            # Placeholder logic: five or more recommendations is fully actionable.
            'actionability': min(1.0, len(recommendations) / 5.0),
            'confidence': metrics.get('confidence', 0.5),
        }

        quality_metrics = dict(components)
        # Average only the component metrics; the overall slot itself must not
        # be counted in the denominator.
        quality_metrics['overall_quality'] = sum(components.values()) / len(components)
        return quality_metrics

    def assess_strategy_quality(self, strategy_data: Dict[str, Any]) -> Dict[str, Any]:
        """Assess the overall quality of a content strategy.

        Args:
            strategy_data: Strategy payload keyed by field name.

        Returns:
            Dict with ``data_completeness``, ``strategic_clarity``,
            ``implementation_readiness``, ``competitive_positioning`` and
            ``overall_quality``, the mean of those four.
        """
        required_fields = [
            'business_objectives', 'target_metrics', 'content_budget',
            'team_size', 'implementation_timeline'
        ]
        present_fields = sum(1 for field in required_fields if strategy_data.get(field))

        components = {
            'data_completeness': present_fields / len(required_fields),
            # The three scores below are placeholder presence checks.
            'strategic_clarity': 0.7 if strategy_data.get('business_objectives') else 0.3,
            'implementation_readiness': 0.6 if strategy_data.get('team_size') else 0.2,
            'competitive_positioning': 0.5 if strategy_data.get('competitive_position') else 0.2,
        }

        quality_assessment = dict(components)
        # Average only the component metrics, not the overall slot itself.
        quality_assessment['overall_quality'] = sum(components.values()) / len(components)
        return quality_assessment

View File

@@ -0,0 +1,10 @@
"""
Core Content Strategy Services
Main orchestration and core functionality.
"""
from .strategy_service import EnhancedStrategyService
from .field_mappings import STRATEGIC_INPUT_FIELDS
from .constants import SERVICE_CONSTANTS
__all__ = ['EnhancedStrategyService', 'STRATEGIC_INPUT_FIELDS', 'SERVICE_CONSTANTS']

View File

@@ -0,0 +1,33 @@
"""
Service Constants for Content Strategy
Configuration and settings for the enhanced strategy service.
"""
# Version tag of the prompt used for each analysis type.
PROMPT_VERSIONS = dict(
    comprehensive_strategy='v2.1',
    audience_intelligence='v2.0',
    competitive_intelligence='v2.0',
    performance_optimization='v2.1',
    content_calendar_optimization='v2.0',
)

# Quality limits; max_response_time is expressed in seconds.
QUALITY_THRESHOLDS = dict(
    min_confidence=0.7,
    min_completeness=0.8,
    max_response_time=30.0,
)

# Cache lifetimes (seconds) and capacity.
CACHE_SETTINGS = dict(
    ai_analysis_cache_ttl=3600,      # 1 hour
    onboarding_data_cache_ttl=1800,  # 30 minutes
    strategy_cache_ttl=7200,         # 2 hours
    max_cache_size=1000,             # maximum cached items
)

# Single entry point bundling the three groups above (same objects, not copies).
SERVICE_CONSTANTS = dict(
    prompt_versions=PROMPT_VERSIONS,
    quality_thresholds=QUALITY_THRESHOLDS,
    cache_settings=CACHE_SETTINGS,
)

View File

@@ -0,0 +1,56 @@
"""
Strategic Input Field Mappings
Definitions for the 30+ strategic input fields.
"""
# Strategic input field names grouped by category (30 fields in 5 groups).
STRATEGIC_INPUT_FIELDS = {
    'business_context': [
        'business_objectives',
        'target_metrics',
        'content_budget',
        'team_size',
        'implementation_timeline',
        'market_share',
        'competitive_position',
        'performance_metrics',
    ],
    'audience_intelligence': [
        'content_preferences',
        'consumption_patterns',
        'audience_pain_points',
        'buying_journey',
        'seasonal_trends',
        'engagement_metrics',
    ],
    'competitive_intelligence': [
        'top_competitors',
        'competitor_content_strategies',
        'market_gaps',
        'industry_trends',
        'emerging_trends',
    ],
    'content_strategy': [
        'preferred_formats',
        'content_mix',
        'content_frequency',
        'optimal_timing',
        'quality_metrics',
        'editorial_guidelines',
        'brand_voice',
    ],
    'performance_analytics': [
        'traffic_sources',
        'conversion_rates',
        'content_roi_targets',
        'ab_testing_capabilities',
    ],
}

# Display metadata per category; 'fields' references the very same list object
# held in STRATEGIC_INPUT_FIELDS rather than a copy.
FIELD_CATEGORIES = {
    category: {
        'name': display_name,
        'description': description,
        'fields': STRATEGIC_INPUT_FIELDS[category],
    }
    for category, display_name, description in (
        ('business_context', 'Business Context', 'Core business objectives and metrics'),
        ('audience_intelligence', 'Audience Intelligence', 'Target audience analysis and insights'),
        ('competitive_intelligence', 'Competitive Intelligence', 'Competitor analysis and market positioning'),
        ('content_strategy', 'Content Strategy', 'Content planning and execution'),
        ('performance_analytics', 'Performance & Analytics', 'Performance tracking and optimization'),
    )
}

View File

@@ -0,0 +1,349 @@
"""
Enhanced Strategy Service - Core Module
Main orchestration service for content strategy operations.
"""
import logging
from typing import Dict, Any, Optional, List, Union
from datetime import datetime
from sqlalchemy.orm import Session
# Import database models
from models.enhanced_strategy_models import EnhancedContentStrategy, EnhancedAIAnalysisResult
# Import modular services
from ..ai_analysis.ai_recommendations import AIRecommendationsService
from ..ai_analysis.prompt_engineering import PromptEngineeringService
from ..ai_analysis.quality_validation import QualityValidationService
# Import onboarding services
from ..onboarding.data_integration import OnboardingDataIntegrationService
from ..onboarding.field_transformation import FieldTransformationService
from ..onboarding.data_quality import DataQualityService
# Import performance services
from ..performance.caching import CachingService
from ..performance.optimization import PerformanceOptimizationService
from ..performance.health_monitoring import HealthMonitoringService
# Import utils services
from ..utils.data_processors import DataProcessorService
from ..utils.validators import ValidationService
# Import core components
from .field_mappings import STRATEGIC_INPUT_FIELDS
from .constants import SERVICE_CONSTANTS
logger = logging.getLogger(__name__)
class EnhancedStrategyService:
    """Enhanced content strategy service with modular architecture.

    Orchestrates the AI-analysis, onboarding, performance and utility
    sub-services to create, read and update ``EnhancedContentStrategy`` rows.
    """

    # Attributes that update_enhanced_strategy must never overwrite from a
    # caller-supplied payload (identity, ownership and creation time).
    _PROTECTED_UPDATE_FIELDS = frozenset({'id', 'user_id', 'created_at'})

    # Keyword arguments create_enhanced_strategy always supplies itself.
    _RESERVED_CREATE_FIELDS = frozenset({'user_id', 'created_at', 'updated_at'})

    def __init__(self):
        """Instantiate every collaborating sub-service."""
        # Initialize AI analysis services
        self.ai_recommendations_service = AIRecommendationsService()
        self.prompt_engineering_service = PromptEngineeringService()
        self.quality_validation_service = QualityValidationService()
        # Initialize onboarding services
        self.onboarding_data_service = OnboardingDataIntegrationService()
        self.field_transformation_service = FieldTransformationService()
        self.data_quality_service = DataQualityService()
        # Initialize performance services
        self.caching_service = CachingService()
        self.performance_optimization_service = PerformanceOptimizationService()
        self.health_monitoring_service = HealthMonitoringService()
        # Initialize utils services
        self.data_processor_service = DataProcessorService()
        self.validation_service = ValidationService()

    async def create_enhanced_strategy(self, strategy_data: Dict[str, Any], user_id: int, db: Session) -> EnhancedContentStrategy:
        """Create enhanced content strategy with all integrations.

        Validates the payload, fills empty fields from onboarding data,
        persists the strategy, then generates AI recommendations and caches
        the result.

        Args:
            strategy_data: Field values for the new strategy.
            user_id: Owner of the strategy.
            db: Database session.

        Returns:
            The persisted, refreshed strategy.

        Raises:
            ValueError: If validation reports the payload as invalid.
            Exception: Any other failure is logged, the session is rolled
                back, and the exception is re-raised.
        """
        try:
            logger.info(f"Creating enhanced strategy for user: {user_id}")

            # Validate strategy data
            validation_result = self.validation_service.validate_strategy_data(strategy_data)
            if not validation_result['is_valid']:
                logger.error(f"Strategy validation failed: {validation_result['errors']}")
                raise ValueError(f"Invalid strategy data: {'; '.join(validation_result['errors'])}")

            # Process onboarding data
            onboarding_data = await self._process_onboarding_data(user_id, db)

            # Transform onboarding data to fields
            field_transformations = self.field_transformation_service.transform_onboarding_data_to_fields(onboarding_data)

            # Merge strategy data with onboarding data
            enhanced_strategy_data = self._merge_strategy_with_onboarding(strategy_data, field_transformations)

            # user_id and both timestamps are passed explicitly below; if the
            # payload also carried them the constructor call would fail with a
            # duplicate-keyword TypeError, so drop them here. A new dict is
            # built so the caller's payload is never mutated.
            model_kwargs = {
                key: value
                for key, value in enhanced_strategy_data.items()
                if key not in self._RESERVED_CREATE_FIELDS
            }

            # Create strategy object
            strategy = EnhancedContentStrategy(
                user_id=user_id,
                **model_kwargs,
                created_at=datetime.utcnow(),
                updated_at=datetime.utcnow()
            )

            # Save to database
            db.add(strategy)
            db.commit()
            db.refresh(strategy)

            # NOTE(review): the row is already committed at this point; a
            # failure in the two steps below still propagates to the caller.
            # Generate AI recommendations
            await self.ai_recommendations_service.generate_comprehensive_recommendations(strategy, db)

            # Cache strategy data
            await self.caching_service.cache_strategy(strategy.id, strategy.to_dict())

            logger.info(f"Enhanced strategy created successfully: {strategy.id}")
            return strategy
        except Exception as e:
            logger.error(f"Error creating enhanced strategy: {str(e)}")
            db.rollback()
            raise

    async def get_enhanced_strategy(self, strategy_id: int, db: Session) -> Optional[EnhancedContentStrategy]:
        """Get enhanced strategy with cached data.

        Returns the cached entry when one exists; otherwise loads the row
        from the database and caches its ``to_dict()`` form. Returns ``None``
        when the strategy does not exist or when any error occurs (logged).

        NOTE(review): the cache is populated with ``strategy.to_dict()``, so a
        cache hit presumably yields a dict rather than a model instance —
        confirm against CachingService and the callers.
        """
        try:
            # Try to get from cache first
            cached_strategy = await self.caching_service.get_cached_strategy(strategy_id)
            if cached_strategy:
                logger.info(f"Retrieved strategy {strategy_id} from cache")
                return cached_strategy

            # Get from database
            strategy = db.query(EnhancedContentStrategy).filter(
                EnhancedContentStrategy.id == strategy_id
            ).first()

            if strategy:
                # Cache the strategy
                await self.caching_service.cache_strategy(strategy_id, strategy.to_dict())
            return strategy
        except Exception as e:
            logger.error(f"Error getting enhanced strategy: {str(e)}")
            return None

    async def update_enhanced_strategy(self, strategy_id: int, update_data: Dict[str, Any], db: Session) -> Optional[EnhancedContentStrategy]:
        """Update enhanced strategy.

        Args:
            strategy_id: Identifier of the strategy to update.
            update_data: Field/value pairs to apply. Keys that are not
                attributes of the strategy are ignored, as are the protected
                fields ``id``, ``user_id`` and ``created_at``.
            db: Database session.

        Returns:
            The updated strategy, or ``None`` if no such strategy exists.

        Raises:
            ValueError: If validation reports the payload as invalid.
            Exception: Any other failure is logged, the session is rolled
                back, and the exception is re-raised.
        """
        try:
            strategy = db.query(EnhancedContentStrategy).filter(
                EnhancedContentStrategy.id == strategy_id
            ).first()
            if not strategy:
                return None

            # Validate update data
            validation_result = self.validation_service.validate_strategy_data(update_data)
            if not validation_result['is_valid']:
                logger.error(f"Strategy update validation failed: {validation_result['errors']}")
                raise ValueError(f"Invalid update data: {'; '.join(validation_result['errors'])}")

            # Update strategy fields, never letting the payload rewrite the
            # primary key, the owner or the creation timestamp.
            ignored_fields = []
            for field, value in update_data.items():
                if field in self._PROTECTED_UPDATE_FIELDS:
                    ignored_fields.append(field)
                    continue
                if hasattr(strategy, field):
                    setattr(strategy, field, value)
            if ignored_fields:
                logger.warning(
                    f"Ignoring protected fields in update for strategy {strategy_id}: {ignored_fields}"
                )
            strategy.updated_at = datetime.utcnow()

            # Save to database
            db.commit()
            db.refresh(strategy)

            # Invalidate cache
            await self.caching_service.invalidate_cache('strategy_cache', str(strategy_id))

            # Regenerate AI recommendations if needed
            if self._should_regenerate_ai_recommendations(update_data):
                await self.ai_recommendations_service.generate_comprehensive_recommendations(strategy, db)

            logger.info(f"Enhanced strategy updated successfully: {strategy_id}")
            return strategy
        except Exception as e:
            logger.error(f"Error updating enhanced strategy: {str(e)}")
            db.rollback()
            raise

    async def get_onboarding_data(self, user_id: int, db: Session) -> Dict[str, Any]:
        """Get onboarding data for auto-population.

        Serves from cache when available, otherwise processes the data and
        caches it. Returns an empty dict on any error (logged).
        """
        try:
            # Try to get from cache first
            cached_data = await self.caching_service.get_cached_onboarding_data(user_id)
            if cached_data:
                logger.info(f"Retrieved onboarding data for user {user_id} from cache")
                return cached_data

            # Process onboarding data
            onboarding_data = await self._process_onboarding_data(user_id, db)

            # Cache the data
            await self.caching_service.cache_onboarding_data(user_id, onboarding_data)
            return onboarding_data
        except Exception as e:
            logger.error(f"Error getting onboarding data: {str(e)}")
            return {}

    async def get_ai_analysis(self, strategy_id: int, analysis_type: str, db: Session) -> Optional[Dict[str, Any]]:
        """Get AI analysis results.

        Serves from cache when available; otherwise returns the newest stored
        analysis of the given type as a dict (and caches it). Returns ``None``
        when nothing is stored or on any error (logged).
        """
        try:
            # Try to get from cache first
            cached_analysis = await self.caching_service.get_cached_ai_analysis(strategy_id, analysis_type)
            if cached_analysis:
                logger.info(f"Retrieved AI analysis for strategy {strategy_id} from cache")
                return cached_analysis

            # Get from database, newest first
            analysis = db.query(EnhancedAIAnalysisResult).filter(
                EnhancedAIAnalysisResult.strategy_id == strategy_id,
                EnhancedAIAnalysisResult.analysis_type == analysis_type
            ).order_by(EnhancedAIAnalysisResult.created_at.desc()).first()

            if analysis:
                analysis_data = analysis.to_dict()
                # Cache the analysis
                await self.caching_service.cache_ai_analysis(strategy_id, analysis_type, analysis_data)
                return analysis_data
            return None
        except Exception as e:
            logger.error(f"Error getting AI analysis: {str(e)}")
            return None

    async def get_system_health(self, db: Session) -> Dict[str, Any]:
        """Get system health status.

        Delegates to the health monitoring service; on failure returns a dict
        with ``overall_status`` set to ``'error'``, the message and a timestamp.
        """
        try:
            return await self.health_monitoring_service.check_system_health(
                db,
                self.caching_service,
                self.ai_recommendations_service
            )
        except Exception as e:
            logger.error(f"Error getting system health: {str(e)}")
            return {
                'overall_status': 'error',
                'error': str(e),
                'timestamp': datetime.utcnow().isoformat()
            }

    async def get_performance_report(self) -> Dict[str, Any]:
        """Get performance optimization report.

        On failure returns a dict holding the error message and a timestamp.
        """
        try:
            return await self.performance_optimization_service.get_performance_report()
        except Exception as e:
            logger.error(f"Error getting performance report: {str(e)}")
            return {
                'error': str(e),
                'timestamp': datetime.utcnow().isoformat()
            }

    async def _process_onboarding_data(self, user_id: int, db: Session) -> Dict[str, Any]:
        """Process onboarding data for a user.

        Returns the integrated onboarding data with a ``quality_assessment``
        entry added, or an empty dict on any error (logged).
        """
        try:
            # Get integrated onboarding data
            integrated_data = await self.onboarding_data_service.process_onboarding_data(user_id, db)

            # Assess data quality
            quality_assessment = self.data_quality_service.assess_onboarding_data_quality(integrated_data)

            # Add quality assessment to integrated data
            integrated_data['quality_assessment'] = quality_assessment
            return integrated_data
        except Exception as e:
            logger.error(f"Error processing onboarding data: {str(e)}")
            return {}

    def _merge_strategy_with_onboarding(self, strategy_data: Dict[str, Any], field_transformations: Dict[str, Any]) -> Dict[str, Any]:
        """Merge strategy data with onboarding field transformations.

        Values supplied by the caller win; onboarding values only fill fields
        that are missing or falsy. On error the original ``strategy_data`` is
        returned unchanged.
        """
        try:
            merged_data = strategy_data.copy()

            # Add auto-populated fields from onboarding data
            if 'fields' in field_transformations:
                for field_name, field_value in field_transformations['fields'].items():
                    if field_name not in merged_data or not merged_data[field_name]:
                        merged_data[field_name] = field_value

            # Add data sources information
            if 'sources' in field_transformations:
                merged_data['data_sources'] = field_transformations['sources']
            return merged_data
        except Exception as e:
            logger.error(f"Error merging strategy with onboarding: {str(e)}")
            return strategy_data

    def _should_regenerate_ai_recommendations(self, update_data: Dict[str, Any]) -> bool:
        """Determine if AI recommendations should be regenerated.

        True when the update touches at least one of the trigger fields.
        """
        try:
            # Fields that would trigger AI recommendation regeneration
            ai_trigger_fields = [
                'business_objectives', 'target_metrics', 'content_budget',
                'team_size', 'implementation_timeline', 'market_share',
                'competitive_position', 'content_preferences', 'audience_pain_points',
                'top_competitors', 'industry_trends'
            ]
            return any(field in update_data for field in ai_trigger_fields)
        except Exception as e:
            logger.error(f"Error checking if AI recommendations should be regenerated: {str(e)}")
            return False

    def get_strategic_input_fields(self) -> Dict[str, List[str]]:
        """Get strategic input field definitions.

        Returns the module-level ``STRATEGIC_INPUT_FIELDS`` mapping of
        category name to field names (the shared object, not a copy).
        """
        return STRATEGIC_INPUT_FIELDS

    def get_service_constants(self) -> Dict[str, Any]:
        """Get service configuration constants."""
        return SERVICE_CONSTANTS

    async def validate_strategy_data(self, strategy_data: Dict[str, Any]) -> Dict[str, Any]:
        """Validate strategy data using the validation service.

        If the validation service itself raises, an ``is_valid: False``
        result describing the error is returned instead.
        """
        try:
            return self.validation_service.validate_strategy_data(strategy_data)
        except Exception as e:
            logger.error(f"Error validating strategy data: {str(e)}")
            return {
                'is_valid': False,
                'errors': [f"Validation error: {str(e)}"],
                'warnings': [],
                'field_validations': {},
                'validation_timestamp': datetime.utcnow().isoformat()
            }

    async def process_data_for_output(self, data: Dict[str, Any], output_format: str = 'json') -> Union[str, Dict[str, Any]]:
        """Process data for different output formats.

        Falls back to ``str(data)`` when the data processor raises.
        """
        try:
            return self.data_processor_service.format_data_for_output(data, output_format)
        except Exception as e:
            logger.error(f"Error processing data for output: {str(e)}")
            return str(data)

    async def optimize_strategy_operation(self, operation_name: str, operation_func, *args, **kwargs) -> Dict[str, Any]:
        """Optimize strategy operations with performance monitoring.

        Forwards ``operation_func`` and its arguments to the performance
        optimization service; on failure returns an error-status result dict.
        """
        try:
            return await self.performance_optimization_service.optimize_response_time(
                operation_name, operation_func, *args, **kwargs
            )
        except Exception as e:
            logger.error(f"Error optimizing strategy operation: {str(e)}")
            return {
                'result': None,
                'response_time': 0.0,
                'optimization_suggestions': ['Error occurred during optimization'],
                'performance_status': 'error'
            }

View File

@@ -0,0 +1,10 @@
"""
Onboarding Module
Onboarding data integration and processing services.
"""
from .data_integration import OnboardingDataIntegrationService
from .field_transformation import FieldTransformationService
from .data_quality import DataQualityService
__all__ = ['OnboardingDataIntegrationService', 'FieldTransformationService', 'DataQualityService']

View File

@@ -0,0 +1,381 @@
"""
Onboarding Data Integration Service
Onboarding data integration and processing.
"""
import logging
from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
from sqlalchemy.orm import Session
# Import database models
from models.enhanced_strategy_models import (
OnboardingDataIntegration
)
from models.onboarding import (
OnboardingSession,
WebsiteAnalysis,
ResearchPreferences,
APIKey
)
logger = logging.getLogger(__name__)
class OnboardingDataIntegrationService:
"""Service for onboarding data integration and processing."""
def __init__(self):
self.data_freshness_threshold = timedelta(hours=24)
self.max_analysis_age = timedelta(days=7)
async def process_onboarding_data(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Collect every onboarding data source for a user into one dict.

    Args:
        user_id: User whose onboarding data is gathered.
        db: Database session passed to each lookup.

    Returns:
        Dict with the website analysis, research preferences, API key data,
        onboarding session, a data-quality assessment and a processing
        timestamp. If any step raises (including storing the result), the
        error is logged and ``_get_fallback_data()`` is returned instead.
    """
    try:
        logger.info(f"Processing onboarding data for user: {user_id}")

        # Gather each source separately.
        website = self._get_website_analysis(user_id, db)
        preferences = self._get_research_preferences(user_id, db)
        keys = self._get_api_keys_data(user_id, db)
        session_info = self._get_onboarding_session(user_id, db)

        # Quality is judged on the three content sources only.
        quality = self._assess_data_quality(website, preferences, keys)

        combined = {
            'website_analysis': website,
            'research_preferences': preferences,
            'api_keys_data': keys,
            'onboarding_session': session_info,
            'data_quality': quality,
            'processing_timestamp': datetime.utcnow().isoformat(),
        }

        # Persist the integrated view before handing it back.
        await self._store_integrated_data(user_id, combined, db)
        logger.info(f"Onboarding data processed successfully for user: {user_id}")
        return combined
    except Exception as exc:
        logger.error(f"Error processing onboarding data for user {user_id}: {str(exc)}")
        return self._get_fallback_data()
def _get_website_analysis(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Return the newest website analysis of the user's latest onboarding session.

    The analysis ``to_dict()`` is extended with ``data_freshness`` and
    ``confidence_level`` (0.9 when the analysis status is ``'completed'``,
    otherwise 0.5). An empty dict is returned when there is no session, no
    analysis, or when the lookup raises (logged).
    """
    try:
        # Most recently updated onboarding session of this user.
        session_query = db.query(OnboardingSession).filter(
            OnboardingSession.user_id == user_id
        )
        latest_session = session_query.order_by(OnboardingSession.updated_at.desc()).first()
        if not latest_session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return {}

        # Most recently updated analysis belonging to that session.
        analysis_query = db.query(WebsiteAnalysis).filter(
            WebsiteAnalysis.session_id == latest_session.id
        )
        record = analysis_query.order_by(WebsiteAnalysis.updated_at.desc()).first()
        if not record:
            logger.warning(f"No website analysis found for user {user_id}")
            return {}

        result = record.to_dict()
        result['data_freshness'] = self._calculate_freshness(record.updated_at)
        if record.status == 'completed':
            result['confidence_level'] = 0.9
        else:
            result['confidence_level'] = 0.5

        logger.info(f"Retrieved website analysis for user {user_id}: {record.website_url}")
        return result
    except Exception as exc:
        logger.error(f"Error getting website analysis for user {user_id}: {str(exc)}")
        return {}
def _get_research_preferences(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Return the research preferences of the user's latest onboarding session.

    The preferences ``to_dict()`` is extended with ``data_freshness`` and a
    fixed ``confidence_level`` of 0.9. An empty dict is returned when there
    is no session, no preferences row, or when the lookup raises (logged).
    """
    try:
        # Most recently updated onboarding session of this user.
        session_query = db.query(OnboardingSession).filter(
            OnboardingSession.user_id == user_id
        )
        latest_session = session_query.order_by(OnboardingSession.updated_at.desc()).first()
        if not latest_session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return {}

        # First preferences row attached to that session (no explicit ordering).
        record = db.query(ResearchPreferences).filter(
            ResearchPreferences.session_id == latest_session.id
        ).first()
        if not record:
            logger.warning(f"No research preferences found for user {user_id}")
            return {}

        result = record.to_dict()
        result['data_freshness'] = self._calculate_freshness(record.updated_at)
        result['confidence_level'] = 0.9

        logger.info(f"Retrieved research preferences for user {user_id}")
        return result
    except Exception as exc:
        logger.error(f"Error getting research preferences for user {user_id}: {str(exc)}")
        return {}
def _get_api_keys_data(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Return the API keys attached to the user's latest onboarding session.

    The result holds each key's ``to_dict()``, the key count, the provider
    of each key, a freshness value computed from the session's
    ``updated_at`` and a fixed ``confidence_level`` of 0.8. An empty dict is
    returned when there is no session, no keys, or when the lookup raises
    (logged).
    """
    try:
        # Most recently updated onboarding session of this user.
        session_query = db.query(OnboardingSession).filter(
            OnboardingSession.user_id == user_id
        )
        latest_session = session_query.order_by(OnboardingSession.updated_at.desc()).first()
        if not latest_session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return {}

        keys = db.query(APIKey).filter(
            APIKey.session_id == latest_session.id
        ).all()
        if not keys:
            logger.warning(f"No API keys found for user {user_id}")
            return {}

        # NOTE(review): whatever key.to_dict() exposes ends up in this payload —
        # confirm it does not include secret key material.
        serialized_keys = [entry.to_dict() for entry in keys]
        key_count = len(keys)
        provider_names = [entry.provider for entry in keys]
        # Freshness is taken from the session, not from the individual keys.
        freshness = self._calculate_freshness(latest_session.updated_at)

        result = {
            'api_keys': serialized_keys,
            'total_keys': key_count,
            'providers': provider_names,
            'data_freshness': freshness,
            'confidence_level': 0.8,
        }
        logger.info(f"Retrieved {len(keys)} API keys for user {user_id}")
        return result
    except Exception as exc:
        logger.error(f"Error getting API keys data for user {user_id}: {str(exc)}")
        return {}
def _get_onboarding_session(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Return a summary dict of the user's latest onboarding session.

    Includes the session id, user id, current step, progress, ISO-formatted
    ``started_at`` / ``updated_at`` (``None`` when unset), a freshness value
    and a fixed ``confidence_level`` of 0.9. An empty dict is returned when
    there is no session or when the lookup raises (logged).
    """
    try:
        # Most recently updated onboarding session of this user.
        session_query = db.query(OnboardingSession).filter(
            OnboardingSession.user_id == user_id
        )
        latest = session_query.order_by(OnboardingSession.updated_at.desc()).first()
        if not latest:
            logger.warning(f"No onboarding session found for user {user_id}")
            return {}

        # Timestamps may be unset; only format the ones that exist.
        started_iso = None
        if latest.started_at:
            started_iso = latest.started_at.isoformat()
        updated_iso = None
        if latest.updated_at:
            updated_iso = latest.updated_at.isoformat()

        summary = {
            'id': latest.id,
            'user_id': latest.user_id,
            'current_step': latest.current_step,
            'progress': latest.progress,
            'started_at': started_iso,
            'updated_at': updated_iso,
            'data_freshness': self._calculate_freshness(latest.updated_at),
            'confidence_level': 0.9,
        }
        logger.info(f"Retrieved onboarding session for user {user_id}: step {latest.current_step}, progress {latest.progress}%")
        return summary
    except Exception as exc:
        logger.error(f"Error getting onboarding session for user {user_id}: {str(exc)}")
        return {}
def _assess_data_quality(self, website_analysis: Dict, research_preferences: Dict, api_keys_data: Dict) -> Dict[str, Any]:
"""Assess the quality and completeness of onboarding data."""
try:
quality_metrics = {
'overall_score': 0.0,
'completeness': 0.0,
'freshness': 0.0,
'relevance': 0.0,
'confidence': 0.0
}
# Calculate completeness
total_fields = 0
filled_fields = 0
# Website analysis completeness
website_fields = ['domain', 'industry', 'business_type', 'target_audience', 'content_goals']
for field in website_fields:
total_fields += 1
if website_analysis.get(field):
filled_fields += 1
# Research preferences completeness
research_fields = ['research_topics', 'content_types', 'target_audience', 'industry_focus']
for field in research_fields:
total_fields += 1
if research_preferences.get(field):
filled_fields += 1
# API keys completeness
total_fields += 1
if api_keys_data:
filled_fields += 1
quality_metrics['completeness'] = filled_fields / total_fields if total_fields > 0 else 0.0
# Calculate freshness
freshness_scores = []
for data_source in [website_analysis, research_preferences]:
if data_source.get('data_freshness'):
freshness_scores.append(data_source['data_freshness'])
quality_metrics['freshness'] = sum(freshness_scores) / len(freshness_scores) if freshness_scores else 0.0
# Calculate relevance (based on data presence and quality)
relevance_score = 0.0
if website_analysis.get('domain'):
relevance_score += 0.4
if research_preferences.get('research_topics'):
relevance_score += 0.3
if api_keys_data:
relevance_score += 0.3
quality_metrics['relevance'] = relevance_score
# Calculate confidence
quality_metrics['confidence'] = (quality_metrics['completeness'] + quality_metrics['freshness'] + quality_metrics['relevance']) / 3
# Calculate overall score
quality_metrics['overall_score'] = quality_metrics['confidence']
return quality_metrics
except Exception as e:
logger.error(f"Error assessing data quality: {str(e)}")
return {
'overall_score': 0.0,
'completeness': 0.0,
'freshness': 0.0,
'relevance': 0.0,
'confidence': 0.0
}
def _calculate_freshness(self, created_at: datetime) -> float:
"""Calculate data freshness score (0.0 to 1.0)."""
try:
age = datetime.utcnow() - created_at
if age <= self.data_freshness_threshold:
return 1.0
elif age <= self.max_analysis_age:
# Linear decay from 1.0 to 0.5
decay_factor = 1.0 - (age - self.data_freshness_threshold) / (self.max_analysis_age - self.data_freshness_threshold) * 0.5
return max(0.5, decay_factor)
else:
return 0.5 # Minimum freshness for old data
except Exception as e:
logger.error(f"Error calculating data freshness: {str(e)}")
return 0.5
def _check_api_data_availability(self, api_key_data: Dict) -> bool:
"""Check if API key has available data."""
try:
# Check if API key has been used recently and has data
if api_key_data.get('last_used') and api_key_data.get('usage_count', 0) > 0:
return api_key_data.get('data_available', False)
return False
except Exception as e:
logger.error(f"Error checking API data availability: {str(e)}")
return False
async def _store_integrated_data(self, user_id: int, integrated_data: Dict[str, Any], db: Session) -> None:
    """Persist the integrated onboarding data for a user (insert or update).

    The website analysis, research preferences and API key sections of
    ``integrated_data`` are written to the user's
    ``OnboardingDataIntegration`` row. A missing section is stored as an
    empty dict. Errors are logged and the transaction is rolled back; the
    exception is not propagated.

    Args:
        user_id: Identifier of the user owning the record.
        integrated_data: Integrated data dict holding the sections to store.
        db: SQLAlchemy session used for the query and the commit.
    """
    try:
        stored = (
            db.query(OnboardingDataIntegration)
            .filter(OnboardingDataIntegration.user_id == user_id)
            .first()
        )

        if not stored:
            # First integration for this user: create the row.
            db.add(OnboardingDataIntegration(
                user_id=user_id,
                website_analysis_data=integrated_data.get('website_analysis', {}),
                research_preferences_data=integrated_data.get('research_preferences', {}),
                api_keys_data=integrated_data.get('api_keys_data', {}),
                created_at=datetime.utcnow(),
                updated_at=datetime.utcnow()
            ))
        else:
            # Row already exists: overwrite the payload and bump the timestamp.
            stored.website_analysis_data = integrated_data.get('website_analysis', {})
            stored.research_preferences_data = integrated_data.get('research_preferences', {})
            stored.api_keys_data = integrated_data.get('api_keys_data', {})
            stored.updated_at = datetime.utcnow()

        db.commit()
        logger.info(f"Integrated onboarding data stored for user: {user_id}")
    except Exception as e:
        logger.error(f"Error storing integrated data for user {user_id}: {str(e)}")
        db.rollback()
def _get_fallback_data(self) -> Dict[str, Any]:
"""Get fallback data when processing fails."""
return {
'website_analysis': {},
'research_preferences': {},
'api_keys_data': {},
'onboarding_session': {},
'data_quality': {
'overall_score': 0.0,
'completeness': 0.0,
'freshness': 0.0,
'relevance': 0.0,
'confidence': 0.0
},
'processing_timestamp': datetime.utcnow().isoformat()
}
async def get_integrated_data(self, user_id: int, db: Session) -> Optional[Dict[str, Any]]:
    """Return the stored integrated onboarding data for a user.

    The integrated data dict is rebuilt from the user's
    ``OnboardingDataIntegration`` row. If the row was updated within
    ``self.data_freshness_threshold`` that dict is returned; otherwise the
    data is reprocessed via ``process_onboarding_data`` and that result is
    returned instead.

    Args:
        user_id: Identifier of the user whose data is requested.
        db: SQLAlchemy session used for the lookup.

    Returns:
        The integrated data dict, or ``None`` when no row exists or the
        lookup raises.
    """
    try:
        stored = (
            db.query(OnboardingDataIntegration)
            .filter(OnboardingDataIntegration.user_id == user_id)
            .first()
        )
        if not stored:
            return None

        # Reconstruct the integrated data from the stored columns; the
        # onboarding session is not persisted, so that section stays empty.
        quality = self._assess_data_quality(
            stored.website_analysis_data or {},
            stored.research_preferences_data or {},
            stored.api_keys_data or {}
        )
        integrated_data = {
            'website_analysis': stored.website_analysis_data or {},
            'research_preferences': stored.research_preferences_data or {},
            'api_keys_data': stored.api_keys_data or {},
            'onboarding_session': {},
            'data_quality': quality,
            'processing_timestamp': stored.updated_at.isoformat()
        }

        record_age = datetime.utcnow() - stored.updated_at
        if record_age <= self.data_freshness_threshold:
            return integrated_data

        # Stored copy is too old to trust: rebuild it from the source tables.
        logger.info(f"Integrated data is stale for user {user_id}, reprocessing...")
        return await self.process_onboarding_data(user_id, db)
    except Exception as e:
        logger.error(f"Error getting integrated data for user {user_id}: {str(e)}")
        return None

View File

@@ -0,0 +1,547 @@
"""
Data Quality Service
Onboarding data quality assessment.
"""
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime, timedelta
logger = logging.getLogger(__name__)
class DataQualityService:
    """Service for assessing data quality and validation.

    Each onboarding data source (website analysis, research preferences,
    API keys, onboarding session) is scored on five metrics: completeness,
    freshness, accuracy, relevance and consistency. The per-source scores
    are averaged into the overall assessment.
    """

    # Metric keys produced by every per-source assessment, in output order.
    _METRICS = ('completeness', 'freshness', 'accuracy', 'relevance', 'consistency')
    # Score reported when a timestamp is present but cannot be interpreted.
    _UNREADABLE_FRESHNESS = 0.5

    def __init__(self):
        """Set the quality-level cut-offs and the freshness decay window."""
        self.quality_thresholds = {
            'excellent': 0.9,
            'good': 0.7,
            'fair': 0.5,
            'poor': 0.3
        }
        # Data no older than this is fully fresh (score 1.0) ...
        self.data_freshness_threshold = timedelta(hours=24)
        # ... and the score has decayed to 0.5 once data reaches this age.
        self.max_data_age = timedelta(days=30)

    def assess_onboarding_data_quality(self, integrated_data: Dict[str, Any]) -> Dict[str, Any]:
        """Assess the overall quality of onboarding data.

        Args:
            integrated_data: Dict with optional ``website_analysis``,
                ``research_preferences``, ``api_keys_data`` and
                ``onboarding_session`` sections.

        Returns:
            A dict with the five averaged metric scores, ``confidence`` (their
            mean), ``overall_score`` (equal to ``confidence``),
            ``quality_level``, ``recommendations``, ``issues`` and
            ``assessment_timestamp``. The fallback assessment is returned if
            anything raises.
        """
        try:
            logger.info("Assessing onboarding data quality")
            quality_assessment = {
                'overall_score': 0.0,
                'completeness': 0.0,
                'freshness': 0.0,
                'accuracy': 0.0,
                'relevance': 0.0,
                'consistency': 0.0,
                'confidence': 0.0,
                'quality_level': 'poor',
                'recommendations': [],
                'issues': [],
                'assessment_timestamp': datetime.utcnow().isoformat()
            }

            # Assess each data source
            source_qualities = (
                self._assess_website_analysis_quality(integrated_data.get('website_analysis', {})),
                self._assess_research_preferences_quality(integrated_data.get('research_preferences', {})),
                self._assess_api_keys_quality(integrated_data.get('api_keys_data', {})),
                self._assess_onboarding_session_quality(integrated_data.get('onboarding_session', {})),
            )

            # Calculate overall quality metrics
            quality_assessment['completeness'] = self._calculate_completeness_score(*source_qualities)
            quality_assessment['freshness'] = self._calculate_freshness_score(*source_qualities)
            quality_assessment['accuracy'] = self._calculate_accuracy_score(*source_qualities)
            quality_assessment['relevance'] = self._calculate_relevance_score(*source_qualities)
            quality_assessment['consistency'] = self._calculate_consistency_score(*source_qualities)

            # Confidence is the plain mean of the five metrics; the overall
            # score currently mirrors it.
            quality_assessment['confidence'] = (
                quality_assessment['completeness'] +
                quality_assessment['freshness'] +
                quality_assessment['accuracy'] +
                quality_assessment['relevance'] +
                quality_assessment['consistency']
            ) / 5
            quality_assessment['overall_score'] = quality_assessment['confidence']

            quality_assessment['quality_level'] = self._determine_quality_level(quality_assessment['overall_score'])
            quality_assessment['recommendations'] = self._generate_quality_recommendations(quality_assessment)
            quality_assessment['issues'] = self._identify_quality_issues(quality_assessment)

            logger.info(f"Data quality assessment completed. Overall score: {quality_assessment['overall_score']:.2f}")
            return quality_assessment
        except Exception as e:
            logger.error(f"Error assessing data quality: {str(e)}")
            return self._get_fallback_quality_assessment()

    @classmethod
    def _empty_metrics(cls) -> Dict[str, float]:
        """Return a fresh metrics dict with every score set to 0.0."""
        return dict.fromkeys(cls._METRICS, 0.0)

    @staticmethod
    def _weighted_score(checks) -> float:
        """Sum the weights of the passing checks, in the order given.

        Args:
            checks: Iterable of ``(passed, weight)`` pairs; ``passed`` is
                evaluated for truthiness.
        """
        score = 0.0
        for passed, weight in checks:
            if passed:
                score += weight
        return score

    @staticmethod
    def _has_typed_value(data: Dict[str, Any], field: str, expected_types) -> bool:
        """Return True when ``data[field]`` is truthy and of an expected type."""
        return bool(data.get(field)) and isinstance(data[field], expected_types)

    def _freshness_from_timestamp(self, raw_timestamp: Any) -> float:
        """Convert a stored timestamp into a freshness score.

        Accepts an ISO-8601 string (a trailing ``Z`` is read as UTC) or a
        ``datetime``. Timezone-aware values are converted to naive UTC before
        being compared with ``datetime.utcnow()``; subtracting an aware value
        from a naive one raises TypeError, which previously forced every
        ``Z``-suffixed timestamp to the 0.5 fallback.

        Returns:
            The freshness score, or 0.5 when the value cannot be interpreted.
        """
        try:
            if isinstance(raw_timestamp, datetime):
                moment = raw_timestamp
            else:
                moment = datetime.fromisoformat(raw_timestamp.replace('Z', '+00:00'))
            offset = moment.utcoffset()
            if offset is not None:
                moment = (moment - offset).replace(tzinfo=None)
            return self._calculate_freshness_score_from_age(datetime.utcnow() - moment)
        except (AttributeError, TypeError, ValueError, OverflowError):
            return self._UNREADABLE_FRESHNESS

    def _assess_website_analysis_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
        """Assess quality of website analysis data.

        Returns:
            A metrics dict; all scores are 0.0 for empty input or on error.
        """
        try:
            quality_metrics = self._empty_metrics()
            if not website_data:
                return quality_metrics

            # Completeness: share of required fields holding a truthy value.
            required_fields = ['domain', 'industry', 'business_type', 'target_audience', 'content_goals']
            present_fields = sum(1 for field in required_fields if website_data.get(field))
            quality_metrics['completeness'] = present_fields / len(required_fields)

            # Freshness stays 0.0 when no creation timestamp is recorded.
            if website_data.get('created_at'):
                quality_metrics['freshness'] = self._freshness_from_timestamp(website_data['created_at'])

            # Accuracy: each field present with the expected type adds 0.2.
            expected_types = (
                ('domain', str),
                ('industry', str),
                ('business_type', str),
                ('target_audience', str),
                ('content_goals', (str, list)),
            )
            quality_metrics['accuracy'] = self._weighted_score(
                (self._has_typed_value(website_data, field, kinds), 0.2)
                for field, kinds in expected_types
            )

            quality_metrics['relevance'] = self._weighted_score((
                (website_data.get('domain'), 0.3),
                (website_data.get('industry'), 0.3),
                (website_data.get('content_goals'), 0.4),
            ))

            # Consistency: related fields are expected to be filled together.
            quality_metrics['consistency'] = self._weighted_score((
                (website_data.get('domain') and website_data.get('industry'), 0.5),
                (website_data.get('target_audience') and website_data.get('content_goals'), 0.5),
            ))
            return quality_metrics
        except Exception as e:
            logger.error(f"Error assessing website analysis quality: {str(e)}")
            return self._empty_metrics()

    def _assess_research_preferences_quality(self, research_data: Dict[str, Any]) -> Dict[str, Any]:
        """Assess quality of research preferences data.

        Returns:
            A metrics dict; all scores are 0.0 for empty input or on error.
        """
        try:
            quality_metrics = self._empty_metrics()
            if not research_data:
                return quality_metrics

            required_fields = ['research_topics', 'content_types', 'target_audience', 'industry_focus']
            present_fields = sum(1 for field in required_fields if research_data.get(field))
            quality_metrics['completeness'] = present_fields / len(required_fields)

            if research_data.get('created_at'):
                quality_metrics['freshness'] = self._freshness_from_timestamp(research_data['created_at'])

            # Accuracy: each field present with the expected type adds 0.25.
            expected_types = (
                ('research_topics', (str, list)),
                ('content_types', (str, list)),
                ('target_audience', str),
                ('industry_focus', str),
            )
            quality_metrics['accuracy'] = self._weighted_score(
                (self._has_typed_value(research_data, field, kinds), 0.25)
                for field, kinds in expected_types
            )

            quality_metrics['relevance'] = self._weighted_score((
                (research_data.get('research_topics'), 0.4),
                (research_data.get('content_types'), 0.3),
                (research_data.get('target_audience'), 0.3),
            ))

            quality_metrics['consistency'] = self._weighted_score((
                (research_data.get('research_topics') and research_data.get('content_types'), 0.5),
                (research_data.get('target_audience') and research_data.get('industry_focus'), 0.5),
            ))
            return quality_metrics
        except Exception as e:
            logger.error(f"Error assessing research preferences quality: {str(e)}")
            return self._empty_metrics()

    def _assess_api_keys_quality(self, api_data: Dict[str, Any]) -> Dict[str, Any]:
        """Assess quality of API keys data.

        ``api_data`` is read as a mapping of API name to an info dict with
        optional ``is_active``, ``last_used``, ``service_name`` and
        ``data_available`` keys. Entries whose value is not a dict are
        ignored, so a stray scalar no longer zeroes the whole assessment.

        Returns:
            A metrics dict; all scores are 0.0 when there are no usable
            entries or on error.
        """
        try:
            quality_metrics = self._empty_metrics()
            if not api_data:
                return quality_metrics

            entries = {name: info for name, info in api_data.items() if isinstance(info, dict)}
            if not entries:
                return quality_metrics
            total_apis = len(entries)

            # Completeness: share of configured APIs that are active.
            active_apis = sum(1 for info in entries.values() if info.get('is_active'))
            quality_metrics['completeness'] = active_apis / total_apis

            # Freshness: mean over APIs with a last-used timestamp; 0.5 when
            # no API has ever been used.
            freshness_scores = [
                self._freshness_from_timestamp(info['last_used'])
                for info in entries.values()
                if info.get('last_used')
            ]
            quality_metrics['freshness'] = (
                sum(freshness_scores) / len(freshness_scores) if freshness_scores else 0.5
            )

            # Accuracy: per API, 0.5 for a named active service plus 0.5 for
            # available data, averaged over all APIs.
            accuracy_total = sum(
                self._weighted_score((
                    (info.get('service_name') and info.get('is_active'), 0.5),
                    (info.get('data_available'), 0.5),
                ))
                for info in entries.values()
            )
            quality_metrics['accuracy'] = accuracy_total / total_apis

            # Relevance: share of APIs that are known analytics/SEO services.
            relevant_apis = ['google_analytics', 'google_search_console', 'semrush', 'ahrefs', 'moz']
            relevant_count = sum(
                1 for name in entries
                if isinstance(name, str) and name.lower() in relevant_apis
            )
            quality_metrics['relevance'] = relevant_count / total_apis

            # Consistency: 0.5 for having any API, 0.5 more if one has data.
            quality_metrics['consistency'] = self._weighted_score((
                (True, 0.5),
                (any(info.get('data_available') for info in entries.values()), 0.5),
            ))
            return quality_metrics
        except Exception as e:
            logger.error(f"Error assessing API keys quality: {str(e)}")
            return self._empty_metrics()

    def _assess_onboarding_session_quality(self, session_data: Dict[str, Any]) -> Dict[str, Any]:
        """Assess quality of onboarding session data.

        Returns:
            A metrics dict; all scores are 0.0 for empty input or on error.
        """
        try:
            quality_metrics = self._empty_metrics()
            if not session_data:
                return quality_metrics

            required_fields = ['session_id', 'completion_percentage', 'completed_steps', 'current_step']
            present_fields = sum(1 for field in required_fields if session_data.get(field))
            quality_metrics['completeness'] = present_fields / len(required_fields)

            if session_data.get('updated_at'):
                quality_metrics['freshness'] = self._freshness_from_timestamp(session_data['updated_at'])

            # Accuracy: each field present with the expected type adds 0.25.
            expected_types = (
                ('session_id', str),
                ('completion_percentage', (int, float)),
                ('completed_steps', (list, int)),
                ('current_step', (str, int)),
            )
            quality_metrics['accuracy'] = self._weighted_score(
                (self._has_typed_value(session_data, field, kinds), 0.25)
                for field, kinds in expected_types
            )

            # Only compare numeric percentages: a None or string value used to
            # raise TypeError here and zero the entire session assessment.
            completion = session_data.get('completion_percentage', 0)
            mostly_complete = isinstance(completion, (int, float)) and completion > 50
            quality_metrics['relevance'] = self._weighted_score((
                (mostly_complete, 0.5),
                (session_data.get('session_data'), 0.5),
            ))

            quality_metrics['consistency'] = self._weighted_score((
                (session_data.get('completion_percentage') and session_data.get('completed_steps'), 0.5),
                (session_data.get('current_step') and session_data.get('session_id'), 0.5),
            ))
            return quality_metrics
        except Exception as e:
            logger.error(f"Error assessing onboarding session quality: {str(e)}")
            return self._empty_metrics()

    def _average_metric(self, metric: str, *source_qualities: Dict) -> float:
        """Average one metric across the per-source quality dicts.

        Returns:
            The mean score, or 0.0 (with an error log) if a source lacks the
            metric or the values cannot be averaged.
        """
        try:
            scores = [quality[metric] for quality in source_qualities]
            return sum(scores) / len(scores)
        except Exception as e:
            logger.error(f"Error calculating {metric} score: {str(e)}")
            return 0.0

    def _calculate_completeness_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
        """Calculate overall completeness score."""
        return self._average_metric('completeness', website_quality, research_quality, api_quality, session_quality)

    def _calculate_freshness_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
        """Calculate overall freshness score."""
        return self._average_metric('freshness', website_quality, research_quality, api_quality, session_quality)

    def _calculate_accuracy_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
        """Calculate overall accuracy score."""
        return self._average_metric('accuracy', website_quality, research_quality, api_quality, session_quality)

    def _calculate_relevance_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
        """Calculate overall relevance score."""
        return self._average_metric('relevance', website_quality, research_quality, api_quality, session_quality)

    def _calculate_consistency_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
        """Calculate overall consistency score."""
        return self._average_metric('consistency', website_quality, research_quality, api_quality, session_quality)

    def _calculate_freshness_score_from_age(self, age: timedelta) -> float:
        """Calculate freshness score based on data age.

        Returns 1.0 up to ``data_freshness_threshold``, decays linearly to
        0.5 at ``max_data_age`` and stays at 0.5 beyond that (and on error).
        """
        try:
            if age <= self.data_freshness_threshold:
                return 1.0
            if age <= self.max_data_age:
                # Linear decay from 1.0 to 0.5
                decay_window = self.max_data_age - self.data_freshness_threshold
                decay_factor = 1.0 - (age - self.data_freshness_threshold) / decay_window * 0.5
                return max(0.5, decay_factor)
            return 0.5  # Minimum freshness for old data
        except Exception as e:
            logger.error(f"Error calculating freshness score from age: {str(e)}")
            return 0.5

    def _determine_quality_level(self, overall_score: float) -> str:
        """Determine quality level based on overall score.

        Returns the first of 'excellent', 'good' or 'fair' whose threshold
        the score reaches, otherwise 'poor' (also returned on error).
        """
        try:
            for level in ('excellent', 'good', 'fair'):
                if overall_score >= self.quality_thresholds[level]:
                    return level
            return 'poor'
        except Exception as e:
            logger.error(f"Error determining quality level: {str(e)}")
            return 'poor'

    def _generate_quality_recommendations(self, quality_assessment: Dict[str, Any]) -> List[str]:
        """Generate recommendations based on quality assessment.

        One recommendation is produced per metric scoring below 0.7, plus a
        re-onboarding suggestion when the overall score is below 0.5.
        """
        try:
            advice_by_metric = (
                ('completeness', "Complete missing onboarding data to improve strategy accuracy"),
                ('freshness', "Update stale data to ensure current market insights"),
                ('accuracy', "Verify data accuracy for better strategy recommendations"),
                ('relevance', "Provide more relevant data for targeted strategy development"),
                ('consistency', "Ensure data consistency across different sources"),
            )
            recommendations = [
                advice for metric, advice in advice_by_metric
                if quality_assessment[metric] < 0.7
            ]
            if quality_assessment['overall_score'] < 0.5:
                recommendations.append("Consider re-running onboarding process for better data quality")
            return recommendations
        except Exception as e:
            logger.error(f"Error generating quality recommendations: {str(e)}")
            return ["Unable to generate recommendations due to assessment error"]

    def _identify_quality_issues(self, quality_assessment: Dict[str, Any]) -> List[str]:
        """Identify specific quality issues (one per metric scoring below 0.5)."""
        try:
            issue_by_metric = (
                ('completeness', "Incomplete data: Missing critical onboarding information"),
                ('freshness', "Stale data: Information may be outdated"),
                ('accuracy', "Data accuracy concerns: Verify information validity"),
                ('relevance', "Low relevance: Data may not align with current needs"),
                ('consistency', "Inconsistent data: Conflicting information detected"),
            )
            return [
                issue for metric, issue in issue_by_metric
                if quality_assessment[metric] < 0.5
            ]
        except Exception as e:
            logger.error(f"Error identifying quality issues: {str(e)}")
            return ["Unable to identify issues due to assessment error"]

    def _get_fallback_quality_assessment(self) -> Dict[str, Any]:
        """Get fallback quality assessment when assessment fails."""
        return {
            'overall_score': 0.0,
            'completeness': 0.0,
            'freshness': 0.0,
            'accuracy': 0.0,
            'relevance': 0.0,
            'consistency': 0.0,
            'confidence': 0.0,
            'quality_level': 'poor',
            'recommendations': ['Unable to assess data quality'],
            'issues': ['Quality assessment failed'],
            'assessment_timestamp': datetime.utcnow().isoformat()
        }

    def validate_field_data(self, field_data: Dict[str, Any]) -> Dict[str, Any]:
        """Validate individual field data.

        A ``None`` or empty-string value is an error and marks the result
        invalid. A string shorter than three characters after stripping is a
        warning and multiplies ``confidence`` by 0.9.

        Returns:
            A dict with ``is_valid``, ``errors``, ``warnings`` and
            ``confidence``; a failed result is returned if validation raises.
        """
        try:
            validation_result = {
                'is_valid': True,
                'errors': [],
                'warnings': [],
                'confidence': 1.0
            }
            for field_name, field_value in field_data.items():
                if field_value is None or field_value == '':
                    validation_result['errors'].append(f"Field '{field_name}' is empty")
                    validation_result['is_valid'] = False
                elif isinstance(field_value, str) and len(field_value.strip()) < 3:
                    validation_result['warnings'].append(f"Field '{field_name}' may be too short")
                    validation_result['confidence'] *= 0.9
            return validation_result
        except Exception as e:
            logger.error(f"Error validating field data: {str(e)}")
            return {
                'is_valid': False,
                'errors': ['Validation failed'],
                'warnings': [],
                'confidence': 0.0
            }

View File

@@ -0,0 +1,790 @@
"""
Field Transformation Service
Onboarding data to field mapping.
"""
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
logger = logging.getLogger(__name__)
class FieldTransformationService:
"""Service for transforming onboarding data to strategic input fields."""
def __init__(self):
# Define field mapping configurations
self.field_mappings = {
# Business Context mappings
'business_objectives': {
'sources': ['website_analysis.content_goals', 'research_preferences.research_topics'],
'transformation': 'extract_business_objectives'
},
'target_metrics': {
'sources': ['website_analysis.performance_metrics', 'research_preferences.performance_tracking'],
'transformation': 'extract_target_metrics'
},
'content_budget': {
'sources': ['onboarding_session.session_data.budget'],
'transformation': 'extract_budget'
},
'team_size': {
'sources': ['onboarding_session.session_data.team_size'],
'transformation': 'extract_team_size'
},
'implementation_timeline': {
'sources': ['onboarding_session.session_data.timeline'],
'transformation': 'extract_timeline'
},
'market_share': {
'sources': ['website_analysis.performance_metrics'],
'transformation': 'extract_market_share'
},
'competitive_position': {
'sources': ['website_analysis.competitors', 'research_preferences.competitor_analysis'],
'transformation': 'extract_competitive_position'
},
'performance_metrics': {
'sources': ['website_analysis.performance_metrics'],
'transformation': 'extract_performance_metrics'
},
# Audience Intelligence mappings
'content_preferences': {
'sources': ['research_preferences.content_types'],
'transformation': 'extract_content_preferences'
},
'consumption_patterns': {
'sources': ['website_analysis.target_audience', 'research_preferences.target_audience'],
'transformation': 'extract_consumption_patterns'
},
'audience_pain_points': {
'sources': ['website_analysis.content_gaps', 'research_preferences.research_topics'],
'transformation': 'extract_pain_points'
},
'buying_journey': {
'sources': ['website_analysis.target_audience', 'research_preferences.target_audience'],
'transformation': 'extract_buying_journey'
},
'seasonal_trends': {
'sources': ['research_preferences.trend_analysis'],
'transformation': 'extract_seasonal_trends'
},
'engagement_metrics': {
'sources': ['website_analysis.performance_metrics'],
'transformation': 'extract_engagement_metrics'
},
# Competitive Intelligence mappings
'top_competitors': {
'sources': ['website_analysis.competitors'],
'transformation': 'extract_competitors'
},
'competitor_content_strategies': {
'sources': ['website_analysis.competitors', 'research_preferences.competitor_analysis'],
'transformation': 'extract_competitor_strategies'
},
'market_gaps': {
'sources': ['website_analysis.content_gaps', 'research_preferences.research_topics'],
'transformation': 'extract_market_gaps'
},
'industry_trends': {
'sources': ['website_analysis.industry', 'research_preferences.industry_focus'],
'transformation': 'extract_industry_trends'
},
'emerging_trends': {
'sources': ['research_preferences.trend_analysis'],
'transformation': 'extract_emerging_trends'
},
# Content Strategy mappings
'preferred_formats': {
'sources': ['research_preferences.content_types'],
'transformation': 'extract_preferred_formats'
},
'content_mix': {
'sources': ['research_preferences.content_types', 'website_analysis.content_goals'],
'transformation': 'extract_content_mix'
},
'content_frequency': {
'sources': ['research_preferences.content_calendar'],
'transformation': 'extract_content_frequency'
},
'optimal_timing': {
'sources': ['research_preferences.content_calendar'],
'transformation': 'extract_optimal_timing'
},
'quality_metrics': {
'sources': ['website_analysis.performance_metrics'],
'transformation': 'extract_quality_metrics'
},
'editorial_guidelines': {
'sources': ['website_analysis.business_type', 'research_preferences.content_types'],
'transformation': 'extract_editorial_guidelines'
},
'brand_voice': {
'sources': ['website_analysis.business_type', 'onboarding_session.session_data.brand_voice'],
'transformation': 'extract_brand_voice'
},
# Performance Analytics mappings
'traffic_sources': {
'sources': ['website_analysis.performance_metrics'],
'transformation': 'extract_traffic_sources'
},
'conversion_rates': {
'sources': ['website_analysis.performance_metrics'],
'transformation': 'extract_conversion_rates'
},
'content_roi_targets': {
'sources': ['onboarding_session.session_data.budget', 'website_analysis.performance_metrics'],
'transformation': 'extract_roi_targets'
},
'ab_testing_capabilities': {
'sources': ['onboarding_session.session_data.team_size'],
'transformation': 'extract_ab_testing_capabilities'
}
}
def transform_onboarding_data_to_fields(self, integrated_data: Dict[str, Any]) -> Dict[str, Any]:
    """Derive strategic input fields from integrated onboarding data.

    For every entry in ``self.field_mappings`` the configured sources are
    extracted and passed to the configured transformation method. Fields
    with no source data or a falsy transformed value are omitted. A failure
    on one field is logged as a warning and does not stop the others.

    Args:
        integrated_data: Integrated onboarding data dict.

    Returns:
        A dict with ``fields`` (field id -> transformed value), ``sources``
        (field id -> source info) and ``transformation_metadata``. On an
        unexpected error both maps are empty and the metadata holds the
        error text.
    """
    try:
        logger.info("Transforming onboarding data to strategic fields")
        transformed_fields = {}
        data_sources = {}

        for field_id, mapping_config in self.field_mappings.items():
            try:
                extracted = self._extract_source_data(integrated_data, mapping_config['sources'])
                if not extracted:
                    continue

                # The mapping stores the transformation by method name.
                transform = getattr(self, mapping_config['transformation'])
                field_value = transform(extracted, integrated_data)
                if not field_value:
                    continue

                transformed_fields[field_id] = field_value
                data_sources[field_id] = self._get_data_source_info(mapping_config['sources'], integrated_data)
            except Exception as e:
                logger.warning(f"Error transforming field {field_id}: {str(e)}")
                continue

        metadata = {
            'total_fields_processed': len(self.field_mappings),
            'successful_transformations': len(transformed_fields),
            'transformation_timestamp': datetime.utcnow().isoformat()
        }
        result = {
            'fields': transformed_fields,
            'sources': data_sources,
            'transformation_metadata': metadata
        }
        logger.info(f"Successfully transformed {len(transformed_fields)} fields from onboarding data")
        return result
    except Exception as e:
        logger.error(f"Error transforming onboarding data to fields: {str(e)}")
        return {'fields': {}, 'sources': {}, 'transformation_metadata': {'error': str(e)}}
def _extract_source_data(self, integrated_data: Dict[str, Any], sources: List[str]) -> Dict[str, Any]:
"""Extract data from specified sources."""
source_data = {}
for source_path in sources:
try:
# Navigate nested dictionary structure
keys = source_path.split('.')
value = integrated_data
for key in keys:
if isinstance(value, dict) and key in value:
value = value[key]
else:
value = None
break
if value is not None:
source_data[source_path] = value
except Exception as e:
logger.debug(f"Error extracting data from {source_path}: {str(e)}")
continue
return source_data
def _get_data_source_info(self, sources: List[str], integrated_data: Dict[str, Any]) -> Dict[str, Any]:
"""Get information about data sources for a field."""
source_info = {
'sources': sources,
'data_quality': self._assess_source_quality(sources, integrated_data),
'last_updated': datetime.utcnow().isoformat()
}
return source_info
def _assess_source_quality(self, sources: List[str], integrated_data: Dict[str, Any]) -> float:
"""Assess the quality of data sources."""
try:
quality_scores = []
for source in sources:
# Check if source exists and has data
keys = source.split('.')
value = integrated_data
for key in keys:
if isinstance(value, dict) and key in value:
value = value[key]
else:
value = None
break
if value:
# Basic quality assessment
if isinstance(value, (list, dict)) and len(value) > 0:
quality_scores.append(1.0)
elif isinstance(value, str) and len(value.strip()) > 0:
quality_scores.append(0.8)
else:
quality_scores.append(0.5)
else:
quality_scores.append(0.0)
return sum(quality_scores) / len(quality_scores) if quality_scores else 0.0
except Exception as e:
logger.error(f"Error assessing source quality: {str(e)}")
return 0.0
# Transformation methods for each field type
def extract_business_objectives(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract business objectives from content goals and research topics.

    Args:
        source_data: Mapping of dotted source paths to their extracted values.
        integrated_data: Full onboarding data; not used by this extractor.

    Returns:
        Content goals followed by research topics as one comma-separated
        string, or ``None`` when neither source yields an entry (or an
        unexpected error occurs).
    """
    try:
        objectives = []
        for source_key in ('website_analysis.content_goals',
                           'research_preferences.research_topics'):
            entries = source_data.get(source_key)
            if isinstance(entries, list):
                objectives.extend(entries)
            elif isinstance(entries, str):
                objectives.append(entries)
        if not objectives:
            return None
        # str.join() raises TypeError on non-string items; without the
        # coercion a single such item would discard every objective.
        return ', '.join(str(item) for item in objectives)
    except Exception as e:
        logger.error(f"Error extracting business objectives: {str(e)}")
        return None
def extract_target_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract target metrics from performance data.

    Args:
        source_data: Mapping of dotted source paths to their extracted values.
        integrated_data: Full onboarding data; not used by this extractor.

    Returns:
        Performance metrics (``key: value`` pairs for a dict, or the raw
        string) followed by the performance-tracking entries, joined with
        ``", "``; ``None`` when nothing was found or on unexpected error.
    """
    try:
        metrics = []
        perf_metrics = source_data.get('website_analysis.performance_metrics')
        if isinstance(perf_metrics, dict):
            metrics.extend(f"{k}: {v}" for k, v in perf_metrics.items())
        elif isinstance(perf_metrics, str):
            metrics.append(perf_metrics)
        tracking = source_data.get('research_preferences.performance_tracking')
        if isinstance(tracking, list):
            # Coerce to str so a non-string entry cannot make join() raise
            # and wipe out the metrics collected so far.
            metrics.extend(str(item) for item in tracking)
        elif isinstance(tracking, str):
            metrics.append(tracking)
        return ', '.join(metrics) if metrics else None
    except Exception as e:
        logger.error(f"Error extracting target metrics: {str(e)}")
        return None
def extract_budget(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract the content budget from onboarding session data.

    Returns:
        The budget converted to a string, or ``None`` when the value is
        missing or falsy (or an unexpected error occurs).
        ``integrated_data`` is not used.
    """
    try:
        budget = source_data.get('onboarding_session.session_data.budget')
        return str(budget) if budget else None
    except Exception as e:
        logger.error(f"Error extracting budget: {str(e)}")
        return None
def extract_team_size(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract the team size from onboarding session data.

    Returns:
        The team size converted to a string, or ``None`` when the value is
        missing or falsy (or an unexpected error occurs).
        ``integrated_data`` is not used.
    """
    try:
        team_size = source_data.get('onboarding_session.session_data.team_size')
        return str(team_size) if team_size else None
    except Exception as e:
        logger.error(f"Error extracting team size: {str(e)}")
        return None
def extract_timeline(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract the implementation timeline from onboarding session data.

    Returns:
        The timeline converted to a string, or ``None`` when the value is
        missing or falsy (or an unexpected error occurs).
        ``integrated_data`` is not used.
    """
    try:
        timeline = source_data.get('onboarding_session.session_data.timeline')
        return str(timeline) if timeline else None
    except Exception as e:
        logger.error(f"Error extracting timeline: {str(e)}")
        return None
def extract_market_share(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract market share from performance metrics.

    Args:
        source_data: Mapping of dotted source paths to their extracted values.
        integrated_data: Full onboarding data; not used by this extractor.

    Returns:
        The ``market_share`` entry of the performance-metrics dict as a
        string, or ``None`` when the metrics are not a dict, the entry is
        missing or ``None``, or an unexpected error occurs.
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if isinstance(metrics, dict):
            share = metrics.get('market_share')
            # Guard against a present-but-None entry, which str() would turn
            # into the literal text "None". Zero is a real value and is kept.
            if share is not None:
                return str(share)
        return None
    except Exception as e:
        logger.error(f"Error extracting market share: {str(e)}")
        return None
def extract_competitive_position(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract competitive position from competitor data.

    Returns:
        ``"Competitors: ..."`` and/or ``"Analysis: ..."`` fragments for the
        truthy sources, joined with ``"; "``; ``None`` when both are absent
        or falsy (or an unexpected error occurs).
        ``integrated_data`` is not used.
    """
    try:
        fragments = []
        competitors = source_data.get('website_analysis.competitors')
        if competitors:
            fragments.append(f"Competitors: {competitors}")
        analysis = source_data.get('research_preferences.competitor_analysis')
        if analysis:
            fragments.append(f"Analysis: {analysis}")
        if not fragments:
            return None
        return '; '.join(fragments)
    except Exception as e:
        logger.error(f"Error extracting competitive position: {str(e)}")
        return None
def extract_performance_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract performance metrics.

    Returns:
        A string value unchanged, a dict rendered as comma-separated
        ``key: value`` pairs, and ``None`` for anything else (missing value,
        other type, or unexpected error). ``integrated_data`` is not used.
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if isinstance(metrics, str):
            return metrics
        if isinstance(metrics, dict):
            pairs = [f"{k}: {v}" for k, v in metrics.items()]
            return ', '.join(pairs)
        return None
    except Exception as e:
        logger.error(f"Error extracting performance metrics: {str(e)}")
        return None
def extract_content_preferences(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract content preferences from research preferences.

    Args:
        source_data: Mapping of dotted source paths to their extracted values.
        integrated_data: Full onboarding data; not used by this extractor.

    Returns:
        The content types as a comma-separated string (a plain string is
        returned unchanged), or ``None`` when the value is missing, of another
        type, or an unexpected error occurs.
    """
    try:
        content_types = source_data.get('research_preferences.content_types')
        if isinstance(content_types, list):
            # Coerce items to str: join() raises TypeError on non-string
            # entries, which would otherwise drop the whole field.
            return ', '.join(str(item) for item in content_types)
        if isinstance(content_types, str):
            return content_types
        return None
    except Exception as e:
        logger.error(f"Error extracting content preferences: {str(e)}")
        return None
def extract_consumption_patterns(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract consumption patterns from audience data.

    Returns:
        ``"Website Audience: ..."`` and/or ``"Research Audience: ..."``
        fragments for the truthy sources, joined with ``"; "``; ``None`` when
        neither is available (or an unexpected error occurs).
        ``integrated_data`` is not used.
    """
    try:
        fragments = []
        website_audience = source_data.get('website_analysis.target_audience')
        if website_audience:
            fragments.append(f"Website Audience: {website_audience}")
        research_audience = source_data.get('research_preferences.target_audience')
        if research_audience:
            fragments.append(f"Research Audience: {research_audience}")
        if not fragments:
            return None
        return '; '.join(fragments)
    except Exception as e:
        logger.error(f"Error extracting consumption patterns: {str(e)}")
        return None
def extract_pain_points(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract audience pain points from content gaps and research topics.

    Args:
        source_data: Mapping of dotted source paths to their extracted values.
        integrated_data: Full onboarding data; not used by this extractor.

    Returns:
        Content gaps followed by research topics as one comma-separated
        string, or ``None`` when neither source yields an entry (or an
        unexpected error occurs).
    """
    try:
        pain_points = []
        for source_key in ('website_analysis.content_gaps',
                           'research_preferences.research_topics'):
            entries = source_data.get(source_key)
            if isinstance(entries, list):
                pain_points.extend(entries)
            elif isinstance(entries, str):
                pain_points.append(entries)
        if not pain_points:
            return None
        # str.join() raises TypeError on non-string items; coercing keeps one
        # odd entry from discarding every pain point.
        return ', '.join(str(item) for item in pain_points)
    except Exception as e:
        logger.error(f"Error extracting pain points: {str(e)}")
        return None
def extract_buying_journey(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract buying journey from audience data.

    Returns:
        ``"Journey based on: <audience>"`` when the website target audience is
        present and truthy, otherwise ``None`` (also on unexpected error).
        ``integrated_data`` is not used.
    """
    try:
        audience = source_data.get('website_analysis.target_audience')
        if not audience:
            return None
        return f"Journey based on: {audience}"
    except Exception as e:
        logger.error(f"Error extracting buying journey: {str(e)}")
        return None
def extract_seasonal_trends(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract seasonal trends from trend analysis.

    Args:
        source_data: Mapping of dotted source paths to their extracted values.
        integrated_data: Full onboarding data; not used by this extractor.

    Returns:
        The trend-analysis entries as a comma-separated string (a plain string
        is returned unchanged), or ``None`` when the value is missing, of
        another type, or an unexpected error occurs.
    """
    try:
        trends = source_data.get('research_preferences.trend_analysis')
        if isinstance(trends, list):
            # Coerce items to str: join() raises TypeError on non-string
            # entries, which would otherwise drop the whole field.
            return ', '.join(str(item) for item in trends)
        if isinstance(trends, str):
            return trends
        return None
    except Exception as e:
        logger.error(f"Error extracting seasonal trends: {str(e)}")
        return None
def extract_engagement_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract engagement metrics from performance data.

    Returns:
        Comma-separated ``key: value`` pairs for every performance metric
        whose key contains ``engagement`` (case-insensitive), or ``None``
        when the metrics are not a dict, nothing matches, or an unexpected
        error occurs. ``integrated_data`` is not used.
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if not isinstance(metrics, dict):
            return None
        matching = [
            f"{name}: {value}"
            for name, value in metrics.items()
            if 'engagement' in name.lower()
        ]
        if not matching:
            return None
        return ', '.join(matching)
    except Exception as e:
        logger.error(f"Error extracting engagement metrics: {str(e)}")
        return None
def extract_competitors(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract top competitors from competitor data.

    Args:
        source_data: Mapping of dotted source paths to their extracted values.
        integrated_data: Full onboarding data; not used by this extractor.

    Returns:
        The competitors as a comma-separated string (a plain string is
        returned unchanged), or ``None`` when the value is missing, of another
        type, or an unexpected error occurs.
    """
    try:
        competitors = source_data.get('website_analysis.competitors')
        if isinstance(competitors, list):
            # Coerce items to str: join() raises TypeError on non-string
            # entries (e.g. dict records), which would drop the whole field.
            return ', '.join(str(item) for item in competitors)
        if isinstance(competitors, str):
            return competitors
        return None
    except Exception as e:
        logger.error(f"Error extracting competitors: {str(e)}")
        return None
def extract_competitor_strategies(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract competitor content strategies.

    Returns:
        ``"Competitors: ..."`` and/or ``"Analysis: ..."`` fragments for the
        truthy sources, joined with ``"; "``; ``None`` when both are absent
        or falsy (or an unexpected error occurs).
        ``integrated_data`` is not used.
    """
    try:
        fragments = []
        competitors = source_data.get('website_analysis.competitors')
        if competitors:
            fragments.append(f"Competitors: {competitors}")
        analysis = source_data.get('research_preferences.competitor_analysis')
        if analysis:
            fragments.append(f"Analysis: {analysis}")
        if not fragments:
            return None
        return '; '.join(fragments)
    except Exception as e:
        logger.error(f"Error extracting competitor strategies: {str(e)}")
        return None
def extract_market_gaps(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract market gaps from content gaps and research topics.

    Args:
        source_data: Mapping of dotted source paths to their extracted values.
        integrated_data: Full onboarding data; not used by this extractor.

    Returns:
        Content gaps followed by research topics as one comma-separated
        string, or ``None`` when neither source yields an entry (or an
        unexpected error occurs).
    """
    try:
        gaps = []
        for source_key in ('website_analysis.content_gaps',
                           'research_preferences.research_topics'):
            entries = source_data.get(source_key)
            if isinstance(entries, list):
                gaps.extend(entries)
            elif isinstance(entries, str):
                gaps.append(entries)
        if not gaps:
            return None
        # str.join() raises TypeError on non-string items; coercing keeps one
        # odd entry from discarding every gap.
        return ', '.join(str(item) for item in gaps)
    except Exception as e:
        logger.error(f"Error extracting market gaps: {str(e)}")
        return None
def extract_industry_trends(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract industry trends from industry data.

    Returns:
        ``"Industry: ..."`` and/or ``"Focus: ..."`` fragments for the truthy
        sources, joined with ``"; "``; ``None`` when both are absent or falsy
        (or an unexpected error occurs). ``integrated_data`` is not used.
    """
    try:
        fragments = []
        industry = source_data.get('website_analysis.industry')
        if industry:
            fragments.append(f"Industry: {industry}")
        focus = source_data.get('research_preferences.industry_focus')
        if focus:
            fragments.append(f"Focus: {focus}")
        if not fragments:
            return None
        return '; '.join(fragments)
    except Exception as e:
        logger.error(f"Error extracting industry trends: {str(e)}")
        return None
def extract_emerging_trends(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract emerging trends from trend analysis.

    Args:
        source_data: Mapping of dotted source paths to their extracted values.
        integrated_data: Full onboarding data; not used by this extractor.

    Returns:
        The trend-analysis entries as a comma-separated string (a plain string
        is returned unchanged), or ``None`` when the value is missing, of
        another type, or an unexpected error occurs.
    """
    try:
        trends = source_data.get('research_preferences.trend_analysis')
        if isinstance(trends, list):
            # Coerce items to str: join() raises TypeError on non-string
            # entries, which would otherwise drop the whole field.
            return ', '.join(str(item) for item in trends)
        if isinstance(trends, str):
            return trends
        return None
    except Exception as e:
        logger.error(f"Error extracting emerging trends: {str(e)}")
        return None
def extract_preferred_formats(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract preferred content formats.

    Args:
        source_data: Mapping of dotted source paths to their extracted values.
        integrated_data: Full onboarding data; not used by this extractor.

    Returns:
        The content types as a comma-separated string (a plain string is
        returned unchanged), or ``None`` when the value is missing, of another
        type, or an unexpected error occurs.
    """
    try:
        content_types = source_data.get('research_preferences.content_types')
        if isinstance(content_types, list):
            # Coerce items to str: join() raises TypeError on non-string
            # entries, which would otherwise drop the whole field.
            return ', '.join(str(item) for item in content_types)
        if isinstance(content_types, str):
            return content_types
        return None
    except Exception as e:
        logger.error(f"Error extracting preferred formats: {str(e)}")
        return None
def extract_content_mix(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract content mix from content types and goals.

    Returns:
        ``"Types: ..."`` and/or ``"Goals: ..."`` fragments for the truthy
        sources, joined with ``"; "``; ``None`` when both are absent or falsy
        (or an unexpected error occurs). ``integrated_data`` is not used.
    """
    try:
        fragments = []
        content_types = source_data.get('research_preferences.content_types')
        if content_types:
            fragments.append(f"Types: {content_types}")
        goals = source_data.get('website_analysis.content_goals')
        if goals:
            fragments.append(f"Goals: {goals}")
        if not fragments:
            return None
        return '; '.join(fragments)
    except Exception as e:
        logger.error(f"Error extracting content mix: {str(e)}")
        return None
def extract_content_frequency(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract content frequency from calendar data.

    Returns:
        The content-calendar value converted to a string, or ``None`` when it
        is missing or falsy (or an unexpected error occurs).
        ``integrated_data`` is not used.
    """
    try:
        calendar = source_data.get('research_preferences.content_calendar')
        return str(calendar) if calendar else None
    except Exception as e:
        logger.error(f"Error extracting content frequency: {str(e)}")
        return None
def extract_optimal_timing(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract optimal timing from calendar data.

    Returns:
        The content-calendar value converted to a string, or ``None`` when it
        is missing or falsy (or an unexpected error occurs).
        ``integrated_data`` is not used.
    """
    try:
        calendar = source_data.get('research_preferences.content_calendar')
        return str(calendar) if calendar else None
    except Exception as e:
        logger.error(f"Error extracting optimal timing: {str(e)}")
        return None
def extract_quality_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract quality metrics from performance data.

    Returns:
        Comma-separated ``key: value`` pairs for every performance metric
        whose key contains ``quality`` (case-insensitive), or ``None`` when
        the metrics are not a dict, nothing matches, or an unexpected error
        occurs. ``integrated_data`` is not used.
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if not isinstance(metrics, dict):
            return None
        matching = [
            f"{name}: {value}"
            for name, value in metrics.items()
            if 'quality' in name.lower()
        ]
        if not matching:
            return None
        return ', '.join(matching)
    except Exception as e:
        logger.error(f"Error extracting quality metrics: {str(e)}")
        return None
def extract_editorial_guidelines(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract editorial guidelines from business type and content types.

    Returns:
        ``"Business Type: ..."`` and/or ``"Content Types: ..."`` fragments
        for the truthy sources, joined with ``"; "``; ``None`` when both are
        absent or falsy (or an unexpected error occurs).
        ``integrated_data`` is not used.
    """
    try:
        fragments = []
        business_type = source_data.get('website_analysis.business_type')
        if business_type:
            fragments.append(f"Business Type: {business_type}")
        content_types = source_data.get('research_preferences.content_types')
        if content_types:
            fragments.append(f"Content Types: {content_types}")
        if not fragments:
            return None
        return '; '.join(fragments)
    except Exception as e:
        logger.error(f"Error extracting editorial guidelines: {str(e)}")
        return None
def extract_brand_voice(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract brand voice from business type and session data.

    Returns:
        ``"Business Type: ..."`` and/or ``"Brand Voice: ..."`` fragments for
        the truthy sources, joined with ``"; "``; ``None`` when both are
        absent or falsy (or an unexpected error occurs).
        ``integrated_data`` is not used.
    """
    try:
        fragments = []
        business_type = source_data.get('website_analysis.business_type')
        if business_type:
            fragments.append(f"Business Type: {business_type}")
        brand_voice = source_data.get('onboarding_session.session_data.brand_voice')
        if brand_voice:
            fragments.append(f"Brand Voice: {brand_voice}")
        if not fragments:
            return None
        return '; '.join(fragments)
    except Exception as e:
        logger.error(f"Error extracting brand voice: {str(e)}")
        return None
def extract_traffic_sources(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract traffic sources from performance metrics.

    Returns:
        Comma-separated ``key: value`` pairs for every performance metric
        whose key contains ``traffic`` (case-insensitive), or ``None`` when
        the metrics are not a dict, nothing matches, or an unexpected error
        occurs. ``integrated_data`` is not used.
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if not isinstance(metrics, dict):
            return None
        matching = [
            f"{name}: {value}"
            for name, value in metrics.items()
            if 'traffic' in name.lower()
        ]
        if not matching:
            return None
        return ', '.join(matching)
    except Exception as e:
        logger.error(f"Error extracting traffic sources: {str(e)}")
        return None
def extract_conversion_rates(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract conversion rates from performance metrics.

    Returns:
        Comma-separated ``key: value`` pairs for every performance metric
        whose key contains ``conversion`` (case-insensitive), or ``None``
        when the metrics are not a dict, nothing matches, or an unexpected
        error occurs. ``integrated_data`` is not used.
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if not isinstance(metrics, dict):
            return None
        matching = [
            f"{name}: {value}"
            for name, value in metrics.items()
            if 'conversion' in name.lower()
        ]
        if not matching:
            return None
        return ', '.join(matching)
    except Exception as e:
        logger.error(f"Error extracting conversion rates: {str(e)}")
        return None
def extract_roi_targets(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract ROI targets from budget and performance data.

    Returns:
        A ``"Budget: ..."`` fragment when the session budget is truthy and a
        ``"ROI Metrics: {...}"`` fragment holding the performance metrics
        whose key contains ``roi`` (case-insensitive), joined with ``"; "``;
        ``None`` when neither applies (or an unexpected error occurs).
        ``integrated_data`` is not used.
    """
    try:
        fragments = []
        budget = source_data.get('onboarding_session.session_data.budget')
        if budget:
            fragments.append(f"Budget: {budget}")
        metrics = source_data.get('website_analysis.performance_metrics')
        if isinstance(metrics, dict):
            roi_metrics = {
                name: value
                for name, value in metrics.items()
                if 'roi' in name.lower()
            }
            if roi_metrics:
                fragments.append(f"ROI Metrics: {roi_metrics}")
        if not fragments:
            return None
        return '; '.join(fragments)
    except Exception as e:
        logger.error(f"Error extracting ROI targets: {str(e)}")
        return None
def extract_ab_testing_capabilities(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract A/B testing capabilities from team size.

    The session team size is converted with ``int()`` and mapped to a tier:
    more than 5 is "Advanced", more than 2 is "Basic", anything else is
    "Limited".

    Returns:
        The tier description, or ``None`` when the team size is missing or
        falsy, or when the conversion (or anything else) fails.
        ``integrated_data`` is not used.
    """
    try:
        team_size = source_data.get('onboarding_session.session_data.team_size')
        if not team_size:
            return None
        # Simple logic based on team size
        headcount = int(team_size)
        if headcount > 5:
            return "Advanced A/B testing capabilities"
        if headcount > 2:
            return "Basic A/B testing capabilities"
        return "Limited A/B testing capabilities"
    except Exception as e:
        logger.error(f"Error extracting A/B testing capabilities: {str(e)}")
        return None

View File

@@ -0,0 +1,10 @@
"""
Performance Module
Caching, optimization, and health monitoring services.
"""
from .caching import CachingService
from .optimization import PerformanceOptimizationService
from .health_monitoring import HealthMonitoringService
__all__ = ['CachingService', 'PerformanceOptimizationService', 'HealthMonitoringService']

View File

@@ -0,0 +1,469 @@
"""
Caching Service
Cache management and optimization.
"""
import logging
import json
import hashlib
from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
logger = logging.getLogger(__name__)
# Try to import Redis, fallback to in-memory if not available
try:
import redis
REDIS_AVAILABLE = True
except ImportError:
REDIS_AVAILABLE = False
logger.warning("Redis not available, using in-memory caching")
class CachingService:
"""Service for intelligent caching of content strategy data."""
def __init__(self):
# Cache configuration
self.cache_config = {
'ai_analysis': {
'ttl': 3600, # 1 hour
'max_size': 1000,
'priority': 'high'
},
'onboarding_data': {
'ttl': 1800, # 30 minutes
'max_size': 500,
'priority': 'medium'
},
'strategy_cache': {
'ttl': 7200, # 2 hours
'max_size': 200,
'priority': 'high'
},
'field_transformations': {
'ttl': 900, # 15 minutes
'max_size': 1000,
'priority': 'low'
}
}
# Initialize Redis connection if available
self.redis_available = False
if REDIS_AVAILABLE:
try:
self.redis_client = redis.Redis(
host='localhost',
port=6379,
db=0,
decode_responses=True,
socket_connect_timeout=5,
socket_timeout=5
)
# Test connection
self.redis_client.ping()
self.redis_available = True
logger.info("Redis connection established successfully")
except Exception as e:
logger.warning(f"Redis connection failed: {str(e)}. Using in-memory cache.")
self.redis_available = False
self.memory_cache = {}
else:
logger.info("Using in-memory cache (Redis not available)")
self.memory_cache = {}
def get_cache_key(self, cache_type: str, identifier: str, **kwargs) -> str:
"""Generate a unique cache key."""
try:
# Create a hash of the identifier and additional parameters
key_data = f"{cache_type}:{identifier}"
if kwargs:
key_data += ":" + json.dumps(kwargs, sort_keys=True)
# Create hash for consistent key length
key_hash = hashlib.md5(key_data.encode()).hexdigest()
return f"content_strategy:{cache_type}:{key_hash}"
except Exception as e:
logger.error(f"Error generating cache key: {str(e)}")
return f"content_strategy:{cache_type}:{identifier}"
async def get_cached_data(self, cache_type: str, identifier: str, **kwargs) -> Optional[Dict[str, Any]]:
"""Retrieve cached data."""
try:
if not self.redis_available:
return self._get_from_memory_cache(cache_type, identifier, **kwargs)
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
cached_data = self.redis_client.get(cache_key)
if cached_data:
data = json.loads(cached_data)
logger.info(f"Cache hit for {cache_type}:{identifier}")
return data
else:
logger.info(f"Cache miss for {cache_type}:{identifier}")
return None
except Exception as e:
logger.error(f"Error retrieving cached data: {str(e)}")
return None
async def set_cached_data(self, cache_type: str, identifier: str, data: Dict[str, Any], **kwargs) -> bool:
"""Store data in cache."""
try:
if not self.redis_available:
return self._set_in_memory_cache(cache_type, identifier, data, **kwargs)
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
ttl = self.cache_config.get(cache_type, {}).get('ttl', 3600)
# Add metadata to cached data
cached_data = {
'data': data,
'metadata': {
'cached_at': datetime.utcnow().isoformat(),
'cache_type': cache_type,
'identifier': identifier,
'ttl': ttl
}
}
# Store in Redis with TTL
result = self.redis_client.setex(
cache_key,
ttl,
json.dumps(cached_data, default=str)
)
if result:
logger.info(f"Data cached successfully for {cache_type}:{identifier}")
await self._update_cache_stats(cache_type, 'set')
return True
else:
logger.warning(f"Failed to cache data for {cache_type}:{identifier}")
return False
except Exception as e:
logger.error(f"Error setting cached data: {str(e)}")
return False
async def invalidate_cache(self, cache_type: str, identifier: str, **kwargs) -> bool:
"""Invalidate specific cached data."""
try:
if not self.redis_available:
return self._invalidate_memory_cache(cache_type, identifier, **kwargs)
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
result = self.redis_client.delete(cache_key)
if result:
logger.info(f"Cache invalidated for {cache_type}:{identifier}")
await self._update_cache_stats(cache_type, 'invalidate')
return True
else:
logger.warning(f"No cache entry found to invalidate for {cache_type}:{identifier}")
return False
except Exception as e:
logger.error(f"Error invalidating cache: {str(e)}")
return False
async def clear_cache_type(self, cache_type: str) -> bool:
"""Clear all cached data of a specific type."""
try:
if not self.redis_available:
return self._clear_memory_cache_type(cache_type)
pattern = f"content_strategy:{cache_type}:*"
keys = self.redis_client.keys(pattern)
if keys:
result = self.redis_client.delete(*keys)
logger.info(f"Cleared {result} cache entries for {cache_type}")
await self._update_cache_stats(cache_type, 'clear')
return True
else:
logger.info(f"No cache entries found for {cache_type}")
return True
except Exception as e:
logger.error(f"Error clearing cache type {cache_type}: {str(e)}")
return False
async def get_cache_stats(self, cache_type: Optional[str] = None) -> Dict[str, Any]:
"""Get cache statistics."""
try:
if not self.redis_available:
return self._get_memory_cache_stats(cache_type)
stats = {}
if cache_type:
pattern = f"content_strategy:{cache_type}:*"
keys = self.redis_client.keys(pattern)
stats[cache_type] = {
'entries': len(keys),
'size_bytes': sum(len(self.redis_client.get(key) or '') for key in keys),
'config': self.cache_config.get(cache_type, {})
}
else:
for cache_type_name in self.cache_config.keys():
pattern = f"content_strategy:{cache_type_name}:*"
keys = self.redis_client.keys(pattern)
stats[cache_type_name] = {
'entries': len(keys),
'size_bytes': sum(len(self.redis_client.get(key) or '') for key in keys),
'config': self.cache_config.get(cache_type_name, {})
}
return stats
except Exception as e:
logger.error(f"Error getting cache stats: {str(e)}")
return {}
async def optimize_cache(self) -> Dict[str, Any]:
"""Optimize cache by removing expired entries and managing memory."""
try:
if not self.redis_available:
return self._optimize_memory_cache()
optimization_results = {}
for cache_type, config in self.cache_config.items():
pattern = f"content_strategy:{cache_type}:*"
keys = self.redis_client.keys(pattern)
if len(keys) > config.get('max_size', 1000):
# Remove oldest entries to maintain max size
keys_with_times = []
for key in keys:
ttl = self.redis_client.ttl(key)
if ttl > 0: # Key still has TTL
keys_with_times.append((key, ttl))
# Sort by TTL (oldest first)
keys_with_times.sort(key=lambda x: x[1])
# Remove excess entries
excess_count = len(keys) - config.get('max_size', 1000)
keys_to_remove = [key for key, _ in keys_with_times[:excess_count]]
if keys_to_remove:
removed_count = self.redis_client.delete(*keys_to_remove)
optimization_results[cache_type] = {
'entries_removed': removed_count,
'reason': 'max_size_exceeded'
}
logger.info(f"Optimized {cache_type} cache: removed {removed_count} entries")
return optimization_results
except Exception as e:
logger.error(f"Error optimizing cache: {str(e)}")
return {}
async def _update_cache_stats(self, cache_type: str, operation: str) -> None:
"""Update cache statistics."""
try:
if not self.redis_available:
return
stats_key = f"cache_stats:{cache_type}"
current_stats = self.redis_client.hgetall(stats_key)
# Update operation counts
current_stats[f"{operation}_count"] = str(int(current_stats.get(f"{operation}_count", 0)) + 1)
current_stats['last_updated'] = datetime.utcnow().isoformat()
# Store updated stats
self.redis_client.hset(stats_key, mapping=current_stats)
except Exception as e:
logger.error(f"Error updating cache stats: {str(e)}")
# Memory cache fallback methods
def _get_from_memory_cache(self, cache_type: str, identifier: str, **kwargs) -> Optional[Dict[str, Any]]:
"""Get data from memory cache."""
try:
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
cached_data = self.memory_cache.get(cache_key)
if cached_data:
# Check if data is still valid
cached_at = datetime.fromisoformat(cached_data['metadata']['cached_at'])
ttl = cached_data['metadata']['ttl']
if datetime.utcnow() - cached_at < timedelta(seconds=ttl):
logger.info(f"Memory cache hit for {cache_type}:{identifier}")
return cached_data['data']
else:
# Remove expired entry
del self.memory_cache[cache_key]
return None
except Exception as e:
logger.error(f"Error getting from memory cache: {str(e)}")
return None
def _set_in_memory_cache(self, cache_type: str, identifier: str, data: Dict[str, Any], **kwargs) -> bool:
"""Set data in memory cache."""
try:
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
ttl = self.cache_config.get(cache_type, {}).get('ttl', 3600)
cached_data = {
'data': data,
'metadata': {
'cached_at': datetime.utcnow().isoformat(),
'cache_type': cache_type,
'identifier': identifier,
'ttl': ttl
}
}
# Check max size and remove oldest if needed
max_size = self.cache_config.get(cache_type, {}).get('max_size', 1000)
if len(self.memory_cache) >= max_size:
# Remove oldest entry
oldest_key = min(self.memory_cache.keys(),
key=lambda k: self.memory_cache[k]['metadata']['cached_at'])
del self.memory_cache[oldest_key]
self.memory_cache[cache_key] = cached_data
logger.info(f"Data cached in memory for {cache_type}:{identifier}")
return True
except Exception as e:
logger.error(f"Error setting in memory cache: {str(e)}")
return False
def _invalidate_memory_cache(self, cache_type: str, identifier: str, **kwargs) -> bool:
"""Invalidate memory cache entry."""
try:
cache_key = self.get_cache_key(cache_type, identifier, **kwargs)
if cache_key in self.memory_cache:
del self.memory_cache[cache_key]
logger.info(f"Memory cache invalidated for {cache_type}:{identifier}")
return True
return False
except Exception as e:
logger.error(f"Error invalidating memory cache: {str(e)}")
return False
def _clear_memory_cache_type(self, cache_type: str) -> bool:
"""Clear memory cache by type."""
try:
keys_to_remove = [key for key in self.memory_cache.keys()
if key.startswith(f"content_strategy:{cache_type}:")]
for key in keys_to_remove:
del self.memory_cache[key]
logger.info(f"Cleared {len(keys_to_remove)} memory cache entries for {cache_type}")
return True
except Exception as e:
logger.error(f"Error clearing memory cache type: {str(e)}")
return False
def _get_memory_cache_stats(self, cache_type: Optional[str] = None) -> Dict[str, Any]:
"""Get memory cache statistics."""
try:
stats = {}
if cache_type:
keys = [key for key in self.memory_cache.keys()
if key.startswith(f"content_strategy:{cache_type}:")]
stats[cache_type] = {
'entries': len(keys),
'size_bytes': sum(len(str(value)) for value in [self.memory_cache[key] for key in keys]),
'config': self.cache_config.get(cache_type, {})
}
else:
for cache_type_name in self.cache_config.keys():
keys = [key for key in self.memory_cache.keys()
if key.startswith(f"content_strategy:{cache_type_name}:")]
stats[cache_type_name] = {
'entries': len(keys),
'size_bytes': sum(len(str(value)) for value in [self.memory_cache[key] for key in keys]),
'config': self.cache_config.get(cache_type_name, {})
}
return stats
except Exception as e:
logger.error(f"Error getting memory cache stats: {str(e)}")
return {}
def _optimize_memory_cache(self) -> Dict[str, Any]:
"""Optimize memory cache."""
try:
optimization_results = {}
for cache_type, config in self.cache_config.items():
keys = [key for key in self.memory_cache.keys()
if key.startswith(f"content_strategy:{cache_type}:")]
if len(keys) > config.get('max_size', 1000):
# Remove oldest entries
keys_with_times = []
for key in keys:
cached_at = datetime.fromisoformat(self.memory_cache[key]['metadata']['cached_at'])
keys_with_times.append((key, cached_at))
# Sort by cached time (oldest first)
keys_with_times.sort(key=lambda x: x[1])
# Remove excess entries
excess_count = len(keys) - config.get('max_size', 1000)
keys_to_remove = [key for key, _ in keys_with_times[:excess_count]]
for key in keys_to_remove:
del self.memory_cache[key]
optimization_results[cache_type] = {
'entries_removed': len(keys_to_remove),
'reason': 'max_size_exceeded'
}
return optimization_results
except Exception as e:
logger.error(f"Error optimizing memory cache: {str(e)}")
return {}
# Cache-specific methods for different data types
async def cache_ai_analysis(self, user_id: int, analysis_type: str, analysis_data: Dict[str, Any]) -> bool:
"""Cache AI analysis results."""
return await self.set_cached_data('ai_analysis', f"{user_id}:{analysis_type}", analysis_data)
async def get_cached_ai_analysis(self, user_id: int, analysis_type: str) -> Optional[Dict[str, Any]]:
"""Get cached AI analysis results."""
return await self.get_cached_data('ai_analysis', f"{user_id}:{analysis_type}")
async def cache_onboarding_data(self, user_id: int, onboarding_data: Dict[str, Any]) -> bool:
"""Cache onboarding data."""
return await self.set_cached_data('onboarding_data', str(user_id), onboarding_data)
async def get_cached_onboarding_data(self, user_id: int) -> Optional[Dict[str, Any]]:
"""Get cached onboarding data."""
return await self.get_cached_data('onboarding_data', str(user_id))
async def cache_strategy(self, strategy_id: int, strategy_data: Dict[str, Any]) -> bool:
"""Cache strategy data."""
return await self.set_cached_data('strategy_cache', str(strategy_id), strategy_data)
async def get_cached_strategy(self, strategy_id: int) -> Optional[Dict[str, Any]]:
"""Get cached strategy data."""
return await self.get_cached_data('strategy_cache', str(strategy_id))
async def cache_field_transformations(self, user_id: int, transformations: Dict[str, Any]) -> bool:
    """Store field transformations in the 'field_transformations' cache, keyed by ``str(user_id)``."""
    entry_key = str(user_id)
    stored = await self.set_cached_data('field_transformations', entry_key, transformations)
    return stored
async def get_cached_field_transformations(self, user_id: int) -> Optional[Dict[str, Any]]:
    """Look up field transformations in the 'field_transformations' cache under ``str(user_id)``."""
    entry_key = str(user_id)
    cached = await self.get_cached_data('field_transformations', entry_key)
    return cached

View File

@@ -0,0 +1,503 @@
"""
Health Monitoring Service
System health monitoring and performance tracking.
"""
import logging
import time
import asyncio
from typing import Dict, Any, List, Optional
from datetime import datetime, timedelta
from sqlalchemy.orm import Session
from sqlalchemy import text
logger = logging.getLogger(__name__)
class HealthMonitoringService:
    """Service for system health monitoring and assessment.

    Every component check returns a plain dict whose 'status' is one of
    'healthy', 'degraded' or 'unhealthy'. Optional services that were not
    supplied to ``check_system_health`` are reported as 'not_available' and
    are ignored when the overall status is computed.
    """

    def __init__(self):
        """Set the default thresholds and an empty 'last report' placeholder."""
        # Limits used to classify a measurement as healthy vs. degraded.
        self.health_thresholds = {
            'database_response_time': 1.0,  # seconds
            'cache_response_time': 0.1,  # seconds
            'ai_service_response_time': 5.0,  # seconds
            'memory_usage_threshold': 80,  # percentage
            'cpu_usage_threshold': 80,  # percentage
            'disk_usage_threshold': 90,  # percentage
            'error_rate_threshold': 0.05  # 5%
        }
        # Replaced by the most recent report from check_system_health().
        self.health_status = {
            'timestamp': None,
            'overall_status': 'healthy',
            'components': {},
            'alerts': [],
            'recommendations': []
        }

    @staticmethod
    def _combine_statuses(*statuses: str) -> str:
        """Collapse sub-check statuses into one; the worst status wins."""
        if 'unhealthy' in statuses:
            return 'unhealthy'
        if all(status == 'healthy' for status in statuses):
            return 'healthy'
        return 'degraded'

    async def check_system_health(self, db: "Session", cache_service=None, ai_service=None) -> Dict[str, Any]:
        """Perform comprehensive system health check.

        Args:
            db: Database session used for the database probes.
            cache_service: Optional cache service; skipped when ``None``.
            ai_service: Optional AI service; skipped when ``None``.

        Returns:
            A report dict with 'timestamp', 'overall_status', 'components',
            'alerts' and 'recommendations'. On an unexpected failure the
            report has 'overall_status' set to 'error'.
        """
        try:
            logger.info("Starting comprehensive system health check")
            health_report = {
                'timestamp': datetime.utcnow().isoformat(),
                'overall_status': 'healthy',
                'components': {},
                'alerts': [],
                'recommendations': []
            }
            components = health_report['components']

            # Check database health
            components['database'] = await self._check_database_health(db)

            # Check cache health. Compare against None explicitly so that a
            # service object that happens to be falsy is still checked.
            if cache_service is not None:
                components['cache'] = await self._check_cache_health(cache_service)
            else:
                components['cache'] = {'status': 'not_available', 'message': 'Cache service not provided'}

            # Check AI service health
            if ai_service is not None:
                components['ai_service'] = await self._check_ai_service_health(ai_service)
            else:
                components['ai_service'] = {'status': 'not_available', 'message': 'AI service not provided'}

            # Check system resources
            components['system'] = await self._check_system_resources()

            # Determine overall status, then derive alerts and recommendations
            health_report['overall_status'] = self._determine_overall_health(components)
            health_report['alerts'] = self._generate_health_alerts(components)
            health_report['recommendations'] = await self._generate_health_recommendations(components)

            # Remember the latest report for get_health_history()
            self.health_status = health_report
            logger.info(f"System health check completed. Overall status: {health_report['overall_status']}")
            return health_report
        except Exception as e:
            logger.error(f"Error during system health check: {str(e)}")
            return {
                'timestamp': datetime.utcnow().isoformat(),
                'overall_status': 'error',
                'components': {},
                'alerts': [f'Health check failed: {str(e)}'],
                'recommendations': ['Investigate health check system']
            }

    async def _check_database_health(self, db: "Session") -> Dict[str, Any]:
        """Check database health and performance.

        Runs a trivial ``SELECT 1`` connectivity probe and a timed
        ``information_schema`` count, then gathers statistics. A failed probe
        makes the component 'unhealthy'; a slow query makes it 'degraded'.
        """
        try:
            start_time = time.perf_counter()

            # Test database connection
            try:
                result = db.execute(text("SELECT 1"))
                result.fetchone()
                connection_status = 'healthy'
            except Exception as e:
                connection_status = 'unhealthy'
                logger.error(f"Database connection test failed: {str(e)}")

            # Test query performance
            try:
                query_start = time.perf_counter()
                result = db.execute(text("SELECT COUNT(*) FROM information_schema.tables"))
                result.fetchone()
                query_time = time.perf_counter() - query_start
                query_status = 'healthy' if query_time <= self.health_thresholds['database_response_time'] else 'degraded'
            except Exception as e:
                query_time = 0
                query_status = 'unhealthy'
                logger.error(f"Database query test failed: {str(e)}")

            # Check database size and performance
            try:
                db_stats = await self._get_database_statistics(db)
            except Exception as e:
                db_stats = {'error': str(e)}

            total_time = time.perf_counter() - start_time
            return {
                'status': self._combine_statuses(connection_status, query_status),
                'connection_status': connection_status,
                'query_status': query_status,
                'response_time': query_time,
                'total_check_time': total_time,
                'statistics': db_stats,
                'last_checked': datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"Error checking database health: {str(e)}")
            return {
                'status': 'unhealthy',
                'error': str(e),
                'last_checked': datetime.utcnow().isoformat()
            }

    async def _check_cache_health(self, cache_service) -> Dict[str, Any]:
        """Check cache health and performance.

        Fetches cache statistics as a connectivity probe, then times a
        write/read/invalidate round trip of a throw-away 'health_check' entry.
        """
        try:
            start_time = time.perf_counter()

            # Test cache connectivity
            try:
                cache_stats = await cache_service.get_cache_stats()
                connectivity_status = 'healthy'
            except Exception as e:
                cache_stats = {}
                connectivity_status = 'unhealthy'
                logger.error(f"Cache connectivity test failed: {str(e)}")

            # Test cache performance
            try:
                test_key = f"health_check_{int(time.time())}"
                test_data = {'test': 'data', 'timestamp': datetime.utcnow().isoformat()}

                # Test write
                write_start = time.perf_counter()
                write_success = await cache_service.set_cached_data('health_check', test_key, test_data)
                write_time = time.perf_counter() - write_start

                # Test read
                read_start = time.perf_counter()
                read_data = await cache_service.get_cached_data('health_check', test_key)
                read_time = time.perf_counter() - read_start

                # Clean up
                await cache_service.invalidate_cache('health_check', test_key)

                round_trip_ok = (write_time + read_time) <= self.health_thresholds['cache_response_time']
                performance_status = 'healthy' if write_success and read_data and round_trip_ok else 'degraded'
            except Exception as e:
                write_time = 0
                read_time = 0
                performance_status = 'unhealthy'
                logger.error(f"Cache performance test failed: {str(e)}")

            total_time = time.perf_counter() - start_time
            return {
                'status': self._combine_statuses(connectivity_status, performance_status),
                'connectivity_status': connectivity_status,
                'performance_status': performance_status,
                'write_time': write_time,
                'read_time': read_time,
                'total_check_time': total_time,
                'statistics': cache_stats,
                'last_checked': datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"Error checking cache health: {str(e)}")
            return {
                'status': 'unhealthy',
                'error': str(e),
                'last_checked': datetime.utcnow().isoformat()
            }

    async def _check_ai_service_health(self, ai_service) -> Dict[str, Any]:
        """Check AI service health and performance.

        Sends one small test prompt through ``ai_service._call_ai_service``;
        a falsy response or an exception counts as 'unhealthy', a slow
        response as 'degraded'.
        """
        try:
            start_time = time.perf_counter()

            # Test AI service connectivity
            try:
                test_prompt = "Test health check"
                ai_start = time.perf_counter()
                ai_response = await ai_service._call_ai_service(test_prompt, 'health_check')
                ai_time = time.perf_counter() - ai_start
                connectivity_status = 'healthy' if ai_response else 'unhealthy'
                performance_status = 'healthy' if ai_time <= self.health_thresholds['ai_service_response_time'] else 'degraded'
            except Exception as e:
                ai_time = 0
                connectivity_status = 'unhealthy'
                performance_status = 'unhealthy'
                logger.error(f"AI service health check failed: {str(e)}")

            total_time = time.perf_counter() - start_time
            return {
                'status': self._combine_statuses(connectivity_status, performance_status),
                'connectivity_status': connectivity_status,
                'performance_status': performance_status,
                'response_time': ai_time,
                'total_check_time': total_time,
                'last_checked': datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"Error checking AI service health: {str(e)}")
            return {
                'status': 'unhealthy',
                'error': str(e),
                'last_checked': datetime.utcnow().isoformat()
            }

    async def _check_system_resources(self) -> Dict[str, Any]:
        """Check system resource usage (CPU, memory, disk, network) via psutil."""
        try:
            import psutil

            # CPU usage. cpu_percent(interval=1) sleeps for a full second, so
            # run it in the default executor instead of stalling the event loop.
            loop = asyncio.get_running_loop()
            cpu_percent = await loop.run_in_executor(None, lambda: psutil.cpu_percent(interval=1))
            cpu_status = 'healthy' if cpu_percent <= self.health_thresholds['cpu_usage_threshold'] else 'degraded'

            # Memory usage
            memory = psutil.virtual_memory()
            memory_percent = memory.percent
            memory_status = 'healthy' if memory_percent <= self.health_thresholds['memory_usage_threshold'] else 'degraded'

            # Disk usage
            disk = psutil.disk_usage('/')
            disk_percent = disk.percent
            disk_status = 'healthy' if disk_percent <= self.health_thresholds['disk_usage_threshold'] else 'degraded'

            # Network status: only checks that the counters can be read.
            try:
                psutil.net_io_counters()
                network_status = 'healthy'
            except Exception:
                network_status = 'degraded'

            return {
                'status': self._combine_statuses(cpu_status, memory_status, disk_status, network_status),
                'cpu': {
                    'usage_percent': cpu_percent,
                    'status': cpu_status
                },
                'memory': {
                    'usage_percent': memory_percent,
                    'available_gb': memory.available / (1024**3),
                    'total_gb': memory.total / (1024**3),
                    'status': memory_status
                },
                'disk': {
                    'usage_percent': disk_percent,
                    'free_gb': disk.free / (1024**3),
                    'total_gb': disk.total / (1024**3),
                    'status': disk_status
                },
                'network': {
                    'status': network_status
                },
                'last_checked': datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"Error checking system resources: {str(e)}")
            return {
                'status': 'unhealthy',
                'error': str(e),
                'last_checked': datetime.utcnow().isoformat()
            }

    async def _get_database_statistics(self, db: "Session") -> Dict[str, Any]:
        """Get database statistics.

        Each statistic falls back to 'unknown' when its query fails.
        NOTE(review): the size query uses pg_* functions, so it presumably
        only succeeds on PostgreSQL - confirm against the deployed database.
        """
        try:
            stats = {}

            # Get table counts (simplified)
            try:
                result = db.execute(text("SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public'"))
                stats['table_count'] = result.fetchone()[0]
            except Exception:
                stats['table_count'] = 'unknown'

            # Get database size (simplified)
            try:
                result = db.execute(text("SELECT pg_size_pretty(pg_database_size(current_database()))"))
                stats['database_size'] = result.fetchone()[0]
            except Exception:
                stats['database_size'] = 'unknown'

            return stats
        except Exception as e:
            logger.error(f"Error getting database statistics: {str(e)}")
            return {'error': str(e)}

    def _determine_overall_health(self, components: Dict[str, Any]) -> str:
        """Determine overall system health based on component status.

        Components reported as 'not_available' were never checked and are
        ignored, so omitting an optional service does not turn an otherwise
        healthy system into 'unknown'.
        """
        try:
            statuses = [
                component_data['status']
                for component_data in components.values()
                if isinstance(component_data, dict)
                and 'status' in component_data
                and component_data['status'] != 'not_available'
            ]
            if not statuses:
                return 'unknown'
            if 'unhealthy' in statuses:
                return 'unhealthy'
            elif 'degraded' in statuses:
                return 'degraded'
            elif all(status == 'healthy' for status in statuses):
                return 'healthy'
            else:
                return 'unknown'
        except Exception as e:
            logger.error(f"Error determining overall health: {str(e)}")
            return 'unknown'

    def _generate_health_alerts(self, components: Dict[str, Any]) -> List[str]:
        """Generate health alerts based on component status.

        Emits one CRITICAL/WARNING line per unhealthy/degraded component plus
        metric-specific warnings when a threshold is exceeded.
        """
        try:
            alerts = []
            for component_name, component_data in components.items():
                if isinstance(component_data, dict) and 'status' in component_data:
                    status = component_data['status']
                    if status == 'unhealthy':
                        alerts.append(f"CRITICAL: {component_name} is unhealthy")
                    elif status == 'degraded':
                        alerts.append(f"WARNING: {component_name} performance is degraded")

                    # Component-specific alerts
                    if component_name == 'database' and component_data.get('response_time', 0) > self.health_thresholds['database_response_time']:
                        alerts.append(f"WARNING: Database response time is slow: {component_data['response_time']:.2f}s")
                    elif component_name == 'cache' and component_data.get('write_time', 0) + component_data.get('read_time', 0) > self.health_thresholds['cache_response_time']:
                        alerts.append(f"WARNING: Cache response time is slow: {component_data.get('write_time', 0) + component_data.get('read_time', 0):.2f}s")
                    elif component_name == 'ai_service' and component_data.get('response_time', 0) > self.health_thresholds['ai_service_response_time']:
                        alerts.append(f"WARNING: AI service response time is slow: {component_data['response_time']:.2f}s")
                    elif component_name == 'system':
                        cpu_data = component_data.get('cpu', {})
                        memory_data = component_data.get('memory', {})
                        disk_data = component_data.get('disk', {})
                        if cpu_data.get('usage_percent', 0) > self.health_thresholds['cpu_usage_threshold']:
                            alerts.append(f"WARNING: High CPU usage: {cpu_data['usage_percent']:.1f}%")
                        if memory_data.get('usage_percent', 0) > self.health_thresholds['memory_usage_threshold']:
                            alerts.append(f"WARNING: High memory usage: {memory_data['usage_percent']:.1f}%")
                        if disk_data.get('usage_percent', 0) > self.health_thresholds['disk_usage_threshold']:
                            alerts.append(f"WARNING: High disk usage: {disk_data['usage_percent']:.1f}%")
            return alerts
        except Exception as e:
            logger.error(f"Error generating health alerts: {str(e)}")
            return ['Error generating health alerts']

    async def _generate_health_recommendations(self, components: Dict[str, Any]) -> List[str]:
        """Generate health recommendations based on component status."""
        try:
            recommendations = []
            for component_name, component_data in components.items():
                if isinstance(component_data, dict) and 'status' in component_data:
                    status = component_data['status']
                    if status == 'unhealthy':
                        if component_name == 'database':
                            recommendations.append("Investigate database connectivity and configuration")
                        elif component_name == 'cache':
                            recommendations.append("Check cache service configuration and connectivity")
                        elif component_name == 'ai_service':
                            recommendations.append("Verify AI service configuration and API keys")
                        elif component_name == 'system':
                            recommendations.append("Check system resources and restart if necessary")
                    elif status == 'degraded':
                        if component_name == 'database':
                            recommendations.append("Optimize database queries and add indexes")
                        elif component_name == 'cache':
                            recommendations.append("Consider cache optimization and memory allocation")
                        elif component_name == 'ai_service':
                            recommendations.append("Review AI service performance and rate limits")
                        elif component_name == 'system':
                            recommendations.append("Monitor system resources and consider scaling")

                    # Specific recommendations based on metrics
                    if component_name == 'database' and component_data.get('response_time', 0) > self.health_thresholds['database_response_time']:
                        recommendations.append("Add database indexes for frequently queried columns")
                        recommendations.append("Consider database connection pooling")
                    elif component_name == 'system':
                        cpu_data = component_data.get('cpu', {})
                        memory_data = component_data.get('memory', {})
                        disk_data = component_data.get('disk', {})
                        if cpu_data.get('usage_percent', 0) > self.health_thresholds['cpu_usage_threshold']:
                            recommendations.append("Consider scaling CPU resources or optimizing CPU-intensive operations")
                        if memory_data.get('usage_percent', 0) > self.health_thresholds['memory_usage_threshold']:
                            recommendations.append("Increase memory allocation or optimize memory usage")
                        if disk_data.get('usage_percent', 0) > self.health_thresholds['disk_usage_threshold']:
                            recommendations.append("Clean up disk space or increase storage capacity")
            return recommendations
        except Exception as e:
            logger.error(f"Error generating health recommendations: {str(e)}")
            return ['Unable to generate health recommendations']

    async def get_health_history(self, hours: int = 24) -> List[Dict[str, Any]]:
        """Get health check history.

        No history is persisted yet: the result is a list holding only the
        latest report (empty before the first check). ``hours`` is accepted
        for API compatibility but is currently unused.
        """
        try:
            # This would typically query a database for historical health data
            # For now, return the current health status
            return [self.health_status] if self.health_status.get('timestamp') else []
        except Exception as e:
            logger.error(f"Error getting health history: {str(e)}")
            return []

    async def set_health_thresholds(self, thresholds: Dict[str, float]) -> bool:
        """Update health monitoring thresholds.

        Only keys that already exist in ``health_thresholds`` are updated;
        unknown keys are ignored. Returns ``False`` if the update raised.
        """
        try:
            for key, value in thresholds.items():
                if key in self.health_thresholds:
                    self.health_thresholds[key] = value
                    logger.info(f"Updated health threshold {key}: {value}")
            return True
        except Exception as e:
            logger.error(f"Error setting health thresholds: {str(e)}")
            return False

    async def get_health_thresholds(self) -> Dict[str, float]:
        """Get a copy of the current health monitoring thresholds."""
        return self.health_thresholds.copy()

    async def start_continuous_monitoring(self, interval_seconds: int = 300) -> None:
        """Start continuous health monitoring.

        Loops forever, logging a heartbeat every ``interval_seconds``; no
        actual health check is performed yet.
        """
        try:
            logger.info(f"Starting continuous health monitoring with {interval_seconds}s interval")
            while True:
                try:
                    # This would typically use the database session and services
                    # For now, just log that monitoring is active
                    logger.info("Continuous health monitoring check")
                    await asyncio.sleep(interval_seconds)
                except Exception as e:
                    logger.error(f"Error in continuous health monitoring: {str(e)}")
                    await asyncio.sleep(60)  # Wait 1 minute before retrying
        except Exception as e:
            logger.error(f"Error starting continuous monitoring: {str(e)}")

View File

@@ -0,0 +1,507 @@
"""
Optimization Service
Performance optimization and monitoring.
"""
import logging
import time
import asyncio
from typing import Dict, Any, List, Optional, Callable
from datetime import datetime, timedelta
from sqlalchemy.orm import Session
from sqlalchemy import text
logger = logging.getLogger(__name__)
class PerformanceOptimizationService:
"""Service for performance optimization and monitoring."""
def __init__(self):
self.performance_metrics = {
'response_times': {},
'database_queries': {},
'memory_usage': {},
'cache_hit_rates': {}
}
self.optimization_config = {
'max_response_time': 2.0, # seconds
'max_database_queries': 10,
'max_memory_usage': 512, # MB
'min_cache_hit_rate': 0.8
}
async def optimize_response_time(self, operation_name: str, operation_func: Callable, *args, **kwargs) -> Dict[str, Any]:
    """Run an operation, time it, and report whether it met the response-time limit.

    Args:
        operation_name: Label under which the timing is recorded.
        operation_func: Callable to execute; may be a coroutine function or a
            plain function (an awaitable result is awaited, anything else is
            used as-is).
        *args, **kwargs: Forwarded to ``operation_func``.

    Returns:
        Dict with 'result', 'response_time' (seconds),
        'optimization_suggestions' and 'performance_status' ('optimal',
        'needs_optimization', or 'error' when the operation raised).
    """
    import inspect  # local import, matching the file's function-scope import style

    try:
        # perf_counter is monotonic, so the interval cannot be skewed by
        # wall-clock adjustments.
        started = time.perf_counter()

        # Execute operation; only await when the callable produced an awaitable.
        result = operation_func(*args, **kwargs)
        if inspect.isawaitable(result):
            result = await result
        response_time = time.perf_counter() - started

        # Record performance metrics
        self._record_response_time(operation_name, response_time)

        # Check if optimization is needed
        too_slow = response_time > self.optimization_config['max_response_time']
        if too_slow:
            optimization_suggestions = await self._suggest_response_time_optimizations(operation_name, response_time)
            logger.warning(f"Slow response time for {operation_name}: {response_time:.2f}s")
        else:
            optimization_suggestions = []

        return {
            'result': result,
            'response_time': response_time,
            'optimization_suggestions': optimization_suggestions,
            'performance_status': 'needs_optimization' if too_slow else 'optimal'
        }
    except Exception as e:
        logger.error(f"Error optimizing response time for {operation_name}: {str(e)}")
        return {
            'result': None,
            'response_time': 0.0,
            'optimization_suggestions': ['Error occurred during operation'],
            'performance_status': 'error'
        }
async def optimize_database_queries(self, db: "Session", query_func: Callable, *args, **kwargs) -> Dict[str, Any]:
    """Run a query function, and report its query count and duration.

    Args:
        db: Database session; passed as the first argument to ``query_func``.
        query_func: Callable to execute; may be a coroutine function or a
            plain function. Metrics are recorded under its ``__name__`` (or
            its ``repr`` when it has none, e.g. ``functools.partial``).
        *args, **kwargs: Forwarded to ``query_func`` after ``db``.

    Returns:
        Dict with 'result', 'query_count', 'response_time' (seconds),
        'optimization_suggestions' and 'performance_status' ('optimal',
        'needs_optimization', or 'error' when the query function raised).
    """
    import inspect  # local import, matching the file's function-scope import style

    # Resolve the label once, outside the try block: partials and callable
    # instances have no __name__, and reading it inside the error handler
    # would make the handler itself raise.
    func_name = getattr(query_func, '__name__', None) or repr(query_func)

    try:
        started = time.perf_counter()
        query_count_before = self._get_query_count(db)

        # Execute query function; only await when it produced an awaitable.
        result = query_func(db, *args, **kwargs)
        if inspect.isawaitable(result):
            result = await result
        response_time = time.perf_counter() - started

        query_count = self._get_query_count(db) - query_count_before

        # Record database performance
        self._record_database_performance(func_name, query_count, response_time)

        # Check if optimization is needed
        too_many = query_count > self.optimization_config['max_database_queries']
        if too_many:
            optimization_suggestions = await self._suggest_database_optimizations(func_name, query_count, response_time)
            logger.warning(f"High query count for {func_name}: {query_count} queries")
        else:
            optimization_suggestions = []

        return {
            'result': result,
            'query_count': query_count,
            'response_time': response_time,
            'optimization_suggestions': optimization_suggestions,
            'performance_status': 'needs_optimization' if too_many else 'optimal'
        }
    except Exception as e:
        logger.error(f"Error optimizing database queries for {func_name}: {str(e)}")
        return {
            'result': None,
            'query_count': 0,
            'response_time': 0.0,
            'optimization_suggestions': ['Error occurred during database operation'],
            'performance_status': 'error'
        }
async def optimize_memory_usage(self, operation_name: str, operation_func: Callable, *args, **kwargs) -> Dict[str, Any]:
    """Run an operation and report how much the process RSS changed.

    Args:
        operation_name: Label under which the measurement is recorded.
        operation_func: Callable to execute; may be a coroutine function or a
            plain function (an awaitable result is awaited, anything else is
            used as-is).
        *args, **kwargs: Forwarded to ``operation_func``.

    Returns:
        Dict with 'result', 'memory_used_mb' (RSS after minus RSS before, in
        MB), 'optimization_suggestions' and 'performance_status' ('optimal',
        'needs_optimization', or 'error' when anything raised, including a
        missing psutil).
    """
    import inspect  # local import, matching the file's function-scope import style

    try:
        import psutil
        import os

        process = psutil.Process(os.getpid())
        memory_before = process.memory_info().rss / 1024 / 1024  # MB

        # Execute operation; only await when the callable produced an awaitable.
        result = operation_func(*args, **kwargs)
        if inspect.isawaitable(result):
            result = await result

        memory_after = process.memory_info().rss / 1024 / 1024  # MB
        memory_used = memory_after - memory_before

        # Record memory usage
        self._record_memory_usage(operation_name, memory_used)

        # Check if optimization is needed
        too_heavy = memory_used > self.optimization_config['max_memory_usage']
        if too_heavy:
            optimization_suggestions = await self._suggest_memory_optimizations(operation_name, memory_used)
            logger.warning(f"High memory usage for {operation_name}: {memory_used:.2f}MB")
        else:
            optimization_suggestions = []

        return {
            'result': result,
            'memory_used_mb': memory_used,
            'optimization_suggestions': optimization_suggestions,
            'performance_status': 'needs_optimization' if too_heavy else 'optimal'
        }
    except Exception as e:
        logger.error(f"Error optimizing memory usage for {operation_name}: {str(e)}")
        return {
            'result': None,
            'memory_used_mb': 0.0,
            'optimization_suggestions': ['Error occurred during memory optimization'],
            'performance_status': 'error'
        }
async def optimize_cache_performance(self, cache_service, operation_name: str) -> Dict[str, Any]:
    """Summarise cache statistics and flag cache types with a low hit rate.

    Returns a dict with 'cache_stats', 'hit_rates', 'optimization_suggestions'
    and 'performance_status' ('optimal', 'needs_optimization' or 'error').
    """
    try:
        cache_stats = await cache_service.get_cache_stats()

        # Simplified: real hits/misses are not tracked, so every cache type
        # that holds at least one entry gets a fixed placeholder rate.
        hit_rates = {
            cache_type: 0.8
            for cache_type, stats in cache_stats.items()
            if stats.get('entries', 0) > 0
        }

        self._record_cache_performance(operation_name, hit_rates)

        optimization_suggestions = [
            f"Low cache hit rate for {cache_type}: {hit_rate:.2%}"
            for cache_type, hit_rate in hit_rates.items()
            if hit_rate < self.optimization_config['min_cache_hit_rate']
        ]

        status = 'needs_optimization' if optimization_suggestions else 'optimal'
        return {
            'cache_stats': cache_stats,
            'hit_rates': hit_rates,
            'optimization_suggestions': optimization_suggestions,
            'performance_status': status
        }
    except Exception as e:
        logger.error(f"Error optimizing cache performance: {str(e)}")
        return {
            'cache_stats': {},
            'hit_rates': {},
            'optimization_suggestions': ['Error occurred during cache optimization'],
            'performance_status': 'error'
        }
def _record_response_time(self, operation_name: str, response_time: float) -> None:
"""Record response time metrics."""
try:
if operation_name not in self.performance_metrics['response_times']:
self.performance_metrics['response_times'][operation_name] = []
self.performance_metrics['response_times'][operation_name].append({
'response_time': response_time,
'timestamp': datetime.utcnow().isoformat()
})
# Keep only last 100 entries
if len(self.performance_metrics['response_times'][operation_name]) > 100:
self.performance_metrics['response_times'][operation_name] = self.performance_metrics['response_times'][operation_name][-100:]
except Exception as e:
logger.error(f"Error recording response time: {str(e)}")
def _record_database_performance(self, operation_name: str, query_count: int, response_time: float) -> None:
"""Record database performance metrics."""
try:
if operation_name not in self.performance_metrics['database_queries']:
self.performance_metrics['database_queries'][operation_name] = []
self.performance_metrics['database_queries'][operation_name].append({
'query_count': query_count,
'response_time': response_time,
'timestamp': datetime.utcnow().isoformat()
})
# Keep only last 100 entries
if len(self.performance_metrics['database_queries'][operation_name]) > 100:
self.performance_metrics['database_queries'][operation_name] = self.performance_metrics['database_queries'][operation_name][-100:]
except Exception as e:
logger.error(f"Error recording database performance: {str(e)}")
def _record_memory_usage(self, operation_name: str, memory_used: float) -> None:
"""Record memory usage metrics."""
try:
if operation_name not in self.performance_metrics['memory_usage']:
self.performance_metrics['memory_usage'][operation_name] = []
self.performance_metrics['memory_usage'][operation_name].append({
'memory_used_mb': memory_used,
'timestamp': datetime.utcnow().isoformat()
})
# Keep only last 100 entries
if len(self.performance_metrics['memory_usage'][operation_name]) > 100:
self.performance_metrics['memory_usage'][operation_name] = self.performance_metrics['memory_usage'][operation_name][-100:]
except Exception as e:
logger.error(f"Error recording memory usage: {str(e)}")
def _record_cache_performance(self, operation_name: str, hit_rates: Dict[str, float]) -> None:
"""Record cache performance metrics."""
try:
if operation_name not in self.performance_metrics['cache_hit_rates']:
self.performance_metrics['cache_hit_rates'][operation_name] = []
self.performance_metrics['cache_hit_rates'][operation_name].append({
'hit_rates': hit_rates,
'timestamp': datetime.utcnow().isoformat()
})
# Keep only last 100 entries
if len(self.performance_metrics['cache_hit_rates'][operation_name]) > 100:
self.performance_metrics['cache_hit_rates'][operation_name] = self.performance_metrics['cache_hit_rates'][operation_name][-100:]
except Exception as e:
logger.error(f"Error recording cache performance: {str(e)}")
def _get_query_count(self, db: Session) -> int:
"""Get current query count from database session."""
try:
# This is a simplified implementation
# In practice, you'd use database-specific monitoring tools
return 0
except Exception as e:
logger.error(f"Error getting query count: {str(e)}")
return 0
async def _suggest_response_time_optimizations(self, operation_name: str, response_time: float) -> List[str]:
"""Suggest optimizations for slow response times."""
try:
suggestions = []
if response_time > 5.0:
suggestions.append("Consider implementing caching for this operation")
suggestions.append("Review database query optimization")
suggestions.append("Consider async processing for heavy operations")
elif response_time > 2.0:
suggestions.append("Optimize database queries")
suggestions.append("Consider adding indexes for frequently accessed data")
suggestions.append("Review data processing algorithms")
# Add operation-specific suggestions
if 'ai_analysis' in operation_name.lower():
suggestions.append("Consider implementing AI response caching")
suggestions.append("Review AI service integration efficiency")
elif 'onboarding' in operation_name.lower():
suggestions.append("Optimize data transformation algorithms")
suggestions.append("Consider batch processing for large datasets")
return suggestions
except Exception as e:
logger.error(f"Error suggesting response time optimizations: {str(e)}")
return ["Unable to generate optimization suggestions"]
async def _suggest_database_optimizations(self, operation_name: str, query_count: int, response_time: float) -> List[str]:
"""Suggest optimizations for database performance."""
try:
suggestions = []
if query_count > 20:
suggestions.append("Implement query batching to reduce database calls")
suggestions.append("Review and optimize N+1 query patterns")
suggestions.append("Consider implementing database connection pooling")
elif query_count > 10:
suggestions.append("Optimize database queries with proper indexing")
suggestions.append("Consider implementing query result caching")
suggestions.append("Review database schema for optimization opportunities")
if response_time > 1.0:
suggestions.append("Add database indexes for frequently queried columns")
suggestions.append("Consider read replicas for heavy read operations")
suggestions.append("Optimize database connection settings")
# Add operation-specific suggestions
if 'strategy' in operation_name.lower():
suggestions.append("Consider implementing strategy data caching")
suggestions.append("Optimize strategy-related database queries")
elif 'onboarding' in operation_name.lower():
suggestions.append("Batch onboarding data processing")
suggestions.append("Optimize onboarding data retrieval queries")
return suggestions
except Exception as e:
logger.error(f"Error suggesting database optimizations: {str(e)}")
return ["Unable to generate database optimization suggestions"]
async def _suggest_memory_optimizations(self, operation_name: str, memory_used: float) -> List[str]:
"""Suggest optimizations for memory usage."""
try:
suggestions = []
if memory_used > 100:
suggestions.append("Implement data streaming for large datasets")
suggestions.append("Review memory-intensive data structures")
suggestions.append("Consider implementing pagination")
elif memory_used > 50:
suggestions.append("Optimize data processing algorithms")
suggestions.append("Review object lifecycle management")
suggestions.append("Consider implementing lazy loading")
# Add operation-specific suggestions
if 'ai_analysis' in operation_name.lower():
suggestions.append("Implement AI response streaming")
suggestions.append("Optimize AI model memory usage")
elif 'onboarding' in operation_name.lower():
suggestions.append("Process onboarding data in smaller chunks")
suggestions.append("Implement data cleanup after processing")
return suggestions
except Exception as e:
logger.error(f"Error suggesting memory optimizations: {str(e)}")
return ["Unable to generate memory optimization suggestions"]
async def get_performance_report(self) -> Dict[str, Any]:
    """Assemble a timestamped report of the aggregated metrics and recommendations.

    On failure the report holds only 'timestamp' and 'error'.
    """
    try:
        generated_at = datetime.utcnow().isoformat()
        response_times = self._calculate_average_response_times()
        database_performance = self._calculate_database_performance()
        memory_usage = self._calculate_memory_usage()
        cache_performance = self._calculate_cache_performance()
        recommendations = await self._generate_optimization_recommendations()
        return {
            'timestamp': generated_at,
            'response_times': response_times,
            'database_performance': database_performance,
            'memory_usage': memory_usage,
            'cache_performance': cache_performance,
            'optimization_recommendations': recommendations
        }
    except Exception as e:
        logger.error(f"Error generating performance report: {str(e)}")
        return {
            'timestamp': datetime.utcnow().isoformat(),
            'error': str(e)
        }
def _calculate_average_response_times(self) -> Dict[str, float]:
"""Calculate average response times for operations."""
try:
averages = {}
for operation_name, times in self.performance_metrics['response_times'].items():
if times:
avg_time = sum(t['response_time'] for t in times) / len(times)
averages[operation_name] = avg_time
return averages
except Exception as e:
logger.error(f"Error calculating average response times: {str(e)}")
return {}
def _calculate_database_performance(self) -> Dict[str, Dict[str, float]]:
"""Calculate database performance metrics."""
try:
performance = {}
for operation_name, queries in self.performance_metrics['database_queries'].items():
if queries:
avg_queries = sum(q['query_count'] for q in queries) / len(queries)
avg_time = sum(q['response_time'] for q in queries) / len(queries)
performance[operation_name] = {
'average_queries': avg_queries,
'average_response_time': avg_time
}
return performance
except Exception as e:
logger.error(f"Error calculating database performance: {str(e)}")
return {}
def _calculate_memory_usage(self) -> Dict[str, float]:
"""Calculate average memory usage for operations."""
try:
averages = {}
for operation_name, usage in self.performance_metrics['memory_usage'].items():
if usage:
avg_memory = sum(u['memory_used_mb'] for u in usage) / len(usage)
averages[operation_name] = avg_memory
return averages
except Exception as e:
logger.error(f"Error calculating memory usage: {str(e)}")
return {}
def _calculate_cache_performance(self) -> Dict[str, float]:
    """Return the average cache hit rate per operation.

    Reads ``self.performance_metrics['cache_hit_rates']``. Each sample
    holds a ``'hit_rates'`` mapping (cache type -> rate). A sample's rate
    is the mean over its cache types; an operation's rate is the mean over
    its samples that have a non-empty ``'hit_rates'`` mapping. Operations
    with no usable samples are omitted.

    Returns:
        Mapping of operation name to average hit rate, or an empty dict on
        error (logged).
    """
    try:
        result = {}
        for name, samples in self.performance_metrics['cache_hit_rates'].items():
            if not samples:
                continue
            # One averaged value per sample, skipping samples with no rates.
            per_sample = [
                sum(sample['hit_rates'].values()) / len(sample['hit_rates'])
                for sample in samples
                if sample['hit_rates']
            ]
            if per_sample:
                result[name] = sum(per_sample) / len(per_sample)
        return result
    except Exception as e:
        logger.error(f"Error calculating cache performance: {str(e)}")
        return {}
async def _generate_optimization_recommendations(self) -> List[str]:
    """Build human-readable tuning hints from the collected metrics.

    Compares the per-operation averages against the thresholds in
    ``self.optimization_config`` (``'max_response_time'``,
    ``'max_database_queries'``, ``'max_memory_usage'``) and emits one
    message for every operation that exceeds a threshold. Messages are
    ordered: response time, then database queries, then memory usage.

    Returns:
        List of recommendation strings; a single fallback message if an
        error occurs (the error is logged).
    """
    try:
        suggestions = []

        # Response-time threshold.
        suggestions.extend(
            f"Optimize response time for {operation} (avg: {avg_time:.2f}s)"
            for operation, avg_time in self._calculate_average_response_times().items()
            if avg_time > self.optimization_config['max_response_time']
        )

        # Database query-count threshold.
        suggestions.extend(
            f"Reduce database queries for {operation} (avg: {perf['average_queries']:.1f} queries)"
            for operation, perf in self._calculate_database_performance().items()
            if perf['average_queries'] > self.optimization_config['max_database_queries']
        )

        # Memory threshold.
        suggestions.extend(
            f"Optimize memory usage for {operation} (avg: {memory:.1f}MB)"
            for operation, memory in self._calculate_memory_usage().items()
            if memory > self.optimization_config['max_memory_usage']
        )

        return suggestions
    except Exception as e:
        logger.error(f"Error generating optimization recommendations: {str(e)}")
        return ["Unable to generate optimization recommendations"]
async def cleanup_old_metrics(self, days_to_keep: int = 30) -> Dict[str, int]:
    """Drop stored metric samples older than ``days_to_keep`` days.

    Walks ``self.performance_metrics`` (metric type -> operation -> list of
    samples) and keeps only samples whose ISO-format ``'timestamp'`` is
    later than ``utcnow() - days_to_keep``. Values that are not lists are
    left untouched.

    Args:
        days_to_keep: Age limit in days; older samples are removed.

    Returns:
        ``{'cleaned_count': <number of removed samples>}``. If an error
        occurs it is logged and ``{'cleaned_count': 0}`` is returned, even
        if some lists were already pruned before the failure.
    """
    try:
        threshold = datetime.utcnow() - timedelta(days=days_to_keep)
        removed_total = 0
        for metric_type, operations in self.performance_metrics.items():
            for operation_name, entries in operations.items():
                if not isinstance(entries, list):
                    continue
                kept = [
                    entry for entry in entries
                    if datetime.fromisoformat(entry['timestamp']) > threshold
                ]
                self.performance_metrics[metric_type][operation_name] = kept
                removed_total += len(entries) - len(kept)
        logger.info(f"Cleaned up {removed_total} old performance metrics")
        return {'cleaned_count': removed_total}
    except Exception as e:
        logger.error(f"Error cleaning up old metrics: {str(e)}")
        return {'cleaned_count': 0}

View File

@@ -0,0 +1,9 @@
"""
Utils Module
Data processing and validation utilities.
"""
from .data_processors import DataProcessorService
from .validators import ValidationService
# Names re-exported as this package's public API (used by `import *`).
__all__ = ['DataProcessorService', 'ValidationService']

View File

@@ -0,0 +1,451 @@
"""
Data Processor Service
Data processing utilities.
"""
import logging
import json
import re
from typing import Dict, Any, List, Optional, Union
from datetime import datetime, timedelta
logger = logging.getLogger(__name__)
class DataProcessorService:
    """Service for data processing utilities.

    Bundles helpers for reshaping, cleaning, enriching, validating, merging,
    filtering, formatting, masking and summarising dictionary-style data.
    Every method catches unexpected exceptions, logs them through the module
    logger, and returns a fallback value instead of raising.
    """

    def __init__(self):
        # Pre-compiled patterns used by clean_text_data.
        self.cleaning_patterns = {
            'html_tags': re.compile(r'<[^>]+>'),
            'extra_whitespace': re.compile(r'\s+'),
            'special_chars': re.compile(r'[^\w\s\-.,!?;:()]'),
            'multiple_spaces': re.compile(r'\s{2,}'),
            'leading_trailing_spaces': re.compile(r'^\s+|\s+$')
        }

    def transform_data_structure(self, data: Union[Dict, List, str], target_format: str = 'dict') -> Union[Dict, List, str]:
        """Transform data between different structures.

        Args:
            data: Value to convert.
            target_format: ``'dict'``, ``'list'`` or ``'string'``.

        Returns:
            The converted value. For an unknown ``target_format`` (a warning
            is logged) or on error, ``data`` is returned unchanged.
        """
        try:
            if target_format == 'dict':
                if isinstance(data, dict):
                    return data
                elif isinstance(data, list):
                    # List positions become string keys.
                    return {str(i): item for i, item in enumerate(data)}
                elif isinstance(data, str):
                    try:
                        # NOTE(review): a JSON string that is not an object
                        # (e.g. "[1, 2]") is returned as parsed, not as a dict.
                        return json.loads(data)
                    except json.JSONDecodeError:
                        return {'value': data}
                else:
                    return {'value': str(data)}
            elif target_format == 'list':
                if isinstance(data, list):
                    return data
                elif isinstance(data, dict):
                    return list(data.values())
                elif isinstance(data, str):
                    return [data]
                else:
                    return [str(data)]
            elif target_format == 'string':
                if isinstance(data, str):
                    return data
                elif isinstance(data, (dict, list)):
                    return json.dumps(data, default=str)
                else:
                    return str(data)
            else:
                logger.warning(f"Unknown target format: {target_format}")
                return data
        except Exception as e:
            logger.error(f"Error transforming data structure: {str(e)}")
            return data

    def clean_text_data(self, text: str, cleaning_level: str = 'standard') -> str:
        """Clean and normalize text data.

        Args:
            text: Text to clean; non-strings are converted with ``str()``.
            cleaning_level: ``'minimal'`` (trim and collapse whitespace),
                ``'standard'`` (also strip HTML tags) or ``'aggressive'``
                (also drop characters outside word characters, whitespace
                and ``-.,!?;:()``).

        Returns:
            The cleaned text. An unknown level logs a warning and only
            strips surrounding whitespace.
        """
        try:
            if not isinstance(text, str):
                text = str(text)
            if cleaning_level == 'minimal':
                # Basic cleaning
                cleaned = self.cleaning_patterns['leading_trailing_spaces'].sub('', text)
                cleaned = self.cleaning_patterns['multiple_spaces'].sub(' ', cleaned)
                return cleaned.strip()
            elif cleaning_level == 'standard':
                # Standard cleaning
                cleaned = self.cleaning_patterns['html_tags'].sub('', text)
                cleaned = self.cleaning_patterns['leading_trailing_spaces'].sub('', cleaned)
                cleaned = self.cleaning_patterns['multiple_spaces'].sub(' ', cleaned)
                return cleaned.strip()
            elif cleaning_level == 'aggressive':
                # Aggressive cleaning
                cleaned = self.cleaning_patterns['html_tags'].sub('', text)
                cleaned = self.cleaning_patterns['special_chars'].sub('', cleaned)
                cleaned = self.cleaning_patterns['leading_trailing_spaces'].sub('', cleaned)
                cleaned = self.cleaning_patterns['multiple_spaces'].sub(' ', cleaned)
                return cleaned.strip()
            else:
                logger.warning(f"Unknown cleaning level: {cleaning_level}")
                return text.strip()
        except Exception as e:
            logger.error(f"Error cleaning text data: {str(e)}")
            return str(text)

    def clean_dict_data(self, data: Dict[str, Any], cleaning_level: str = 'standard') -> Dict[str, Any]:
        """Clean dictionary data recursively.

        Keys are stringified and cleaned; string values are cleaned; nested
        dicts are cleaned recursively; in lists only string items are
        cleaned (other items, including nested containers, pass through).

        Returns:
            A new cleaned dict, or ``data`` unchanged on error.
        """
        try:
            cleaned_data = {}
            for key, value in data.items():
                # Clean key
                cleaned_key = self.clean_text_data(str(key), cleaning_level)
                # Clean value
                if isinstance(value, str):
                    cleaned_value = self.clean_text_data(value, cleaning_level)
                elif isinstance(value, dict):
                    cleaned_value = self.clean_dict_data(value, cleaning_level)
                elif isinstance(value, list):
                    cleaned_value = [
                        self.clean_text_data(str(item), cleaning_level) if isinstance(item, str) else item
                        for item in value
                    ]
                else:
                    cleaned_value = value
                cleaned_data[cleaned_key] = cleaned_value
            return cleaned_data
        except Exception as e:
            logger.error(f"Error cleaning dict data: {str(e)}")
            return data

    def enrich_data_with_metadata(self, data: Dict[str, Any], source: str = 'unknown') -> Dict[str, Any]:
        """Enrich data with metadata.

        Returns a shallow copy of ``data`` with an added ``'_metadata'``
        entry holding the processing time (UTC, ISO format), ``source``,
        the detected data type, the length of ``str(data)`` and the number
        of top-level fields. On error ``data`` is returned unchanged.
        """
        try:
            enriched_data = data.copy()
            # Add metadata
            enriched_data['_metadata'] = {
                'processed_at': datetime.utcnow().isoformat(),
                'source': source,
                'data_type': self._determine_data_type(data),
                'size': len(str(data)),
                'field_count': len(data) if isinstance(data, dict) else 0
            }
            return enriched_data
        except Exception as e:
            logger.error(f"Error enriching data with metadata: {str(e)}")
            return data

    def _determine_data_type(self, data: Any) -> str:
        """Determine the type of data.

        Returns one of ``'object'``, ``'array'``, ``'string'``,
        ``'boolean'``, ``'number'`` or ``'unknown'``.
        """
        try:
            if isinstance(data, dict):
                return 'object'
            elif isinstance(data, list):
                return 'array'
            elif isinstance(data, str):
                return 'string'
            elif isinstance(data, bool):
                # bool is a subclass of int, so it must be tested before the
                # numeric check or True/False would be reported as 'number'.
                return 'boolean'
            elif isinstance(data, (int, float)):
                return 'number'
            else:
                return 'unknown'
        except Exception as e:
            logger.error(f"Error determining data type: {str(e)}")
            return 'unknown'

    def validate_data_completeness(self, data: Dict[str, Any], required_fields: List[str]) -> Dict[str, Any]:
        """Validate data completeness against required fields.

        A field counts as present when it exists and is neither ``None``
        nor ``''``. ``completeness_score`` is the present fraction of
        ``required_fields``; ``is_complete`` is true when that score is at
        least 0.8. With no required fields the score stays 0.0 and
        ``is_complete`` stays true.

        Returns:
            Dict with ``is_complete``, ``missing_fields``,
            ``present_fields``, ``completeness_score`` and
            ``validation_timestamp`` (plus ``error`` on failure).
        """
        try:
            validation_result = {
                'is_complete': True,
                'missing_fields': [],
                'present_fields': [],
                'completeness_score': 0.0,
                'validation_timestamp': datetime.utcnow().isoformat()
            }
            present_count = 0
            for field in required_fields:
                if field in data and data[field] is not None and data[field] != '':
                    validation_result['present_fields'].append(field)
                    present_count += 1
                else:
                    validation_result['missing_fields'].append(field)
            # Calculate completeness score
            if required_fields:
                validation_result['completeness_score'] = present_count / len(required_fields)
                validation_result['is_complete'] = validation_result['completeness_score'] >= 0.8
            return validation_result
        except Exception as e:
            logger.error(f"Error validating data completeness: {str(e)}")
            return {
                'is_complete': False,
                'missing_fields': required_fields,
                'present_fields': [],
                'completeness_score': 0.0,
                'validation_timestamp': datetime.utcnow().isoformat(),
                'error': str(e)
            }

    def normalize_field_values(self, data: Dict[str, Any], field_mappings: Dict[str, str]) -> Dict[str, Any]:
        """Normalize field values based on mappings.

        Builds a new dict containing only the fields named in
        ``field_mappings`` (original name -> normalized name); fields
        without a mapping are dropped. On error ``data`` is returned.
        """
        try:
            normalized_data = {}
            for original_field, normalized_field in field_mappings.items():
                if original_field in data:
                    normalized_data[normalized_field] = data[original_field]
            return normalized_data
        except Exception as e:
            logger.error(f"Error normalizing field values: {str(e)}")
            return data

    def merge_data_sources(self, data_sources: List[Dict[str, Any]], merge_strategy: str = 'prefer_first') -> Dict[str, Any]:
        """Merge multiple data sources.

        Args:
            data_sources: Dicts to merge, in priority order.
            merge_strategy: ``'prefer_first'`` (first non-empty value wins),
                ``'prefer_last'`` (last non-empty value wins), ``'combine'``
                (collect every value per key into a list) or
                ``'intersection'`` (keys present in all sources, value taken
                from the first source).

        Returns:
            The merged dict. An empty list yields ``{}``; a single source is
            returned as-is (same object); an unknown strategy logs a warning
            and yields ``{}``. On error the first source (or ``{}``) is
            returned.
        """
        try:
            if not data_sources:
                return {}
            if len(data_sources) == 1:
                return data_sources[0]
            merged_data = {}
            if merge_strategy == 'prefer_first':
                # Prefer first non-empty value
                for source in data_sources:
                    for key, value in source.items():
                        if key not in merged_data or merged_data[key] is None or merged_data[key] == '':
                            merged_data[key] = value
            elif merge_strategy == 'prefer_last':
                # Prefer last non-empty value
                for source in data_sources:
                    for key, value in source.items():
                        if value is not None and value != '':
                            merged_data[key] = value
            elif merge_strategy == 'combine':
                # Combine all values
                for source in data_sources:
                    for key, value in source.items():
                        merged_data.setdefault(key, []).append(value)
            elif merge_strategy == 'intersection':
                # Only include fields present in all sources
                first_source = data_sources[0]
                common_keys = set(first_source.keys())
                for source in data_sources[1:]:
                    common_keys = common_keys.intersection(set(source.keys()))
                # Iterate the first source so the key order is deterministic.
                for key in first_source:
                    if key in common_keys:
                        merged_data[key] = first_source[key]
            else:
                logger.warning(f"Unknown merge strategy: {merge_strategy}")
            return merged_data
        except Exception as e:
            logger.error(f"Error merging data sources: {str(e)}")
            return data_sources[0] if data_sources else {}

    def filter_data_by_criteria(self, data: Dict[str, Any], criteria: Dict[str, Any]) -> Dict[str, Any]:
        """Filter data based on criteria.

        Supported ``criteria`` keys: ``include_fields``, ``exclude_fields``
        (containers of field names), ``min_length`` / ``max_length``
        (applied to string values only) and ``required_values`` (field ->
        container of allowed values). A field is kept only if it passes
        every supplied criterion. On error ``data`` is returned unchanged.
        """
        try:
            filtered_data = {}
            for key, value in data.items():
                include_field = True
                # Check if field should be included based on criteria
                if 'include_fields' in criteria and key not in criteria['include_fields']:
                    include_field = False
                if 'exclude_fields' in criteria and key in criteria['exclude_fields']:
                    include_field = False
                # Check value-based criteria
                if 'min_length' in criteria and isinstance(value, str) and len(value) < criteria['min_length']:
                    include_field = False
                if 'max_length' in criteria and isinstance(value, str) and len(value) > criteria['max_length']:
                    include_field = False
                if 'required_values' in criteria and key in criteria['required_values']:
                    if value not in criteria['required_values'][key]:
                        include_field = False
                if include_field:
                    filtered_data[key] = value
            return filtered_data
        except Exception as e:
            logger.error(f"Error filtering data by criteria: {str(e)}")
            return data

    def format_data_for_output(self, data: Dict[str, Any], output_format: str = 'json') -> Union[str, Dict[str, Any]]:
        """Format data for different output formats.

        Args:
            data: Flat dict to render.
            output_format: ``'json'``, ``'dict'``, ``'csv'`` or ``'xml'``.

        Returns:
            A string for json/csv/xml, ``data`` itself for ``'dict'`` or an
            unknown format (warning logged), and ``str(data)`` on error.
        """
        try:
            if output_format == 'json':
                return json.dumps(data, indent=2, default=str)
            elif output_format == 'dict':
                return data
            elif output_format == 'csv':
                # Convert to CSV format (simplified)
                # NOTE(review): values and headers are joined verbatim; commas,
                # quotes or newlines inside them are not escaped.
                csv_lines = []
                if data:
                    # Headers
                    headers = list(data.keys())
                    csv_lines.append(','.join(headers))
                    # Values
                    values = [str(data.get(header, '')) for header in headers]
                    csv_lines.append(','.join(values))
                return '\n'.join(csv_lines)
            elif output_format == 'xml':
                # Convert to XML format (simplified)
                # NOTE(review): keys and values are interpolated without XML
                # escaping, so special characters produce malformed output.
                xml_lines = ['<?xml version="1.0" encoding="UTF-8"?>', '<data>']
                for key, value in data.items():
                    xml_lines.append(f' <{key}>{value}</{key}>')
                xml_lines.append('</data>')
                return '\n'.join(xml_lines)
            else:
                logger.warning(f"Unknown output format: {output_format}")
                return data
        except Exception as e:
            logger.error(f"Error formatting data for output: {str(e)}")
            return str(data)

    def validate_data_types(self, data: Dict[str, Any], type_schema: Dict[str, str]) -> Dict[str, Any]:
        """Validate data types against a schema.

        ``type_schema`` maps field names to the type labels produced by
        ``_determine_data_type``. Only fields present in ``data`` are
        checked; every mismatch is recorded in ``type_errors``.

        Returns:
            Dict with ``is_valid``, ``type_errors`` and
            ``validation_timestamp``.
        """
        try:
            validation_result = {
                'is_valid': True,
                'type_errors': [],
                'validation_timestamp': datetime.utcnow().isoformat()
            }
            for field, expected_type in type_schema.items():
                if field in data:
                    value = data[field]
                    actual_type = self._determine_data_type(value)
                    if actual_type != expected_type:
                        validation_result['type_errors'].append({
                            'field': field,
                            'expected_type': expected_type,
                            'actual_type': actual_type,
                            'value': value
                        })
                        validation_result['is_valid'] = False
            return validation_result
        except Exception as e:
            logger.error(f"Error validating data types: {str(e)}")
            return {
                'is_valid': False,
                'type_errors': [{'error': str(e)}],
                'validation_timestamp': datetime.utcnow().isoformat()
            }

    def sanitize_sensitive_data(self, data: Dict[str, Any], sensitive_fields: List[str]) -> Dict[str, Any]:
        """Sanitize sensitive data fields.

        Returns a shallow copy of ``data`` in which each listed field is
        masked: strings longer than four characters keep their first and
        last character with asterisks in between; anything else becomes
        ``'***'``. On error ``data`` is returned unchanged.
        """
        try:
            sanitized_data = data.copy()
            for field in sensitive_fields:
                if field in sanitized_data:
                    value = sanitized_data[field]
                    if isinstance(value, str) and len(value) > 4:
                        # Replace with asterisks, keeping first and last character
                        sanitized_data[field] = value[0] + '*' * (len(value) - 2) + value[-1]
                    else:
                        sanitized_data[field] = '***'
            return sanitized_data
        except Exception as e:
            logger.error(f"Error sanitizing sensitive data: {str(e)}")
            return data

    def calculate_data_statistics(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Calculate statistics about the data.

        Counts top-level fields by detected type, counts ``None`` and
        empty-string values separately, and reports the average length of
        the non-empty string values.

        Returns:
            Dict of counters, or ``{'error': ..., 'total_fields': 0}`` on
            failure.
        """
        try:
            stats = {
                'total_fields': len(data),
                'string_fields': 0,
                'numeric_fields': 0,
                'boolean_fields': 0,
                'object_fields': 0,
                'array_fields': 0,
                'null_fields': 0,
                'empty_fields': 0,
                'average_field_length': 0.0
            }
            total_length = 0
            field_count = 0
            for key, value in data.items():
                if value is None:
                    stats['null_fields'] += 1
                elif value == '':
                    stats['empty_fields'] += 1
                else:
                    data_type = self._determine_data_type(value)
                    if data_type == 'string':
                        stats['string_fields'] += 1
                        total_length += len(str(value))
                        field_count += 1
                    elif data_type == 'number':
                        stats['numeric_fields'] += 1
                    elif data_type == 'boolean':
                        stats['boolean_fields'] += 1
                    elif data_type == 'object':
                        stats['object_fields'] += 1
                    elif data_type == 'array':
                        stats['array_fields'] += 1
            if field_count > 0:
                stats['average_field_length'] = total_length / field_count
            return stats
        except Exception as e:
            logger.error(f"Error calculating data statistics: {str(e)}")
            return {
                'error': str(e),
                'total_fields': 0
            }

View File

@@ -0,0 +1,473 @@
"""
Validation Service
Data validation utilities.
"""
import logging
import re
from typing import Dict, Any, List, Optional, Union
from datetime import datetime, timedelta
logger = logging.getLogger(__name__)
class ValidationService:
    """Service for data validation and business rule checking.

    Validation methods return result dicts (``is_valid``, ``errors``,
    ``warnings``, ...) rather than raising; unexpected exceptions are
    logged and reported inside the returned result.
    """

    def __init__(self):
        # Pre-compiled patterns used by the format validators.
        self.validation_patterns = {
            'email': re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'),
            'url': re.compile(r'^https?://(?:[-\w.])+(?:[:\d]+)?(?:/(?:[\w/_.])*(?:\?(?:[\w&=%.])*)?(?:#(?:[\w.])*)?)?$'),
            'phone': re.compile(r'^\+?1?\d{9,15}$'),
            'domain': re.compile(r'^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$'),
            'alphanumeric': re.compile(r'^[a-zA-Z0-9\s]+$'),
            'numeric': re.compile(r'^\d+(\.\d+)?$'),
            'integer': re.compile(r'^\d+$')
        }
        # Per-field rules consumed by validate_business_rules.
        self.business_rules = {
            'content_budget': {
                'min_value': 0,
                'max_value': 1000000,
                'required': True
            },
            'team_size': {
                'min_value': 1,
                'max_value': 100,
                'required': True
            },
            'implementation_timeline': {
                'min_days': 1,
                'max_days': 365,
                'required': True
            },
            'market_share': {
                'min_value': 0,
                'max_value': 100,
                'required': False
            }
        }

    def validate_field(self, field_name: str, value: Any, field_type: str = 'string', **kwargs) -> Dict[str, Any]:
        """Validate a single field.

        Args:
            field_name: Name used in error messages.
            value: Value to validate.
            field_type: ``'email'``, ``'url'``, ``'phone'``, ``'domain'``,
                ``'alphanumeric'``, ``'numeric'``, ``'integer'``, ``'date'``
                or ``'json'``; anything else is validated as a string.
            **kwargs: Optional ``required``, ``min_length``, ``max_length``,
                and (numeric/integer only) ``min_value`` / ``max_value``.
                Other keys are ignored.

        Returns:
            Dict with ``field_name``, ``value``, ``is_valid``, ``errors``,
            ``warnings`` and ``validation_timestamp``. ``None`` and ``''``
            are valid unless ``required`` is set.
        """
        try:
            validation_result = {
                'field_name': field_name,
                'value': value,
                'is_valid': True,
                'errors': [],
                'warnings': [],
                'validation_timestamp': datetime.utcnow().isoformat()
            }
            # Check if value is required
            if kwargs.get('required', False) and (value is None or value == ''):
                validation_result['is_valid'] = False
                validation_result['errors'].append(f"Field '{field_name}' is required")
                return validation_result
            # Skip validation if value is None and not required
            if value is None or value == '':
                return validation_result
            # Type-specific validation (unknown types fall back to string).
            type_validators = {
                'email': self._validate_email,
                'url': self._validate_url,
                'phone': self._validate_phone,
                'domain': self._validate_domain,
                'alphanumeric': self._validate_alphanumeric,
                'numeric': self._validate_numeric,
                'integer': self._validate_integer,
                'date': self._validate_date,
                'json': self._validate_json,
            }
            validator = type_validators.get(field_type, self._validate_string)
            validation_result = validator(field_name, value, validation_result)
            # Remember the type-check outcome before later checks alter is_valid.
            type_check_passed = validation_result['is_valid']
            # Length validation
            if 'min_length' in kwargs and len(str(value)) < kwargs['min_length']:
                validation_result['is_valid'] = False
                validation_result['errors'].append(f"Field '{field_name}' must be at least {kwargs['min_length']} characters long")
            if 'max_length' in kwargs and len(str(value)) > kwargs['max_length']:
                validation_result['is_valid'] = False
                validation_result['errors'].append(f"Field '{field_name}' must be no more than {kwargs['max_length']} characters long")
            # Range validation for numeric fields. Skipped when the value is
            # not numeric: float(value) would raise and replace the specific
            # "must be a valid number" error with a generic exception message.
            if field_type in ['numeric', 'integer'] and type_check_passed:
                if 'min_value' in kwargs and float(value) < kwargs['min_value']:
                    validation_result['is_valid'] = False
                    validation_result['errors'].append(f"Field '{field_name}' must be at least {kwargs['min_value']}")
                if 'max_value' in kwargs and float(value) > kwargs['max_value']:
                    validation_result['is_valid'] = False
                    validation_result['errors'].append(f"Field '{field_name}' must be no more than {kwargs['max_value']}")
            return validation_result
        except Exception as e:
            logger.error(f"Error validating field {field_name}: {str(e)}")
            return {
                'field_name': field_name,
                'value': value,
                'is_valid': False,
                'errors': [f"Validation error: {str(e)}"],
                'warnings': [],
                'validation_timestamp': datetime.utcnow().isoformat()
            }

    def validate_business_rules(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Validate data against business rules.

        Each field in ``self.business_rules`` that is present in ``data``
        is validated with ``validate_field``; a missing field is an error
        only when its rule sets ``required``.

        Returns:
            Dict with ``is_valid``, aggregated ``errors`` / ``warnings``,
            per-field ``field_validations`` and ``validation_timestamp``.
        """
        try:
            validation_result = {
                'is_valid': True,
                'errors': [],
                'warnings': [],
                'field_validations': {},
                'validation_timestamp': datetime.utcnow().isoformat()
            }
            for field_name, rules in self.business_rules.items():
                if field_name in data:
                    # Rules with numeric bounds must be validated as numeric:
                    # validate_field enforces min_value/max_value only for
                    # numeric types, and the default 'string' type would
                    # reject int/float values outright.
                    # NOTE(review): min_days/max_days are not consumed by
                    # validate_field, so those bounds are not enforced here.
                    if 'min_value' in rules or 'max_value' in rules:
                        field_type = 'numeric'
                    else:
                        field_type = 'string'
                    field_validation = self.validate_field(
                        field_name,
                        data[field_name],
                        field_type,
                        **rules
                    )
                    validation_result['field_validations'][field_name] = field_validation
                    if not field_validation['is_valid']:
                        validation_result['is_valid'] = False
                        validation_result['errors'].extend(field_validation['errors'])
                    validation_result['warnings'].extend(field_validation['warnings'])
                elif rules.get('required', False):
                    validation_result['is_valid'] = False
                    validation_result['errors'].append(f"Required field '{field_name}' is missing")
            return validation_result
        except Exception as e:
            logger.error(f"Error validating business rules: {str(e)}")
            return {
                'is_valid': False,
                'errors': [f"Business rule validation error: {str(e)}"],
                'warnings': [],
                'field_validations': {},
                'validation_timestamp': datetime.utcnow().isoformat()
            }

    def validate_strategy_data(self, strategy_data: Dict[str, Any]) -> Dict[str, Any]:
        """Validate content strategy data specifically.

        Required fields must be present and non-empty and pass their
        type/range checks; failures there make the result invalid. Optional
        fields are validated only when truthy, and their failures are
        reported as warnings without invalidating the result.
        """
        try:
            validation_result = {
                'is_valid': True,
                'errors': [],
                'warnings': [],
                'field_validations': {},
                'validation_timestamp': datetime.utcnow().isoformat()
            }
            # Required fields for content strategy
            required_fields = [
                'business_objectives', 'target_metrics', 'content_budget',
                'team_size', 'implementation_timeline'
            ]
            for field in required_fields:
                if field not in strategy_data or strategy_data[field] is None or strategy_data[field] == '':
                    validation_result['is_valid'] = False
                    validation_result['errors'].append(f"Required field '{field}' is missing")
                else:
                    # Validate specific field types
                    if field == 'content_budget':
                        field_validation = self.validate_field(field, strategy_data[field], 'numeric', min_value=0, max_value=1000000)
                    elif field == 'team_size':
                        field_validation = self.validate_field(field, strategy_data[field], 'integer', min_value=1, max_value=100)
                    elif field == 'implementation_timeline':
                        field_validation = self.validate_field(field, strategy_data[field], 'string', min_length=1, max_length=500)
                    else:
                        field_validation = self.validate_field(field, strategy_data[field], 'string', min_length=1)
                    validation_result['field_validations'][field] = field_validation
                    if not field_validation['is_valid']:
                        validation_result['is_valid'] = False
                        validation_result['errors'].extend(field_validation['errors'])
                    validation_result['warnings'].extend(field_validation['warnings'])
            # Validate optional fields
            optional_fields = {
                'market_share': ('numeric', {'min_value': 0, 'max_value': 100}),
                'competitive_position': ('string', {'max_length': 1000}),
                'content_preferences': ('string', {'max_length': 2000}),
                'audience_pain_points': ('string', {'max_length': 2000}),
                'top_competitors': ('string', {'max_length': 1000}),
                'industry_trends': ('string', {'max_length': 1000})
            }
            for field, (field_type, validation_params) in optional_fields.items():
                if field in strategy_data and strategy_data[field]:
                    field_validation = self.validate_field(field, strategy_data[field], field_type, **validation_params)
                    validation_result['field_validations'][field] = field_validation
                    if not field_validation['is_valid']:
                        # Optional-field problems are downgraded to warnings.
                        validation_result['warnings'].extend(field_validation['errors'])
                    validation_result['warnings'].extend(field_validation['warnings'])
            return validation_result
        except Exception as e:
            logger.error(f"Error validating strategy data: {str(e)}")
            return {
                'is_valid': False,
                'errors': [f"Strategy validation error: {str(e)}"],
                'warnings': [],
                'field_validations': {},
                'validation_timestamp': datetime.utcnow().isoformat()
            }

    def _validate_pattern(self, pattern_name: str, field_name: str, value: str,
                          validation_result: Dict[str, Any], requirement: str,
                          log_label: str, error_label: str) -> Dict[str, Any]:
        """Check ``value`` against a compiled pattern and record any failure.

        Shared implementation of the regex-based validators. ``requirement``
        completes the message "Field '<name>' ..."; ``log_label`` and
        ``error_label`` are used when matching itself raises.
        """
        try:
            if not self.validation_patterns[pattern_name].match(value):
                validation_result['is_valid'] = False
                validation_result['errors'].append(f"Field '{field_name}' {requirement}")
            return validation_result
        except Exception as e:
            logger.error(f"Error validating {log_label}: {str(e)}")
            validation_result['is_valid'] = False
            validation_result['errors'].append(f"{error_label} validation error: {str(e)}")
            return validation_result

    def _validate_email(self, field_name: str, value: str, validation_result: Dict[str, Any]) -> Dict[str, Any]:
        """Validate email format."""
        return self._validate_pattern(
            'email', field_name, value, validation_result,
            'must be a valid email address', 'email', 'Email')

    def _validate_url(self, field_name: str, value: str, validation_result: Dict[str, Any]) -> Dict[str, Any]:
        """Validate URL format."""
        return self._validate_pattern(
            'url', field_name, value, validation_result,
            'must be a valid URL', 'URL', 'URL')

    def _validate_phone(self, field_name: str, value: str, validation_result: Dict[str, Any]) -> Dict[str, Any]:
        """Validate phone number format."""
        return self._validate_pattern(
            'phone', field_name, value, validation_result,
            'must be a valid phone number', 'phone', 'Phone')

    def _validate_domain(self, field_name: str, value: str, validation_result: Dict[str, Any]) -> Dict[str, Any]:
        """Validate domain format."""
        return self._validate_pattern(
            'domain', field_name, value, validation_result,
            'must be a valid domain', 'domain', 'Domain')

    def _validate_alphanumeric(self, field_name: str, value: str, validation_result: Dict[str, Any]) -> Dict[str, Any]:
        """Validate alphanumeric format."""
        return self._validate_pattern(
            'alphanumeric', field_name, value, validation_result,
            'must contain only letters, numbers, and spaces', 'alphanumeric', 'Alphanumeric')

    def _validate_numeric(self, field_name: str, value: Union[str, int, float], validation_result: Dict[str, Any]) -> Dict[str, Any]:
        """Validate numeric format.

        ``int``/``float`` instances pass; anything else must match the
        unsigned decimal pattern once converted with ``str()``.
        """
        try:
            if isinstance(value, (int, float)):
                return validation_result
            if not self.validation_patterns['numeric'].match(str(value)):
                validation_result['is_valid'] = False
                validation_result['errors'].append(f"Field '{field_name}' must be a valid number")
            return validation_result
        except Exception as e:
            logger.error(f"Error validating numeric: {str(e)}")
            validation_result['is_valid'] = False
            validation_result['errors'].append(f"Numeric validation error: {str(e)}")
            return validation_result

    def _validate_integer(self, field_name: str, value: Union[str, int], validation_result: Dict[str, Any]) -> Dict[str, Any]:
        """Validate integer format.

        ``int`` instances pass; anything else must consist solely of digits
        once converted with ``str()``.
        """
        try:
            if isinstance(value, int):
                return validation_result
            if not self.validation_patterns['integer'].match(str(value)):
                validation_result['is_valid'] = False
                validation_result['errors'].append(f"Field '{field_name}' must be a valid integer")
            return validation_result
        except Exception as e:
            logger.error(f"Error validating integer: {str(e)}")
            validation_result['is_valid'] = False
            validation_result['errors'].append(f"Integer validation error: {str(e)}")
            return validation_result

    def _validate_date(self, field_name: str, value: Union[str, datetime], validation_result: Dict[str, Any]) -> Dict[str, Any]:
        """Validate date format.

        ``datetime`` instances pass; other values must be parseable by
        ``datetime.fromisoformat`` (a trailing ``Z`` is treated as UTC).
        """
        try:
            if isinstance(value, datetime):
                return validation_result
            # Try to parse date string
            try:
                datetime.fromisoformat(str(value).replace('Z', '+00:00'))
            except ValueError:
                validation_result['is_valid'] = False
                validation_result['errors'].append(f"Field '{field_name}' must be a valid date")
            return validation_result
        except Exception as e:
            logger.error(f"Error validating date: {str(e)}")
            validation_result['is_valid'] = False
            validation_result['errors'].append(f"Date validation error: {str(e)}")
            return validation_result

    def _validate_json(self, field_name: str, value: Union[str, dict, list], validation_result: Dict[str, Any]) -> Dict[str, Any]:
        """Validate JSON format.

        ``dict``/``list`` instances pass; other values must be parseable by
        ``json.loads`` once converted with ``str()``.
        """
        try:
            if isinstance(value, (dict, list)):
                return validation_result
            import json
            try:
                json.loads(str(value))
            except json.JSONDecodeError:
                validation_result['is_valid'] = False
                validation_result['errors'].append(f"Field '{field_name}' must be valid JSON")
            return validation_result
        except Exception as e:
            logger.error(f"Error validating JSON: {str(e)}")
            validation_result['is_valid'] = False
            validation_result['errors'].append(f"JSON validation error: {str(e)}")
            return validation_result

    def _validate_string(self, field_name: str, value: str, validation_result: Dict[str, Any]) -> Dict[str, Any]:
        """Validate string format (the value must be a ``str`` instance)."""
        try:
            if not isinstance(value, str):
                validation_result['is_valid'] = False
                validation_result['errors'].append(f"Field '{field_name}' must be a string")
            return validation_result
        except Exception as e:
            logger.error(f"Error validating string: {str(e)}")
            validation_result['is_valid'] = False
            validation_result['errors'].append(f"String validation error: {str(e)}")
            return validation_result

    def generate_validation_error_message(self, validation_result: Dict[str, Any]) -> str:
        """Generate a user-friendly error message from validation results.

        Returns a success message when ``is_valid`` is true, otherwise a
        single- or multi-error summary built from ``errors``.
        """
        try:
            if validation_result['is_valid']:
                return "Validation passed successfully"
            if 'errors' in validation_result and validation_result['errors']:
                error_count = len(validation_result['errors'])
                if error_count == 1:
                    return f"Validation error: {validation_result['errors'][0]}"
                else:
                    return f"Validation failed with {error_count} errors: {'; '.join(validation_result['errors'])}"
            return "Validation failed with unknown errors"
        except Exception as e:
            logger.error(f"Error generating validation error message: {str(e)}")
            return "Error generating validation message"

    def get_validation_summary(self, validation_results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Generate a summary of multiple validation results.

        Counts passed/failed results and total errors/warnings, and groups
        the counts per ``field_name`` (``'unknown'`` when a result has
        none).
        """
        try:
            summary = {
                'total_validations': len(validation_results),
                'passed_validations': 0,
                'failed_validations': 0,
                'total_errors': 0,
                'total_warnings': 0,
                'field_summary': {},
                'validation_timestamp': datetime.utcnow().isoformat()
            }
            for result in validation_results:
                if result.get('is_valid', False):
                    summary['passed_validations'] += 1
                else:
                    summary['failed_validations'] += 1
                summary['total_errors'] += len(result.get('errors', []))
                summary['total_warnings'] += len(result.get('warnings', []))
                field_name = result.get('field_name', 'unknown')
                if field_name not in summary['field_summary']:
                    summary['field_summary'][field_name] = {
                        'validations': 0,
                        'errors': 0,
                        'warnings': 0
                    }
                summary['field_summary'][field_name]['validations'] += 1
                summary['field_summary'][field_name]['errors'] += len(result.get('errors', []))
                summary['field_summary'][field_name]['warnings'] += len(result.get('warnings', []))
            return summary
        except Exception as e:
            logger.error(f"Error generating validation summary: {str(e)}")
            return {
                'total_validations': 0,
                'passed_validations': 0,
                'failed_validations': 0,
                'total_errors': 0,
                'total_warnings': 0,
                'field_summary': {},
                'validation_timestamp': datetime.utcnow().isoformat(),
                'error': str(e)
            }

View File

@@ -0,0 +1,232 @@
"""
Enhanced Strategy Database Service
Handles database operations for enhanced content strategy functionality.
"""
import json
import logging
from typing import Dict, List, Any, Optional
from datetime import datetime
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_
# Import database models
from models.enhanced_strategy_models import EnhancedContentStrategy, EnhancedAIAnalysisResult, OnboardingDataIntegration
logger = logging.getLogger(__name__)
class EnhancedStrategyDBService:
"""Database service for enhanced content strategy operations."""
def __init__(self, db: Session) -> None:
    """Store the SQLAlchemy session used by every query in this service."""
    self.db = db
async def get_enhanced_strategy(self, strategy_id: int) -> Optional[EnhancedContentStrategy]:
    """Fetch a single enhanced strategy by primary key.

    Args:
        strategy_id: Value matched against ``EnhancedContentStrategy.id``.

    Returns:
        The first matching row, or ``None`` when no row matches or the
        query raises (the error is logged).
    """
    try:
        matching = self.db.query(EnhancedContentStrategy).filter(
            EnhancedContentStrategy.id == strategy_id
        )
        return matching.first()
    except Exception as e:
        logger.error(f"Error getting enhanced strategy {strategy_id}: {str(e)}")
        return None
async def get_enhanced_strategies(self, user_id: Optional[int] = None, strategy_id: Optional[int] = None) -> List[EnhancedContentStrategy]:
    """List enhanced strategies, optionally narrowed by owner and/or ID.

    Args:
        user_id: When given, only strategies whose ``user_id`` matches are
            returned. ``None`` means "do not filter by user".
        strategy_id: When given, only the strategy with this ``id`` is
            returned. ``None`` means "do not filter by ID".

    Returns:
        All matching rows, or an empty list when nothing matches or the
        query raised (the error is logged, not propagated).
    """
    try:
        query = self.db.query(EnhancedContentStrategy)
        # Test against None, not truthiness: with `if user_id:` an ID of 0
        # silently disabled the filter and returned every user's strategies.
        if user_id is not None:
            query = query.filter(EnhancedContentStrategy.user_id == user_id)
        if strategy_id is not None:
            query = query.filter(EnhancedContentStrategy.id == strategy_id)
        return query.all()
    except Exception as e:
        logger.error(f"Error getting enhanced strategies: {str(e)}")
        return []
async def create_enhanced_strategy(self, strategy_data: Dict[str, Any]) -> Optional[EnhancedContentStrategy]:
    """Persist a new enhanced strategy built from ``strategy_data``.

    Args:
        strategy_data: Keyword arguments forwarded unchanged to the
            ``EnhancedContentStrategy`` constructor.

    Returns:
        The stored strategy after commit and refresh, or ``None`` when any
        step raised; in that case the error is logged and the session is
        rolled back.
    """
    try:
        new_strategy = EnhancedContentStrategy(**strategy_data)
        self.db.add(new_strategy)
        self.db.commit()
        # Reload the instance so it reflects what the database now holds.
        self.db.refresh(new_strategy)
    except Exception as e:
        logger.error(f"Error creating enhanced strategy: {str(e)}")
        self.db.rollback()
        return None
    else:
        return new_strategy
async def update_enhanced_strategy(self, strategy_id: int, update_data: Dict[str, Any]) -> Optional[EnhancedContentStrategy]:
    """Apply ``update_data`` to an existing enhanced strategy.

    Args:
        strategy_id: ID of the strategy to modify.
        update_data: Attribute name -> new value. Keys for which the
            strategy has no attribute of that name are ignored.

    Returns:
        The refreshed strategy, or ``None`` when the strategy does not
        exist or the update raised (logged and rolled back).
    """
    try:
        target = await self.get_enhanced_strategy(strategy_id)
        if not target:
            return None

        # Lazy generator: each key is checked right before it is assigned.
        applicable = (
            (attr, new_value)
            for attr, new_value in update_data.items()
            if hasattr(target, attr)
        )
        for attr, new_value in applicable:
            setattr(target, attr, new_value)

        # Stamped after the loop, so it overrides any caller-supplied value.
        target.updated_at = datetime.utcnow()
        self.db.commit()
        self.db.refresh(target)
    except Exception as e:
        logger.error(f"Error updating enhanced strategy {strategy_id}: {str(e)}")
        self.db.rollback()
        return None
    else:
        return target
async def delete_enhanced_strategy(self, strategy_id: int) -> bool:
    """Remove an enhanced strategy.

    Args:
        strategy_id: ID of the strategy to delete.

    Returns:
        ``True`` once the deletion is committed; ``False`` when the
        strategy was not found or the deletion raised (logged and rolled
        back).
    """
    try:
        doomed = await self.get_enhanced_strategy(strategy_id)
        if doomed:
            self.db.delete(doomed)
            self.db.commit()
            return True
        return False
    except Exception as e:
        logger.error(f"Error deleting enhanced strategy {strategy_id}: {str(e)}")
        self.db.rollback()
        return False
async def get_enhanced_strategies_with_analytics(self, strategy_id: Optional[int] = None) -> List[Dict[str, Any]]:
    """Return strategies as dicts, each carrying its recent AI analyses.

    Args:
        strategy_id: Forwarded to ``get_enhanced_strategies`` as its
            ``strategy_id`` filter.

    Returns:
        One dict per strategy: ``to_dict()`` output when the strategy has
        that attribute, otherwise a fallback dict of id/name/industry/
        user_id/timestamps. Each dict gains an ``'analytics'`` key holding
        up to 5 entries from ``get_ai_analysis_history``. An empty list is
        returned if anything raises (the error is logged).
    """
    try:
        enriched = []
        for item in await self.get_enhanced_strategies(strategy_id=strategy_id):
            if hasattr(item, 'to_dict'):
                payload = item.to_dict()
            else:
                payload = {
                    'id': item.id,
                    'name': item.name,
                    'industry': item.industry,
                    'user_id': item.user_id,
                }
                # Timestamps are serialised to ISO strings; unset ones stay None.
                payload['created_at'] = item.created_at.isoformat() if item.created_at else None
                payload['updated_at'] = item.updated_at.isoformat() if item.updated_at else None
            # Add analytics data
            payload['analytics'] = await self.get_ai_analysis_history(item.id, limit=5)
            enriched.append(payload)
        return enriched
    except Exception as e:
        logger.error(f"Error getting enhanced strategies with analytics: {str(e)}")
        return []
async def get_ai_analysis_history(self, strategy_id: int, limit: int = 10) -> List[Dict[str, Any]]:
    """Return the most recent AI analysis results for a strategy.

    Args:
        strategy_id: Strategy whose analyses are requested.
        limit: Maximum number of results to return.

    Returns:
        Analyses ordered newest first (by ``created_at``), each as
        ``to_dict()`` output when available or a fallback dict otherwise.
        An empty list is returned if the query raises (error is logged).
    """
    try:
        history_query = self.db.query(EnhancedAIAnalysisResult).filter(
            EnhancedAIAnalysisResult.strategy_id == strategy_id
        )
        history_query = history_query.order_by(EnhancedAIAnalysisResult.created_at.desc())
        records = history_query.limit(limit).all()

        serialized = []
        for record in records:
            if hasattr(record, 'to_dict'):
                serialized.append(record.to_dict())
                continue
            entry = {
                'id': record.id,
                'analysis_type': record.analysis_type,
                'insights': record.insights,
                'recommendations': record.recommendations,
            }
            entry['created_at'] = record.created_at.isoformat() if record.created_at else None
            serialized.append(entry)
        return serialized
    except Exception as e:
        logger.error(f"Error getting AI analysis history for strategy {strategy_id}: {str(e)}")
        return []
async def get_onboarding_integration(self, strategy_id: int) -> Optional[Dict[str, Any]]:
    """Return the onboarding-integration record linked to a strategy.

    Args:
        strategy_id: Strategy whose integration record is requested.

    Returns:
        The first matching record as ``to_dict()`` output (or a fallback
        dict when the record has no ``to_dict``); ``None`` when no record
        exists or the query raises (error is logged).
    """
    try:
        record = (
            self.db.query(OnboardingDataIntegration)
            .filter(OnboardingDataIntegration.strategy_id == strategy_id)
            .first()
        )
        if not record:
            return None
        if hasattr(record, 'to_dict'):
            return record.to_dict()

        payload = {
            'id': record.id,
            'strategy_id': record.strategy_id,
            'data_sources': record.data_sources,
            'confidence_scores': record.confidence_scores,
        }
        payload['created_at'] = record.created_at.isoformat() if record.created_at else None
        return payload
    except Exception as e:
        logger.error(f"Error getting onboarding integration for strategy {strategy_id}: {str(e)}")
        return None
async def get_strategy_completion_stats(self, user_id: int) -> Dict[str, Any]:
    """Summarise how complete a user's enhanced strategies are.

    Args:
        user_id: Owner whose strategies are summarised.

    Returns:
        Dict with ``total_strategies``, ``completed_strategies`` (those at
        80% completion or more), ``avg_completion_percentage`` and the
        echoed ``user_id``. If loading or aggregating raises, the error is
        logged and the same keys are returned with zero counts.
    """
    try:
        strategies = await self.get_enhanced_strategies(user_id=user_id)
        # An unset completion_percentage (None) counts as 0%. Previously a
        # single None raised TypeError in the comparison below, and the
        # handler reported zero strategies even though some existed.
        percentages = [s.completion_percentage or 0 for s in strategies]
        total_strategies = len(percentages)
        completed_strategies = sum(1 for pct in percentages if pct >= 80)
        avg_completion = sum(percentages) / total_strategies if total_strategies > 0 else 0
        return {
            'total_strategies': total_strategies,
            'completed_strategies': completed_strategies,
            'avg_completion_percentage': avg_completion,
            'user_id': user_id
        }
    except Exception as e:
        logger.error(f"Error getting strategy completion stats for user {user_id}: {str(e)}")
        return {
            'total_strategies': 0,
            'completed_strategies': 0,
            'avg_completion_percentage': 0,
            'user_id': user_id
        }
async def search_enhanced_strategies(self, user_id: int, search_term: str) -> List[EnhancedContentStrategy]:
    """Case-insensitively search a user's strategies by name or industry.

    The term is matched as a literal substring: ``%`` and ``_`` typed by
    the user are escaped so they do not act as LIKE wildcards.

    Args:
        user_id: Only strategies owned by this user are searched.
        search_term: Text to look for inside ``name`` or ``industry``.

    Returns:
        Matching strategies, or an empty list when nothing matches or the
        query raised (the error is logged, not propagated).
    """
    try:
        # "/" is the escape character (the one SQLAlchemy's own autoescape
        # uses); escape it first so the escapes added for % and _ survive.
        escaped_term = (
            str(search_term)
            .replace("/", "//")
            .replace("%", "/%")
            .replace("_", "/_")
        )
        pattern = f"%{escaped_term}%"
        return self.db.query(EnhancedContentStrategy).filter(
            and_(
                EnhancedContentStrategy.user_id == user_id,
                or_(
                    EnhancedContentStrategy.name.ilike(pattern, escape="/"),
                    EnhancedContentStrategy.industry.ilike(pattern, escape="/")
                )
            )
        ).all()
    except Exception as e:
        logger.error(f"Error searching enhanced strategies: {str(e)}")
        return []
async def get_strategy_export_data(self, strategy_id: int) -> Optional[Dict[str, Any]]:
    """Bundle a strategy with its analyses and onboarding data for export.

    Args:
        strategy_id: Strategy to export.

    Returns:
        Dict with keys ``strategy``, ``analytics`` (up to 10 entries from
        ``get_ai_analysis_history``), ``onboarding_integration`` and
        ``exported_at`` (ISO string taken from ``datetime.utcnow()``).
        ``None`` when the strategy does not exist or anything raises (the
        error is logged).
    """
    try:
        source = await self.get_enhanced_strategy(strategy_id)
        if not source:
            return None

        # Get strategy data
        if hasattr(source, 'to_dict'):
            strategy_payload = source.to_dict()
        else:
            strategy_payload = {
                'id': source.id,
                'name': source.name,
                'industry': source.industry,
                'user_id': source.user_id,
            }
            strategy_payload['created_at'] = source.created_at.isoformat() if source.created_at else None
            strategy_payload['updated_at'] = source.updated_at.isoformat() if source.updated_at else None

        export = {'strategy': strategy_payload}
        # Get analytics data
        export['analytics'] = await self.get_ai_analysis_history(strategy_id, limit=10)
        # Get onboarding integration
        export['onboarding_integration'] = await self.get_onboarding_integration(strategy_id)
        export['exported_at'] = datetime.utcnow().isoformat()
        return export
    except Exception as e:
        logger.error(f"Error getting strategy export data for strategy {strategy_id}: {str(e)}")
        return None

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,268 @@
"""
Gap Analysis Service for Content Planning API
Extracted business logic from the gap analysis route for better separation of concerns.
"""
from typing import Dict, Any, List, Optional
from datetime import datetime
from loguru import logger
from sqlalchemy.orm import Session
# Import database services
from services.content_planning_db import ContentPlanningDBService
from services.ai_analysis_db_service import AIAnalysisDBService
from services.onboarding_data_service import OnboardingDataService
# Import migrated content gap analysis services
from services.content_gap_analyzer.content_gap_analyzer import ContentGapAnalyzer
from services.content_gap_analyzer.competitor_analyzer import CompetitorAnalyzer
from services.content_gap_analyzer.keyword_researcher import KeywordResearcher
from services.content_gap_analyzer.ai_engine_service import AIEngineService
from services.content_gap_analyzer.website_analyzer import WebsiteAnalyzer
# Import utilities
from ..utils.error_handlers import ContentPlanningErrorHandler
from ..utils.response_builders import ResponseBuilder
from ..utils.constants import ERROR_MESSAGES, SUCCESS_MESSAGES
class GapAnalysisService:
"""Service class for content gap analysis operations."""
def __init__(self):
    """Instantiate the collaborating services this class delegates to.

    Every collaborator is constructed with no arguments and kept on the
    instance for the lifetime of the service.
    """
    # Storage for AI analysis results and access to onboarding data.
    self.ai_analysis_db_service = AIAnalysisDBService()
    self.onboarding_service = OnboardingDataService()
    # Initialize migrated services
    self.content_gap_analyzer = ContentGapAnalyzer()
    self.competitor_analyzer = CompetitorAnalyzer()
    self.keyword_researcher = KeywordResearcher()
    self.ai_engine_service = AIEngineService()
    self.website_analyzer = WebsiteAnalyzer()
async def create_gap_analysis(self, analysis_data: Dict[str, Any], db: Session) -> Dict[str, Any]:
    """Store a new content gap analysis and return it as a dict.

    Args:
        analysis_data: Fields of the analysis; passed unchanged to
            ``ContentPlanningDBService.create_content_gap_analysis``.
        db: Database session used to build the DB service.

    Returns:
        ``to_dict()`` of the created analysis.

    Raises:
        The error produced by ``ContentPlanningErrorHandler.handle_general_error``
        when creation fails or yields a falsy result.
    """
    try:
        logger.info(f"Creating content gap analysis for: {analysis_data.get('website_url', 'Unknown')}")
        planning_db = ContentPlanningDBService(db)
        record = await planning_db.create_content_gap_analysis(analysis_data)
        if not record:
            # A falsy result is treated as a failure and handled below.
            raise Exception("Failed to create gap analysis")
        logger.info(f"Content gap analysis created successfully: {record.id}")
        return record.to_dict()
    except Exception as e:
        logger.error(f"Error creating content gap analysis: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "create_gap_analysis")
async def get_gap_analyses(self, user_id: Optional[int] = None, strategy_id: Optional[int] = None, force_refresh: bool = False) -> Dict[str, Any]:
    """Get content gap analysis with real AI insights - Database first approach.

    Unless ``force_refresh`` is set, the latest stored ``gap_analysis``
    result (requested with ``max_age_hours=24``) is returned when one
    exists. Otherwise a new analysis is generated from the user's
    personalized onboarding inputs and stored; a storage failure is
    logged and does not affect the response.

    Args:
        user_id: User to analyse; a falsy value falls back to user 1.
        strategy_id: Optional strategy the analysis is tied to.
        force_refresh: Skip the stored-result lookup and always regenerate.

    Returns:
        Dict with ``gap_analyses``, ``total_gaps``, ``generated_at``,
        ``ai_service_status``, ``personalized_data_used`` and
        ``data_source``; cached responses additionally carry
        ``cache_age_hours``.

    Raises:
        The error produced by ``ContentPlanningErrorHandler.handle_general_error``
        when lookup or generation fails.
    """
    try:
        logger.info(f"🚀 Starting content gap analysis for user: {user_id}, strategy: {strategy_id}, force_refresh: {force_refresh}")

        # Use user_id or default to 1
        current_user_id = user_id or 1

        # Skip database check if force_refresh is True
        if not force_refresh:
            # First, try to get existing gap analysis from database
            logger.info(f"🔍 Checking database for existing gap analysis for user {current_user_id}")
            existing_analysis = await self.ai_analysis_db_service.get_latest_ai_analysis(
                user_id=current_user_id,
                analysis_type="gap_analysis",
                strategy_id=strategy_id,
                max_age_hours=24  # Use cached results up to 24 hours old
            )

            if existing_analysis:
                logger.info(f"✅ Found existing gap analysis in database: {existing_analysis.get('id', 'unknown')}")

                # Sample the clock once so generated_at and cache_age_hours agree.
                now = datetime.utcnow()

                # .get(key, default) only covers a missing key. The value may
                # also be present as None or as a serialized ISO string
                # (assumption - the producer is not visible here), both of
                # which used to crash on .isoformat() / the subtraction.
                created_at = existing_analysis.get('created_at')
                if isinstance(created_at, str):
                    try:
                        created_at = datetime.fromisoformat(created_at)
                    except ValueError:
                        created_at = None
                if not isinstance(created_at, datetime):
                    created_at = now
                elif created_at.tzinfo is not None:
                    # Convert aware timestamps to naive UTC so they can be
                    # subtracted from the naive utcnow() value.
                    created_at = (created_at - created_at.utcoffset()).replace(tzinfo=None)

                recommendations = existing_analysis.get('recommendations', [])
                if recommendations is None:
                    recommendations = []

                # Return cached results
                return {
                    "gap_analyses": [{"recommendations": recommendations}],
                    "total_gaps": len(recommendations),
                    "generated_at": created_at.isoformat(),
                    "ai_service_status": existing_analysis.get('ai_service_status', 'operational'),
                    "personalized_data_used": bool(existing_analysis.get('personalized_data_used')),
                    "data_source": "database_cache",
                    "cache_age_hours": (now - created_at).total_seconds() / 3600
                }

        # No recent analysis found or force refresh requested, run new AI analysis
        logger.info(f"🔄 Running new gap analysis for user {current_user_id} (force_refresh: {force_refresh})")

        # Get personalized inputs from onboarding data
        personalized_inputs = self.onboarding_service.get_personalized_ai_inputs(current_user_id)
        logger.info(f"📊 Using personalized inputs: {len(personalized_inputs)} data points")

        # Generate real AI-powered gap analysis
        gap_analysis = await self.ai_engine_service.generate_content_recommendations(personalized_inputs)
        logger.info(f"✅ AI gap analysis completed: {len(gap_analysis)} recommendations")

        # Store results in database (best effort: a failure is only logged)
        try:
            await self.ai_analysis_db_service.store_ai_analysis_result(
                user_id=current_user_id,
                analysis_type="gap_analysis",
                insights=[],
                recommendations=gap_analysis,
                personalized_data=personalized_inputs,
                strategy_id=strategy_id,
                ai_service_status="operational"
            )
            logger.info(f"💾 Gap analysis results stored in database for user {current_user_id}")
        except Exception as e:
            logger.error(f"❌ Failed to store gap analysis in database: {str(e)}")

        return {
            "gap_analyses": [{"recommendations": gap_analysis}],
            "total_gaps": len(gap_analysis),
            "generated_at": datetime.utcnow().isoformat(),
            "ai_service_status": "operational",
            "personalized_data_used": True,
            "data_source": "ai_analysis"
        }
    except Exception as e:
        logger.error(f"❌ Error generating content gap analysis: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "get_gap_analyses")
async def get_gap_analysis_by_id(self, analysis_id: int, db: Session) -> Dict[str, Any]:
    """Get a specific content gap analysis by ID.

    Args:
        analysis_id: ID of the analysis to fetch.
        db: Database session used to build the DB service.

    Returns:
        ``to_dict()`` of the stored analysis.

    Raises:
        The error produced by ``ContentPlanningErrorHandler.handle_not_found_error``
        when no analysis has this ID, or by ``handle_general_error`` when
        the lookup itself fails.
    """
    try:
        logger.info(f"Fetching content gap analysis: {analysis_id}")
        db_service = ContentPlanningDBService(db)
        analysis = await db_service.get_content_gap_analysis(analysis_id)
        if analysis:
            return analysis.to_dict()
    except Exception as e:
        logger.error(f"Error getting content gap analysis: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "get_gap_analysis_by_id")

    # Raised outside the try block: inside it, the broad `except Exception`
    # caught the not-found error, logged it as a failure and re-wrapped it
    # as a general error.
    raise ContentPlanningErrorHandler.handle_not_found_error("Content gap analysis", analysis_id)
async def analyze_content_gaps(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
    """Run the content gap pipeline for a website against its competitors.

    The stages run one after another: website analysis, competitor
    analysis, keyword research, gap identification, AI recommendations
    and strategic opportunities.

    Args:
        request_data: Request fields; the keys read are ``website_url``,
            ``competitor_urls``, ``industry`` and ``target_keywords``.

    Returns:
        Dict with ``website_analysis``, ``competitor_analysis``,
        ``gap_analysis``, ``recommendations``, ``opportunities`` and a
        ``created_at`` datetime. The keyword research result feeds later
        stages but is not part of the response.

    Raises:
        The error produced by ``ContentPlanningErrorHandler.handle_general_error``
        when any stage fails.
    """
    try:
        logger.info(f"Starting content gap analysis for: {request_data.get('website_url', 'Unknown')}")

        # Stage 1: the site under analysis.
        logger.info("Performing website analysis...")
        site_report = await self.website_analyzer.analyze_website_content(request_data.get('website_url'))

        # Stage 2: the competitor sites.
        logger.info("Performing competitor analysis...")
        competitor_report = await self.competitor_analyzer.analyze_competitors(request_data.get('competitor_urls', []))

        # Stage 3: keyword research for the industry / target keywords.
        logger.info("Performing keyword research...")
        keyword_report = await self.keyword_researcher.research_keywords(
            industry=request_data.get('industry'),
            target_keywords=request_data.get('target_keywords')
        )

        # Stage 4: gaps, informed by the keyword research.
        logger.info("Performing content gap analysis...")
        gap_report = await self.content_gap_analyzer.identify_content_gaps(
            website_url=request_data.get('website_url'),
            competitor_urls=request_data.get('competitor_urls', []),
            keyword_data=keyword_report
        )

        # Stage 5: recommendations drawn from all four reports.
        logger.info("Generating AI recommendations...")
        recommendation_list = await self.ai_engine_service.generate_recommendations(
            website_analysis=site_report,
            competitor_analysis=competitor_report,
            gap_analysis=gap_report,
            keyword_analysis=keyword_report
        )

        # Stage 6: strategic opportunities.
        logger.info("Identifying strategic opportunities...")
        opportunity_list = await self.ai_engine_service.identify_strategic_opportunities(
            gap_analysis=gap_report,
            competitor_analysis=competitor_report,
            keyword_analysis=keyword_report
        )

        # Prepare response
        response_data = {
            'website_analysis': site_report,
            'competitor_analysis': competitor_report,
            'gap_analysis': gap_report,
            'recommendations': recommendation_list,
            'opportunities': opportunity_list,
            'created_at': datetime.utcnow()
        }
        logger.info(f"Content gap analysis completed successfully")
        return response_data
    except Exception as e:
        logger.error(f"Error analyzing content gaps: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "analyze_content_gaps")
async def get_user_gap_analyses(self, user_id: int, db: Session) -> List[Dict[str, Any]]:
    """List every stored gap analysis belonging to a user.

    Args:
        user_id: Owner of the analyses.
        db: Database session used to build the DB service.

    Returns:
        ``to_dict()`` of each analysis, in the order the DB service
        returned them.

    Raises:
        The error produced by ``ContentPlanningErrorHandler.handle_general_error``
        when loading or serialising fails.
    """
    try:
        logger.info(f"Fetching gap analyses for user: {user_id}")
        planning_db = ContentPlanningDBService(db)
        records = await planning_db.get_user_content_gap_analyses(user_id)
        serialized = []
        for record in records:
            serialized.append(record.to_dict())
        return serialized
    except Exception as e:
        logger.error(f"Error getting user gap analyses: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "get_user_gap_analyses")
async def update_gap_analysis(self, analysis_id: int, update_data: Dict[str, Any], db: Session) -> Dict[str, Any]:
    """Update a content gap analysis.

    Args:
        analysis_id: ID of the analysis to update.
        update_data: Changes; passed unchanged to
            ``ContentPlanningDBService.update_content_gap_analysis``.
        db: Database session used to build the DB service.

    Returns:
        ``to_dict()`` of the updated analysis.

    Raises:
        The error produced by ``ContentPlanningErrorHandler.handle_not_found_error``
        when the DB service returns a falsy result, or by
        ``handle_general_error`` when the update itself fails.
    """
    try:
        logger.info(f"Updating content gap analysis: {analysis_id}")
        db_service = ContentPlanningDBService(db)
        updated_analysis = await db_service.update_content_gap_analysis(analysis_id, update_data)
        if updated_analysis:
            return updated_analysis.to_dict()
    except Exception as e:
        logger.error(f"Error updating content gap analysis: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "update_gap_analysis")

    # Raised outside the try block: inside it, the broad `except Exception`
    # caught the not-found error, logged it as a failure and re-wrapped it
    # as a general error.
    raise ContentPlanningErrorHandler.handle_not_found_error("Content gap analysis", analysis_id)
async def delete_gap_analysis(self, analysis_id: int, db: Session) -> bool:
    """Delete a content gap analysis.

    Args:
        analysis_id: ID of the analysis to delete.
        db: Database session used to build the DB service.

    Returns:
        ``True`` when the DB service reports the deletion as done.

    Raises:
        The error produced by ``ContentPlanningErrorHandler.handle_not_found_error``
        when the DB service returns a falsy result, or by
        ``handle_general_error`` when the deletion itself fails.
    """
    try:
        logger.info(f"Deleting content gap analysis: {analysis_id}")
        db_service = ContentPlanningDBService(db)
        deleted = await db_service.delete_content_gap_analysis(analysis_id)
        if deleted:
            return True
    except Exception as e:
        logger.error(f"Error deleting content gap analysis: {str(e)}")
        raise ContentPlanningErrorHandler.handle_general_error(e, "delete_gap_analysis")

    # Raised outside the try block: inside it, the broad `except Exception`
    # caught the not-found error, logged it as a failure and re-wrapped it
    # as a general error.
    raise ContentPlanningErrorHandler.handle_not_found_error("Content gap analysis", analysis_id)