ALwrity Version 0.5.0 (FastAPI + React)

This commit is contained in:
ajaysi
2025-08-06 12:48:02 +05:30
parent f28a919caa
commit 32f97fa6b3
476 changed files with 115544 additions and 28747 deletions

View File

@@ -0,0 +1,10 @@
"""
Onboarding Module
Onboarding data integration and processing services.
"""
from .data_integration import OnboardingDataIntegrationService
from .field_transformation import FieldTransformationService
from .data_quality import DataQualityService
__all__ = ['OnboardingDataIntegrationService', 'FieldTransformationService', 'DataQualityService']

View File

@@ -0,0 +1,381 @@
"""
Onboarding Data Integration Service
Onboarding data integration and processing.
"""
import logging
from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
from sqlalchemy.orm import Session
# Import database models
from models.enhanced_strategy_models import (
OnboardingDataIntegration
)
from models.onboarding import (
OnboardingSession,
WebsiteAnalysis,
ResearchPreferences,
APIKey
)
logger = logging.getLogger(__name__)
class OnboardingDataIntegrationService:
"""Service for onboarding data integration and processing."""
def __init__(self):
self.data_freshness_threshold = timedelta(hours=24)
self.max_analysis_age = timedelta(days=7)
async def process_onboarding_data(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Gather, score and persist all onboarding data of one user.

    Args:
        user_id: Identifier of the user whose onboarding data is processed.
        db: Database session used for every read and for the final write.

    Returns:
        Dict with the keys ``website_analysis``, ``research_preferences``,
        ``api_keys_data``, ``onboarding_session``, ``data_quality`` and
        ``processing_timestamp``. If anything raises, the error is logged
        and the result of ``_get_fallback_data()`` is returned instead.
    """
    try:
        logger.info(f"Processing onboarding data for user: {user_id}")

        # Load every source up front; each loader returns {} when it has nothing.
        site = self._get_website_analysis(user_id, db)
        prefs = self._get_research_preferences(user_id, db)
        keys = self._get_api_keys_data(user_id, db)
        session_info = self._get_onboarding_session(user_id, db)

        result: Dict[str, Any] = {}
        result['website_analysis'] = site
        result['research_preferences'] = prefs
        result['api_keys_data'] = keys
        result['onboarding_session'] = session_info
        # The session itself is not part of the quality score.
        result['data_quality'] = self._assess_data_quality(site, prefs, keys)
        result['processing_timestamp'] = datetime.utcnow().isoformat()

        await self._store_integrated_data(user_id, result, db)

        logger.info(f"Onboarding data processed successfully for user: {user_id}")
        return result
    except Exception as e:
        logger.error(f"Error processing onboarding data for user {user_id}: {str(e)}")
        return self._get_fallback_data()
def _get_website_analysis(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Load the newest website analysis of the user's latest onboarding session.

    Args:
        user_id: Identifier of the user.
        db: Database session to query.

    Returns:
        The analysis' ``to_dict()`` output extended with ``data_freshness``
        and ``confidence_level`` (0.9 when the analysis status is
        'completed', otherwise 0.5). An empty dict when no session or
        analysis exists, or when an error occurs (which is logged).
    """
    try:
        # Sessions are ordered newest-first; only the top row is used.
        session_query = db.query(OnboardingSession).filter(OnboardingSession.user_id == user_id)
        latest_session = session_query.order_by(OnboardingSession.updated_at.desc()).first()
        if not latest_session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return {}

        analysis_query = db.query(WebsiteAnalysis).filter(WebsiteAnalysis.session_id == latest_session.id)
        website_analysis = analysis_query.order_by(WebsiteAnalysis.updated_at.desc()).first()
        if not website_analysis:
            logger.warning(f"No website analysis found for user {user_id}")
            return {}

        analysis_data = website_analysis.to_dict()
        analysis_data['data_freshness'] = self._calculate_freshness(website_analysis.updated_at)
        if website_analysis.status == 'completed':
            analysis_data['confidence_level'] = 0.9
        else:
            analysis_data['confidence_level'] = 0.5

        logger.info(f"Retrieved website analysis for user {user_id}: {website_analysis.website_url}")
        return analysis_data
    except Exception as e:
        logger.error(f"Error getting website analysis for user {user_id}: {str(e)}")
        return {}
def _get_research_preferences(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Load the research preferences attached to the user's latest onboarding session.

    Args:
        user_id: Identifier of the user.
        db: Database session to query.

    Returns:
        The preferences' ``to_dict()`` output extended with
        ``data_freshness`` and a fixed ``confidence_level`` of 0.9. An empty
        dict when no session or preferences exist, or when an error occurs
        (which is logged).
    """
    try:
        session_query = db.query(OnboardingSession).filter(OnboardingSession.user_id == user_id)
        latest_session = session_query.order_by(OnboardingSession.updated_at.desc()).first()
        if not latest_session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return {}

        # No ordering here: the first matching row is taken as-is.
        stored_prefs = db.query(ResearchPreferences).filter(
            ResearchPreferences.session_id == latest_session.id
        ).first()
        if not stored_prefs:
            logger.warning(f"No research preferences found for user {user_id}")
            return {}

        result = stored_prefs.to_dict()
        result.update({
            'data_freshness': self._calculate_freshness(stored_prefs.updated_at),
            'confidence_level': 0.9,
        })

        logger.info(f"Retrieved research preferences for user {user_id}")
        return result
    except Exception as e:
        logger.error(f"Error getting research preferences for user {user_id}: {str(e)}")
        return {}
def _get_api_keys_data(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Load all API keys stored for the user's latest onboarding session.

    Args:
        user_id: Identifier of the user.
        db: Database session to query.

    Returns:
        Dict with ``api_keys`` (each key's ``to_dict()``), ``total_keys``,
        ``providers``, ``data_freshness`` (derived from the session's
        ``updated_at``) and a fixed ``confidence_level`` of 0.8. An empty
        dict when no session or keys exist, or when an error occurs (which
        is logged).
    """
    try:
        session_query = db.query(OnboardingSession).filter(OnboardingSession.user_id == user_id)
        latest_session = session_query.order_by(OnboardingSession.updated_at.desc()).first()
        if not latest_session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return {}

        api_keys = db.query(APIKey).filter(APIKey.session_id == latest_session.id).all()
        if not api_keys:
            logger.warning(f"No API keys found for user {user_id}")
            return {}

        # NOTE(review): the contents of to_dict() are not visible here; confirm
        # it does not include the secret key value before persisting this dict.
        serialized = [key.to_dict() for key in api_keys]
        key_count = len(api_keys)
        provider_names = [key.provider for key in api_keys]
        freshness = self._calculate_freshness(latest_session.updated_at)

        logger.info(f"Retrieved {len(api_keys)} API keys for user {user_id}")
        return {
            'api_keys': serialized,
            'total_keys': key_count,
            'providers': provider_names,
            'data_freshness': freshness,
            'confidence_level': 0.8,
        }
    except Exception as e:
        logger.error(f"Error getting API keys data for user {user_id}: {str(e)}")
        return {}
def _get_onboarding_session(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Summarise the user's most recently updated onboarding session.

    Args:
        user_id: Identifier of the user.
        db: Database session to query.

    Returns:
        Dict with ``id``, ``user_id``, ``current_step``, ``progress``,
        ISO-formatted ``started_at``/``updated_at`` (``None`` when unset),
        ``data_freshness`` and a fixed ``confidence_level`` of 0.9. An empty
        dict when no session exists or an error occurs (which is logged).
    """
    try:
        session = (
            db.query(OnboardingSession)
            .filter(OnboardingSession.user_id == user_id)
            .order_by(OnboardingSession.updated_at.desc())
            .first()
        )
        if not session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return {}

        summary: Dict[str, Any] = {
            'id': session.id,
            'user_id': session.user_id,
            'current_step': session.current_step,
            'progress': session.progress,
        }
        # Timestamps are optional on the row; serialise only when set.
        summary['started_at'] = session.started_at.isoformat() if session.started_at else None
        summary['updated_at'] = session.updated_at.isoformat() if session.updated_at else None
        summary['data_freshness'] = self._calculate_freshness(session.updated_at)
        summary['confidence_level'] = 0.9

        logger.info(f"Retrieved onboarding session for user {user_id}: step {session.current_step}, progress {session.progress}%")
        return summary
    except Exception as e:
        logger.error(f"Error getting onboarding session for user {user_id}: {str(e)}")
        return {}
def _assess_data_quality(self, website_analysis: Dict, research_preferences: Dict, api_keys_data: Dict) -> Dict[str, Any]:
"""Assess the quality and completeness of onboarding data."""
try:
quality_metrics = {
'overall_score': 0.0,
'completeness': 0.0,
'freshness': 0.0,
'relevance': 0.0,
'confidence': 0.0
}
# Calculate completeness
total_fields = 0
filled_fields = 0
# Website analysis completeness
website_fields = ['domain', 'industry', 'business_type', 'target_audience', 'content_goals']
for field in website_fields:
total_fields += 1
if website_analysis.get(field):
filled_fields += 1
# Research preferences completeness
research_fields = ['research_topics', 'content_types', 'target_audience', 'industry_focus']
for field in research_fields:
total_fields += 1
if research_preferences.get(field):
filled_fields += 1
# API keys completeness
total_fields += 1
if api_keys_data:
filled_fields += 1
quality_metrics['completeness'] = filled_fields / total_fields if total_fields > 0 else 0.0
# Calculate freshness
freshness_scores = []
for data_source in [website_analysis, research_preferences]:
if data_source.get('data_freshness'):
freshness_scores.append(data_source['data_freshness'])
quality_metrics['freshness'] = sum(freshness_scores) / len(freshness_scores) if freshness_scores else 0.0
# Calculate relevance (based on data presence and quality)
relevance_score = 0.0
if website_analysis.get('domain'):
relevance_score += 0.4
if research_preferences.get('research_topics'):
relevance_score += 0.3
if api_keys_data:
relevance_score += 0.3
quality_metrics['relevance'] = relevance_score
# Calculate confidence
quality_metrics['confidence'] = (quality_metrics['completeness'] + quality_metrics['freshness'] + quality_metrics['relevance']) / 3
# Calculate overall score
quality_metrics['overall_score'] = quality_metrics['confidence']
return quality_metrics
except Exception as e:
logger.error(f"Error assessing data quality: {str(e)}")
return {
'overall_score': 0.0,
'completeness': 0.0,
'freshness': 0.0,
'relevance': 0.0,
'confidence': 0.0
}
def _calculate_freshness(self, created_at: datetime) -> float:
"""Calculate data freshness score (0.0 to 1.0)."""
try:
age = datetime.utcnow() - created_at
if age <= self.data_freshness_threshold:
return 1.0
elif age <= self.max_analysis_age:
# Linear decay from 1.0 to 0.5
decay_factor = 1.0 - (age - self.data_freshness_threshold) / (self.max_analysis_age - self.data_freshness_threshold) * 0.5
return max(0.5, decay_factor)
else:
return 0.5 # Minimum freshness for old data
except Exception as e:
logger.error(f"Error calculating data freshness: {str(e)}")
return 0.5
def _check_api_data_availability(self, api_key_data: Dict) -> bool:
"""Check if API key has available data."""
try:
# Check if API key has been used recently and has data
if api_key_data.get('last_used') and api_key_data.get('usage_count', 0) > 0:
return api_key_data.get('data_available', False)
return False
except Exception as e:
logger.error(f"Error checking API data availability: {str(e)}")
return False
async def _store_integrated_data(self, user_id: int, integrated_data: Dict[str, Any], db: Session) -> None:
    """Insert or update the user's integrated onboarding record.

    Only the ``website_analysis``, ``research_preferences`` and
    ``api_keys_data`` entries of ``integrated_data`` are persisted. Failures
    are logged and the transaction is rolled back; nothing is raised.

    Args:
        user_id: Identifier of the user owning the record.
        integrated_data: Output of the integration step.
        db: Database session used for the lookup and the commit.
    """
    try:
        record = db.query(OnboardingDataIntegration).filter(
            OnboardingDataIntegration.user_id == user_id
        ).first()

        site = integrated_data.get('website_analysis', {})
        prefs = integrated_data.get('research_preferences', {})
        keys = integrated_data.get('api_keys_data', {})

        if not record:
            # First integration for this user: create the row.
            db.add(OnboardingDataIntegration(
                user_id=user_id,
                website_analysis_data=site,
                research_preferences_data=prefs,
                api_keys_data=keys,
                created_at=datetime.utcnow(),
                updated_at=datetime.utcnow()
            ))
        else:
            record.website_analysis_data = site
            record.research_preferences_data = prefs
            record.api_keys_data = keys
            record.updated_at = datetime.utcnow()

        db.commit()
        logger.info(f"Integrated onboarding data stored for user: {user_id}")
    except Exception as e:
        logger.error(f"Error storing integrated data for user {user_id}: {str(e)}")
        db.rollback()
def _get_fallback_data(self) -> Dict[str, Any]:
"""Get fallback data when processing fails."""
return {
'website_analysis': {},
'research_preferences': {},
'api_keys_data': {},
'onboarding_session': {},
'data_quality': {
'overall_score': 0.0,
'completeness': 0.0,
'freshness': 0.0,
'relevance': 0.0,
'confidence': 0.0
},
'processing_timestamp': datetime.utcnow().isoformat()
}
async def get_integrated_data(self, user_id: int, db: Session) -> Optional[Dict[str, Any]]:
    """Return the stored integrated onboarding data, refreshing it when stale.

    Args:
        user_id: Identifier of the user.
        db: Database session to query (and to write to when reprocessing).

    Returns:
        The reconstructed integrated data when the stored record was updated
        within ``self.data_freshness_threshold``; the result of
        ``process_onboarding_data`` when the record is older; ``None`` when
        no record exists or an error occurs (which is logged).
    """
    try:
        record = db.query(OnboardingDataIntegration).filter(
            OnboardingDataIntegration.user_id == user_id
        ).first()
        if not record:
            return None

        site = record.website_analysis_data or {}
        prefs = record.research_preferences_data or {}
        keys = record.api_keys_data or {}

        # The session summary is not stored, so it is returned empty here.
        integrated_data = {
            'website_analysis': site,
            'research_preferences': prefs,
            'api_keys_data': keys,
            'onboarding_session': {},
            'data_quality': self._assess_data_quality(
                record.website_analysis_data or {},
                record.research_preferences_data or {},
                record.api_keys_data or {}
            ),
            'processing_timestamp': record.updated_at.isoformat()
        }

        record_age = datetime.utcnow() - record.updated_at
        if record_age > self.data_freshness_threshold:
            logger.info(f"Integrated data is stale for user {user_id}, reprocessing...")
            return await self.process_onboarding_data(user_id, db)
        return integrated_data
    except Exception as e:
        logger.error(f"Error getting integrated data for user {user_id}: {str(e)}")
        return None

View File

@@ -0,0 +1,547 @@
"""
Data Quality Service
Onboarding data quality assessment.
"""
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime, timedelta
logger = logging.getLogger(__name__)
class DataQualityService:
"""Service for assessing data quality and validation."""
def __init__(self):
self.quality_thresholds = {
'excellent': 0.9,
'good': 0.7,
'fair': 0.5,
'poor': 0.3
}
self.data_freshness_threshold = timedelta(hours=24)
self.max_data_age = timedelta(days=30)
def assess_onboarding_data_quality(self, integrated_data: Dict[str, Any]) -> Dict[str, Any]:
    """Produce an overall quality report for integrated onboarding data.

    Args:
        integrated_data: Dict that may contain ``website_analysis``,
            ``research_preferences``, ``api_keys_data`` and
            ``onboarding_session`` sections; missing sections count as empty.

    Returns:
        Dict with the five dimension scores (``completeness``,
        ``freshness``, ``accuracy``, ``relevance``, ``consistency``), their
        mean as ``confidence`` and ``overall_score``, a ``quality_level``
        label, ``recommendations``, ``issues`` and ``assessment_timestamp``.
        The fallback assessment is returned when an error occurs (which is
        logged).
    """
    try:
        logger.info("Assessing onboarding data quality")

        report = {
            'overall_score': 0.0,
            'completeness': 0.0,
            'freshness': 0.0,
            'accuracy': 0.0,
            'relevance': 0.0,
            'consistency': 0.0,
            'confidence': 0.0,
            'quality_level': 'poor',
            'recommendations': [],
            'issues': [],
            'assessment_timestamp': datetime.utcnow().isoformat()
        }

        # Per-source metrics, always in the order website, research, API, session.
        per_source = (
            self._assess_website_analysis_quality(integrated_data.get('website_analysis', {})),
            self._assess_research_preferences_quality(integrated_data.get('research_preferences', {})),
            self._assess_api_keys_quality(integrated_data.get('api_keys_data', {})),
            self._assess_onboarding_session_quality(integrated_data.get('onboarding_session', {})),
        )

        aggregators = (
            ('completeness', self._calculate_completeness_score),
            ('freshness', self._calculate_freshness_score),
            ('accuracy', self._calculate_accuracy_score),
            ('relevance', self._calculate_relevance_score),
            ('consistency', self._calculate_consistency_score),
        )
        for dimension, aggregate in aggregators:
            report[dimension] = aggregate(*per_source)

        # Confidence is the plain mean of the five dimensions.
        report['confidence'] = (
            report['completeness'] +
            report['freshness'] +
            report['accuracy'] +
            report['relevance'] +
            report['consistency']
        ) / 5
        report['overall_score'] = report['confidence']

        report['quality_level'] = self._determine_quality_level(report['overall_score'])
        report['recommendations'] = self._generate_quality_recommendations(report)
        report['issues'] = self._identify_quality_issues(report)

        logger.info(f"Data quality assessment completed. Overall score: {report['overall_score']:.2f}")
        return report
    except Exception as e:
        logger.error(f"Error assessing data quality: {str(e)}")
        return self._get_fallback_quality_assessment()
def _assess_website_analysis_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess quality of website analysis data."""
try:
quality_metrics = {
'completeness': 0.0,
'freshness': 0.0,
'accuracy': 0.0,
'relevance': 0.0,
'consistency': 0.0
}
if not website_data:
return quality_metrics
# Completeness assessment
required_fields = ['domain', 'industry', 'business_type', 'target_audience', 'content_goals']
present_fields = sum(1 for field in required_fields if website_data.get(field))
quality_metrics['completeness'] = present_fields / len(required_fields)
# Freshness assessment
if website_data.get('created_at'):
try:
created_at = datetime.fromisoformat(website_data['created_at'].replace('Z', '+00:00'))
age = datetime.utcnow() - created_at
quality_metrics['freshness'] = self._calculate_freshness_score_from_age(age)
except Exception:
quality_metrics['freshness'] = 0.5
# Accuracy assessment (based on data presence and format)
accuracy_score = 0.0
if website_data.get('domain') and isinstance(website_data['domain'], str):
accuracy_score += 0.2
if website_data.get('industry') and isinstance(website_data['industry'], str):
accuracy_score += 0.2
if website_data.get('business_type') and isinstance(website_data['business_type'], str):
accuracy_score += 0.2
if website_data.get('target_audience') and isinstance(website_data['target_audience'], str):
accuracy_score += 0.2
if website_data.get('content_goals') and isinstance(website_data['content_goals'], (str, list)):
accuracy_score += 0.2
quality_metrics['accuracy'] = accuracy_score
# Relevance assessment
relevance_score = 0.0
if website_data.get('domain'):
relevance_score += 0.3
if website_data.get('industry'):
relevance_score += 0.3
if website_data.get('content_goals'):
relevance_score += 0.4
quality_metrics['relevance'] = relevance_score
# Consistency assessment
consistency_score = 0.0
if website_data.get('domain') and website_data.get('industry'):
consistency_score += 0.5
if website_data.get('target_audience') and website_data.get('content_goals'):
consistency_score += 0.5
quality_metrics['consistency'] = consistency_score
return quality_metrics
except Exception as e:
logger.error(f"Error assessing website analysis quality: {str(e)}")
return {'completeness': 0.0, 'freshness': 0.0, 'accuracy': 0.0, 'relevance': 0.0, 'consistency': 0.0}
def _assess_research_preferences_quality(self, research_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess quality of research preferences data."""
try:
quality_metrics = {
'completeness': 0.0,
'freshness': 0.0,
'accuracy': 0.0,
'relevance': 0.0,
'consistency': 0.0
}
if not research_data:
return quality_metrics
# Completeness assessment
required_fields = ['research_topics', 'content_types', 'target_audience', 'industry_focus']
present_fields = sum(1 for field in required_fields if research_data.get(field))
quality_metrics['completeness'] = present_fields / len(required_fields)
# Freshness assessment
if research_data.get('created_at'):
try:
created_at = datetime.fromisoformat(research_data['created_at'].replace('Z', '+00:00'))
age = datetime.utcnow() - created_at
quality_metrics['freshness'] = self._calculate_freshness_score_from_age(age)
except Exception:
quality_metrics['freshness'] = 0.5
# Accuracy assessment
accuracy_score = 0.0
if research_data.get('research_topics') and isinstance(research_data['research_topics'], (str, list)):
accuracy_score += 0.25
if research_data.get('content_types') and isinstance(research_data['content_types'], (str, list)):
accuracy_score += 0.25
if research_data.get('target_audience') and isinstance(research_data['target_audience'], str):
accuracy_score += 0.25
if research_data.get('industry_focus') and isinstance(research_data['industry_focus'], str):
accuracy_score += 0.25
quality_metrics['accuracy'] = accuracy_score
# Relevance assessment
relevance_score = 0.0
if research_data.get('research_topics'):
relevance_score += 0.4
if research_data.get('content_types'):
relevance_score += 0.3
if research_data.get('target_audience'):
relevance_score += 0.3
quality_metrics['relevance'] = relevance_score
# Consistency assessment
consistency_score = 0.0
if research_data.get('research_topics') and research_data.get('content_types'):
consistency_score += 0.5
if research_data.get('target_audience') and research_data.get('industry_focus'):
consistency_score += 0.5
quality_metrics['consistency'] = consistency_score
return quality_metrics
except Exception as e:
logger.error(f"Error assessing research preferences quality: {str(e)}")
return {'completeness': 0.0, 'freshness': 0.0, 'accuracy': 0.0, 'relevance': 0.0, 'consistency': 0.0}
def _assess_api_keys_quality(self, api_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess quality of API keys data."""
try:
quality_metrics = {
'completeness': 0.0,
'freshness': 0.0,
'accuracy': 0.0,
'relevance': 0.0,
'consistency': 0.0
}
if not api_data:
return quality_metrics
# Completeness assessment
total_apis = len(api_data)
active_apis = sum(1 for api_info in api_data.values() if api_info.get('is_active'))
quality_metrics['completeness'] = active_apis / max(total_apis, 1)
# Freshness assessment
freshness_scores = []
for api_info in api_data.values():
if api_info.get('last_used'):
try:
last_used = datetime.fromisoformat(api_info['last_used'].replace('Z', '+00:00'))
age = datetime.utcnow() - last_used
freshness_scores.append(self._calculate_freshness_score_from_age(age))
except Exception:
freshness_scores.append(0.5)
quality_metrics['freshness'] = sum(freshness_scores) / len(freshness_scores) if freshness_scores else 0.5
# Accuracy assessment
accuracy_score = 0.0
for api_info in api_data.values():
if api_info.get('service_name') and api_info.get('is_active'):
accuracy_score += 0.5
if api_info.get('data_available'):
accuracy_score += 0.5
quality_metrics['accuracy'] = accuracy_score / max(len(api_data), 1)
# Relevance assessment
relevant_apis = ['google_analytics', 'google_search_console', 'semrush', 'ahrefs', 'moz']
relevant_count = sum(1 for api_name in api_data.keys() if api_name.lower() in relevant_apis)
quality_metrics['relevance'] = relevant_count / max(len(api_data), 1)
# Consistency assessment
consistency_score = 0.0
if len(api_data) > 0:
consistency_score = 0.5 # Basic consistency if APIs exist
if any(api_info.get('data_available') for api_info in api_data.values()):
consistency_score += 0.5
quality_metrics['consistency'] = consistency_score
return quality_metrics
except Exception as e:
logger.error(f"Error assessing API keys quality: {str(e)}")
return {'completeness': 0.0, 'freshness': 0.0, 'accuracy': 0.0, 'relevance': 0.0, 'consistency': 0.0}
def _assess_onboarding_session_quality(self, session_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess quality of onboarding session data."""
try:
quality_metrics = {
'completeness': 0.0,
'freshness': 0.0,
'accuracy': 0.0,
'relevance': 0.0,
'consistency': 0.0
}
if not session_data:
return quality_metrics
# Completeness assessment
required_fields = ['session_id', 'completion_percentage', 'completed_steps', 'current_step']
present_fields = sum(1 for field in required_fields if session_data.get(field))
quality_metrics['completeness'] = present_fields / len(required_fields)
# Freshness assessment
if session_data.get('updated_at'):
try:
updated_at = datetime.fromisoformat(session_data['updated_at'].replace('Z', '+00:00'))
age = datetime.utcnow() - updated_at
quality_metrics['freshness'] = self._calculate_freshness_score_from_age(age)
except Exception:
quality_metrics['freshness'] = 0.5
# Accuracy assessment
accuracy_score = 0.0
if session_data.get('session_id') and isinstance(session_data['session_id'], str):
accuracy_score += 0.25
if session_data.get('completion_percentage') and isinstance(session_data['completion_percentage'], (int, float)):
accuracy_score += 0.25
if session_data.get('completed_steps') and isinstance(session_data['completed_steps'], (list, int)):
accuracy_score += 0.25
if session_data.get('current_step') and isinstance(session_data['current_step'], (str, int)):
accuracy_score += 0.25
quality_metrics['accuracy'] = accuracy_score
# Relevance assessment
relevance_score = 0.0
if session_data.get('completion_percentage', 0) > 50:
relevance_score += 0.5
if session_data.get('session_data'):
relevance_score += 0.5
quality_metrics['relevance'] = relevance_score
# Consistency assessment
consistency_score = 0.0
if session_data.get('completion_percentage') and session_data.get('completed_steps'):
consistency_score += 0.5
if session_data.get('current_step') and session_data.get('session_id'):
consistency_score += 0.5
quality_metrics['consistency'] = consistency_score
return quality_metrics
except Exception as e:
logger.error(f"Error assessing onboarding session quality: {str(e)}")
return {'completeness': 0.0, 'freshness': 0.0, 'accuracy': 0.0, 'relevance': 0.0, 'consistency': 0.0}
def _calculate_completeness_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
"""Calculate overall completeness score."""
try:
scores = [
website_quality['completeness'],
research_quality['completeness'],
api_quality['completeness'],
session_quality['completeness']
]
return sum(scores) / len(scores)
except Exception as e:
logger.error(f"Error calculating completeness score: {str(e)}")
return 0.0
def _calculate_freshness_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
"""Calculate overall freshness score."""
try:
scores = [
website_quality['freshness'],
research_quality['freshness'],
api_quality['freshness'],
session_quality['freshness']
]
return sum(scores) / len(scores)
except Exception as e:
logger.error(f"Error calculating freshness score: {str(e)}")
return 0.0
def _calculate_accuracy_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
"""Calculate overall accuracy score."""
try:
scores = [
website_quality['accuracy'],
research_quality['accuracy'],
api_quality['accuracy'],
session_quality['accuracy']
]
return sum(scores) / len(scores)
except Exception as e:
logger.error(f"Error calculating accuracy score: {str(e)}")
return 0.0
def _calculate_relevance_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
"""Calculate overall relevance score."""
try:
scores = [
website_quality['relevance'],
research_quality['relevance'],
api_quality['relevance'],
session_quality['relevance']
]
return sum(scores) / len(scores)
except Exception as e:
logger.error(f"Error calculating relevance score: {str(e)}")
return 0.0
def _calculate_consistency_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
"""Calculate overall consistency score."""
try:
scores = [
website_quality['consistency'],
research_quality['consistency'],
api_quality['consistency'],
session_quality['consistency']
]
return sum(scores) / len(scores)
except Exception as e:
logger.error(f"Error calculating consistency score: {str(e)}")
return 0.0
def _calculate_freshness_score_from_age(self, age: timedelta) -> float:
"""Calculate freshness score based on data age."""
try:
if age <= self.data_freshness_threshold:
return 1.0
elif age <= self.max_data_age:
# Linear decay from 1.0 to 0.5
decay_factor = 1.0 - (age - self.data_freshness_threshold) / (self.max_data_age - self.data_freshness_threshold) * 0.5
return max(0.5, decay_factor)
else:
return 0.5 # Minimum freshness for old data
except Exception as e:
logger.error(f"Error calculating freshness score from age: {str(e)}")
return 0.5
def _determine_quality_level(self, overall_score: float) -> str:
"""Determine quality level based on overall score."""
try:
if overall_score >= self.quality_thresholds['excellent']:
return 'excellent'
elif overall_score >= self.quality_thresholds['good']:
return 'good'
elif overall_score >= self.quality_thresholds['fair']:
return 'fair'
else:
return 'poor'
except Exception as e:
logger.error(f"Error determining quality level: {str(e)}")
return 'poor'
def _generate_quality_recommendations(self, quality_assessment: Dict[str, Any]) -> List[str]:
"""Generate recommendations based on quality assessment."""
try:
recommendations = []
if quality_assessment['completeness'] < 0.7:
recommendations.append("Complete missing onboarding data to improve strategy accuracy")
if quality_assessment['freshness'] < 0.7:
recommendations.append("Update stale data to ensure current market insights")
if quality_assessment['accuracy'] < 0.7:
recommendations.append("Verify data accuracy for better strategy recommendations")
if quality_assessment['relevance'] < 0.7:
recommendations.append("Provide more relevant data for targeted strategy development")
if quality_assessment['consistency'] < 0.7:
recommendations.append("Ensure data consistency across different sources")
if quality_assessment['overall_score'] < 0.5:
recommendations.append("Consider re-running onboarding process for better data quality")
return recommendations
except Exception as e:
logger.error(f"Error generating quality recommendations: {str(e)}")
return ["Unable to generate recommendations due to assessment error"]
def _identify_quality_issues(self, quality_assessment: Dict[str, Any]) -> List[str]:
"""Identify specific quality issues."""
try:
issues = []
if quality_assessment['completeness'] < 0.5:
issues.append("Incomplete data: Missing critical onboarding information")
if quality_assessment['freshness'] < 0.5:
issues.append("Stale data: Information may be outdated")
if quality_assessment['accuracy'] < 0.5:
issues.append("Data accuracy concerns: Verify information validity")
if quality_assessment['relevance'] < 0.5:
issues.append("Low relevance: Data may not align with current needs")
if quality_assessment['consistency'] < 0.5:
issues.append("Inconsistent data: Conflicting information detected")
return issues
except Exception as e:
logger.error(f"Error identifying quality issues: {str(e)}")
return ["Unable to identify issues due to assessment error"]
def _get_fallback_quality_assessment(self) -> Dict[str, Any]:
"""Get fallback quality assessment when assessment fails."""
return {
'overall_score': 0.0,
'completeness': 0.0,
'freshness': 0.0,
'accuracy': 0.0,
'relevance': 0.0,
'consistency': 0.0,
'confidence': 0.0,
'quality_level': 'poor',
'recommendations': ['Unable to assess data quality'],
'issues': ['Quality assessment failed'],
'assessment_timestamp': datetime.utcnow().isoformat()
}
def validate_field_data(self, field_data: Dict[str, Any]) -> Dict[str, Any]:
    """Check a mapping of field values for empty and suspiciously short entries.

    Args:
        field_data: Mapping of field name to value.

    Returns:
        Dict with ``is_valid`` (False when any value is ``None`` or ''),
        ``errors`` (one entry per empty field), ``warnings`` (one entry per
        string shorter than three characters after stripping) and
        ``confidence`` (1.0 multiplied by 0.9 per warning). A failed result
        with confidence 0.0 is returned when an error occurs (which is
        logged).
    """
    try:
        errors: List[str] = []
        warnings: List[str] = []
        confidence = 1.0

        for name, value in field_data.items():
            if value is None or value == '':
                errors.append(f"Field '{name}' is empty")
                continue
            if isinstance(value, str) and len(value.strip()) < 3:
                warnings.append(f"Field '{name}' may be too short")
                confidence *= 0.9

        return {
            'is_valid': not errors,
            'errors': errors,
            'warnings': warnings,
            'confidence': confidence
        }
    except Exception as e:
        logger.error(f"Error validating field data: {str(e)}")
        return {
            'is_valid': False,
            'errors': ['Validation failed'],
            'warnings': [],
            'confidence': 0.0
        }

View File

@@ -0,0 +1,790 @@
"""
Field Transformation Service
Onboarding data to field mapping.
"""
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
logger = logging.getLogger(__name__)
class FieldTransformationService:
"""Service for transforming onboarding data to strategic input fields."""
def __init__(self):
# Define field mapping configurations
self.field_mappings = {
# Business Context mappings
'business_objectives': {
'sources': ['website_analysis.content_goals', 'research_preferences.research_topics'],
'transformation': 'extract_business_objectives'
},
'target_metrics': {
'sources': ['website_analysis.performance_metrics', 'research_preferences.performance_tracking'],
'transformation': 'extract_target_metrics'
},
'content_budget': {
'sources': ['onboarding_session.session_data.budget'],
'transformation': 'extract_budget'
},
'team_size': {
'sources': ['onboarding_session.session_data.team_size'],
'transformation': 'extract_team_size'
},
'implementation_timeline': {
'sources': ['onboarding_session.session_data.timeline'],
'transformation': 'extract_timeline'
},
'market_share': {
'sources': ['website_analysis.performance_metrics'],
'transformation': 'extract_market_share'
},
'competitive_position': {
'sources': ['website_analysis.competitors', 'research_preferences.competitor_analysis'],
'transformation': 'extract_competitive_position'
},
'performance_metrics': {
'sources': ['website_analysis.performance_metrics'],
'transformation': 'extract_performance_metrics'
},
# Audience Intelligence mappings
'content_preferences': {
'sources': ['research_preferences.content_types'],
'transformation': 'extract_content_preferences'
},
'consumption_patterns': {
'sources': ['website_analysis.target_audience', 'research_preferences.target_audience'],
'transformation': 'extract_consumption_patterns'
},
'audience_pain_points': {
'sources': ['website_analysis.content_gaps', 'research_preferences.research_topics'],
'transformation': 'extract_pain_points'
},
'buying_journey': {
'sources': ['website_analysis.target_audience', 'research_preferences.target_audience'],
'transformation': 'extract_buying_journey'
},
'seasonal_trends': {
'sources': ['research_preferences.trend_analysis'],
'transformation': 'extract_seasonal_trends'
},
'engagement_metrics': {
'sources': ['website_analysis.performance_metrics'],
'transformation': 'extract_engagement_metrics'
},
# Competitive Intelligence mappings
'top_competitors': {
'sources': ['website_analysis.competitors'],
'transformation': 'extract_competitors'
},
'competitor_content_strategies': {
'sources': ['website_analysis.competitors', 'research_preferences.competitor_analysis'],
'transformation': 'extract_competitor_strategies'
},
'market_gaps': {
'sources': ['website_analysis.content_gaps', 'research_preferences.research_topics'],
'transformation': 'extract_market_gaps'
},
'industry_trends': {
'sources': ['website_analysis.industry', 'research_preferences.industry_focus'],
'transformation': 'extract_industry_trends'
},
'emerging_trends': {
'sources': ['research_preferences.trend_analysis'],
'transformation': 'extract_emerging_trends'
},
# Content Strategy mappings
'preferred_formats': {
'sources': ['research_preferences.content_types'],
'transformation': 'extract_preferred_formats'
},
'content_mix': {
'sources': ['research_preferences.content_types', 'website_analysis.content_goals'],
'transformation': 'extract_content_mix'
},
'content_frequency': {
'sources': ['research_preferences.content_calendar'],
'transformation': 'extract_content_frequency'
},
'optimal_timing': {
'sources': ['research_preferences.content_calendar'],
'transformation': 'extract_optimal_timing'
},
'quality_metrics': {
'sources': ['website_analysis.performance_metrics'],
'transformation': 'extract_quality_metrics'
},
'editorial_guidelines': {
'sources': ['website_analysis.business_type', 'research_preferences.content_types'],
'transformation': 'extract_editorial_guidelines'
},
'brand_voice': {
'sources': ['website_analysis.business_type', 'onboarding_session.session_data.brand_voice'],
'transformation': 'extract_brand_voice'
},
# Performance Analytics mappings
'traffic_sources': {
'sources': ['website_analysis.performance_metrics'],
'transformation': 'extract_traffic_sources'
},
'conversion_rates': {
'sources': ['website_analysis.performance_metrics'],
'transformation': 'extract_conversion_rates'
},
'content_roi_targets': {
'sources': ['onboarding_session.session_data.budget', 'website_analysis.performance_metrics'],
'transformation': 'extract_roi_targets'
},
'ab_testing_capabilities': {
'sources': ['onboarding_session.session_data.team_size'],
'transformation': 'extract_ab_testing_capabilities'
}
}
def transform_onboarding_data_to_fields(self, integrated_data: Dict[str, Any]) -> Dict[str, Any]:
    """Run every configured field mapping against the integrated data.

    For each entry in ``self.field_mappings`` the source values are
    extracted, the named transformation method is called, and truthy
    results are stored together with their source information. A failure
    in one field is logged as a warning and does not stop the others.

    Returns a dict with ``'fields'``, ``'sources'`` and
    ``'transformation_metadata'``. If the run as a whole fails, the
    metadata carries only an ``'error'`` message.
    """
    try:
        logger.info("Transforming onboarding data to strategic fields")
        fields = {}
        provenance = {}
        for field_id, config in self.field_mappings.items():
            try:
                paths = config['sources']
                extracted = self._extract_source_data(integrated_data, paths)
                if not extracted:
                    continue
                transform = getattr(self, config['transformation'])
                value = transform(extracted, integrated_data)
                if not value:
                    continue
                fields[field_id] = value
                provenance[field_id] = self._get_data_source_info(paths, integrated_data)
            except Exception as exc:
                logger.warning(f"Error transforming field {field_id}: {str(exc)}")
        metadata = {
            'total_fields_processed': len(self.field_mappings),
            'successful_transformations': len(fields),
            'transformation_timestamp': datetime.utcnow().isoformat()
        }
        logger.info(f"Successfully transformed {len(fields)} fields from onboarding data")
        return {
            'fields': fields,
            'sources': provenance,
            'transformation_metadata': metadata
        }
    except Exception as exc:
        logger.error(f"Error transforming onboarding data to fields: {str(exc)}")
        return {'fields': {}, 'sources': {}, 'transformation_metadata': {'error': str(exc)}}
def _extract_source_data(self, integrated_data: Dict[str, Any], sources: List[str]) -> Dict[str, Any]:
"""Extract data from specified sources."""
source_data = {}
for source_path in sources:
try:
# Navigate nested dictionary structure
keys = source_path.split('.')
value = integrated_data
for key in keys:
if isinstance(value, dict) and key in value:
value = value[key]
else:
value = None
break
if value is not None:
source_data[source_path] = value
except Exception as e:
logger.debug(f"Error extracting data from {source_path}: {str(e)}")
continue
return source_data
def _get_data_source_info(self, sources: List[str], integrated_data: Dict[str, Any]) -> Dict[str, Any]:
"""Get information about data sources for a field."""
source_info = {
'sources': sources,
'data_quality': self._assess_source_quality(sources, integrated_data),
'last_updated': datetime.utcnow().isoformat()
}
return source_info
def _assess_source_quality(self, sources: List[str], integrated_data: Dict[str, Any]) -> float:
"""Assess the quality of data sources."""
try:
quality_scores = []
for source in sources:
# Check if source exists and has data
keys = source.split('.')
value = integrated_data
for key in keys:
if isinstance(value, dict) and key in value:
value = value[key]
else:
value = None
break
if value:
# Basic quality assessment
if isinstance(value, (list, dict)) and len(value) > 0:
quality_scores.append(1.0)
elif isinstance(value, str) and len(value.strip()) > 0:
quality_scores.append(0.8)
else:
quality_scores.append(0.5)
else:
quality_scores.append(0.0)
return sum(quality_scores) / len(quality_scores) if quality_scores else 0.0
except Exception as e:
logger.error(f"Error assessing source quality: {str(e)}")
return 0.0
# Transformation methods for each field type
def extract_business_objectives(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Combine content goals and research topics into one objectives string.

    Args:
        source_data: Values keyed by dotted source path. Reads
            ``'website_analysis.content_goals'`` and
            ``'research_preferences.research_topics'``; each may be a
            list or a single string.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        The collected items joined with ``', '``, or ``None`` when
        nothing was collected or an unexpected error occurred.
    """
    try:
        objectives = []
        for path in ('website_analysis.content_goals', 'research_preferences.research_topics'):
            value = source_data.get(path)
            if isinstance(value, list):
                # List items are not guaranteed to be strings; stringify
                # them (skipping None) so the join below cannot raise and
                # throw away every objective.
                objectives.extend(str(item) for item in value if item is not None)
            elif isinstance(value, str):
                objectives.append(value)
        return ', '.join(objectives) if objectives else None
    except Exception as e:
        logger.error(f"Error extracting business objectives: {str(e)}")
        return None
def extract_target_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Combine performance metrics and tracking preferences into one string.

    Args:
        source_data: Values keyed by dotted source path. Reads
            ``'website_analysis.performance_metrics'`` (dict or string)
            and ``'research_preferences.performance_tracking'`` (list or
            string).
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        Dict entries rendered as ``"key: value"`` followed by the tracking
        items, all joined with ``', '``; ``None`` when nothing was
        collected or an unexpected error occurred.
    """
    try:
        metrics = []
        perf_metrics = source_data.get('website_analysis.performance_metrics')
        if isinstance(perf_metrics, dict):
            metrics.extend(f"{k}: {v}" for k, v in perf_metrics.items())
        elif isinstance(perf_metrics, str):
            metrics.append(perf_metrics)
        tracking = source_data.get('research_preferences.performance_tracking')
        if isinstance(tracking, list):
            # Stringify items (skipping None) so a non-string entry cannot
            # make the join below raise and discard all metrics.
            metrics.extend(str(item) for item in tracking if item is not None)
        elif isinstance(tracking, str):
            metrics.append(tracking)
        return ', '.join(metrics) if metrics else None
    except Exception as e:
        logger.error(f"Error extracting target metrics: {str(e)}")
        return None
def extract_budget(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the session budget as a string.

    Reads ``'onboarding_session.session_data.budget'`` from
    ``source_data``. A missing or falsy budget, or any error, yields
    ``None``.
    """
    try:
        budget = source_data.get('onboarding_session.session_data.budget')
        return str(budget) if budget else None
    except Exception as exc:
        logger.error(f"Error extracting budget: {str(exc)}")
        return None
def extract_team_size(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the session team size as a string.

    Reads ``'onboarding_session.session_data.team_size'`` from
    ``source_data``. A missing or falsy value, or any error, yields
    ``None``.
    """
    try:
        team_size = source_data.get('onboarding_session.session_data.team_size')
        if not team_size:
            return None
        return str(team_size)
    except Exception as exc:
        logger.error(f"Error extracting team size: {str(exc)}")
        return None
def extract_timeline(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the implementation timeline from session data as a string.

    Reads ``'onboarding_session.session_data.timeline'`` from
    ``source_data``. A missing or falsy value, or any error, yields
    ``None``.
    """
    timeline_key = 'onboarding_session.session_data.timeline'
    try:
        timeline = source_data.get(timeline_key)
        if timeline:
            return str(timeline)
    except Exception as exc:
        logger.error(f"Error extracting timeline: {str(exc)}")
    return None
def extract_market_share(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the ``market_share`` performance metric as a string.

    Args:
        source_data: Values keyed by dotted source path. Reads
            ``'website_analysis.performance_metrics'``, which must be a
            dict for a value to be found.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        ``str(metrics['market_share'])`` when the metric is present and
        not ``None``; otherwise ``None`` (also on unexpected errors).
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if isinstance(metrics, dict):
            share = metrics.get('market_share')
            # A stored None must not be surfaced as the truthy string
            # "None"; a legitimate 0 is still reported.
            if share is not None:
                return str(share)
        return None
    except Exception as e:
        logger.error(f"Error extracting market share: {str(e)}")
        return None
def extract_competitive_position(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe competitive position from competitor data.

    Emits a ``"Competitors: ..."`` part for a truthy
    ``'website_analysis.competitors'`` value and an ``"Analysis: ..."``
    part for a truthy ``'research_preferences.competitor_analysis'``
    value, joined with ``'; '``. Returns ``None`` when neither is
    available or an error occurs.
    """
    try:
        labelled = (
            ('Competitors', source_data.get('website_analysis.competitors')),
            ('Analysis', source_data.get('research_preferences.competitor_analysis')),
        )
        parts = [f"{label}: {value}" for label, value in labelled if value]
        if not parts:
            return None
        return '; '.join(parts)
    except Exception as exc:
        logger.error(f"Error extracting competitive position: {str(exc)}")
        return None
def extract_performance_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Render the website performance metrics as a single string.

    A dict under ``'website_analysis.performance_metrics'`` becomes
    ``"key: value"`` pairs joined with ``', '``; a string is returned
    unchanged. Any other value, a missing key or an error yields ``None``.
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if isinstance(metrics, str):
            return metrics
        if isinstance(metrics, dict):
            return ', '.join(f"{name}: {value}" for name, value in metrics.items())
        return None
    except Exception as exc:
        logger.error(f"Error extracting performance metrics: {str(exc)}")
        return None
def extract_content_preferences(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Render the preferred content types as a single string.

    Args:
        source_data: Values keyed by dotted source path. Reads
            ``'research_preferences.content_types'`` (list or string).
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        List items joined with ``', '``, or the string unchanged;
        ``None`` for any other value, a missing key or an unexpected
        error.
    """
    try:
        content_types = source_data.get('research_preferences.content_types')
        if isinstance(content_types, list):
            # Stringify items (skipping None) so a non-string entry does
            # not make join() raise and lose the whole field.
            return ', '.join(str(item) for item in content_types if item is not None)
        if isinstance(content_types, str):
            return content_types
        return None
    except Exception as e:
        logger.error(f"Error extracting content preferences: {str(e)}")
        return None
def extract_consumption_patterns(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe consumption patterns from the two audience sources.

    Emits ``"Website Audience: ..."`` for a truthy
    ``'website_analysis.target_audience'`` and ``"Research Audience: ..."``
    for a truthy ``'research_preferences.target_audience'``, joined with
    ``'; '``. Returns ``None`` when neither is available or an error
    occurs.
    """
    try:
        patterns = []
        website_audience = source_data.get('website_analysis.target_audience')
        if website_audience:
            patterns.append(f"Website Audience: {website_audience}")
        research_audience = source_data.get('research_preferences.target_audience')
        if research_audience:
            patterns.append(f"Research Audience: {research_audience}")
        if not patterns:
            return None
        return '; '.join(patterns)
    except Exception as exc:
        logger.error(f"Error extracting consumption patterns: {str(exc)}")
        return None
def extract_pain_points(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Combine content gaps and research topics into a pain-points string.

    Args:
        source_data: Values keyed by dotted source path. Reads
            ``'website_analysis.content_gaps'`` and
            ``'research_preferences.research_topics'``; each may be a
            list or a single string.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        The collected items joined with ``', '``, or ``None`` when
        nothing was collected or an unexpected error occurred.
    """
    try:
        pain_points = []
        for path in ('website_analysis.content_gaps', 'research_preferences.research_topics'):
            value = source_data.get(path)
            if isinstance(value, list):
                # Stringify items (skipping None) so one non-string entry
                # cannot make the join below raise and drop everything.
                pain_points.extend(str(item) for item in value if item is not None)
            elif isinstance(value, str):
                pain_points.append(value)
        return ', '.join(pain_points) if pain_points else None
    except Exception as e:
        logger.error(f"Error extracting pain points: {str(e)}")
        return None
def extract_buying_journey(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe the buying journey from the available audience data.

    Args:
        source_data: Values keyed by dotted source path. Reads
            ``'website_analysis.target_audience'`` first and falls back
            to ``'research_preferences.target_audience'``.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        ``"Journey based on: <audience>"`` for the first truthy audience
        value, or ``None`` when neither source has one or an unexpected
        error occurred.
    """
    try:
        # The website audience keeps priority; the research audience is
        # used only when the website one is missing or empty.
        for path in ('website_analysis.target_audience', 'research_preferences.target_audience'):
            audience = source_data.get(path)
            if audience:
                return f"Journey based on: {audience}"
        return None
    except Exception as e:
        logger.error(f"Error extracting buying journey: {str(e)}")
        return None
def extract_seasonal_trends(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Render the trend analysis as a seasonal-trends string.

    Args:
        source_data: Values keyed by dotted source path. Reads
            ``'research_preferences.trend_analysis'`` (list or string).
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        List items joined with ``', '``, or the string unchanged;
        ``None`` for any other value, a missing key or an unexpected
        error.
    """
    try:
        trends = source_data.get('research_preferences.trend_analysis')
        if isinstance(trends, list):
            # Stringify items (skipping None) so a non-string entry does
            # not make join() raise and lose the whole field.
            return ', '.join(str(item) for item in trends if item is not None)
        if isinstance(trends, str):
            return trends
        return None
    except Exception as e:
        logger.error(f"Error extracting seasonal trends: {str(e)}")
        return None
def extract_engagement_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Pick the engagement-related entries out of the performance metrics.

    Keeps the entries of the ``'website_analysis.performance_metrics'``
    dict whose key contains ``'engagement'`` (case-insensitive) and
    renders them as ``"key: value"`` joined with ``', '``. Returns
    ``None`` when the metrics are not a dict, nothing matches, or an
    error occurs.
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if not isinstance(metrics, dict):
            return None
        matches = [
            f"{name}: {value}"
            for name, value in metrics.items()
            if 'engagement' in name.lower()
        ]
        return ', '.join(matches) if matches else None
    except Exception as exc:
        logger.error(f"Error extracting engagement metrics: {str(exc)}")
        return None
def extract_competitors(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Render the competitor list as a single string.

    Args:
        source_data: Values keyed by dotted source path. Reads
            ``'website_analysis.competitors'`` (list or string).
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        List items joined with ``', '``, or the string unchanged;
        ``None`` for any other value, a missing key or an unexpected
        error.
    """
    try:
        competitors = source_data.get('website_analysis.competitors')
        if isinstance(competitors, list):
            # Entries may be non-strings; stringify them (skipping None)
            # so join() cannot raise and lose the whole field.
            return ', '.join(str(item) for item in competitors if item is not None)
        if isinstance(competitors, str):
            return competitors
        return None
    except Exception as e:
        logger.error(f"Error extracting competitors: {str(e)}")
        return None
def extract_competitor_strategies(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Summarise competitor content strategies from competitor data.

    Emits ``"Competitors: ..."`` for a truthy
    ``'website_analysis.competitors'`` and ``"Analysis: ..."`` for a
    truthy ``'research_preferences.competitor_analysis'``, joined with
    ``'; '``. Returns ``None`` when neither is available or an error
    occurs.
    """
    try:
        competitors = source_data.get('website_analysis.competitors')
        analysis = source_data.get('research_preferences.competitor_analysis')
        strategies = []
        if competitors:
            strategies += [f"Competitors: {competitors}"]
        if analysis:
            strategies += [f"Analysis: {analysis}"]
        return '; '.join(strategies) or None
    except Exception as exc:
        logger.error(f"Error extracting competitor strategies: {str(exc)}")
        return None
def extract_market_gaps(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Combine content gaps and research topics into a market-gaps string.

    Args:
        source_data: Values keyed by dotted source path. Reads
            ``'website_analysis.content_gaps'`` and
            ``'research_preferences.research_topics'``; each may be a
            list or a single string.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        The collected items joined with ``', '``, or ``None`` when
        nothing was collected or an unexpected error occurred.
    """
    try:
        gaps = []
        for path in ('website_analysis.content_gaps', 'research_preferences.research_topics'):
            value = source_data.get(path)
            if isinstance(value, list):
                # Stringify items (skipping None) so one non-string entry
                # cannot make the join below raise and drop everything.
                gaps.extend(str(item) for item in value if item is not None)
            elif isinstance(value, str):
                gaps.append(value)
        return ', '.join(gaps) if gaps else None
    except Exception as e:
        logger.error(f"Error extracting market gaps: {str(e)}")
        return None
def extract_industry_trends(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe industry trends from the industry and focus values.

    Emits ``"Industry: ..."`` for a truthy ``'website_analysis.industry'``
    and ``"Focus: ..."`` for a truthy
    ``'research_preferences.industry_focus'``, joined with ``'; '``.
    Returns ``None`` when neither is available or an error occurs.
    """
    try:
        labelled = (
            ('Industry', 'website_analysis.industry'),
            ('Focus', 'research_preferences.industry_focus'),
        )
        trends = []
        for label, path in labelled:
            value = source_data.get(path)
            if value:
                trends.append(f"{label}: {value}")
        return '; '.join(trends) if trends else None
    except Exception as exc:
        logger.error(f"Error extracting industry trends: {str(exc)}")
        return None
def extract_emerging_trends(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Render the trend analysis as an emerging-trends string.

    Args:
        source_data: Values keyed by dotted source path. Reads
            ``'research_preferences.trend_analysis'`` (list or string).
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        List items joined with ``', '``, or the string unchanged;
        ``None`` for any other value, a missing key or an unexpected
        error.
    """
    try:
        trends = source_data.get('research_preferences.trend_analysis')
        if isinstance(trends, list):
            # Stringify items (skipping None) so a non-string entry does
            # not make join() raise and lose the whole field.
            return ', '.join(str(item) for item in trends if item is not None)
        if isinstance(trends, str):
            return trends
        return None
    except Exception as e:
        logger.error(f"Error extracting emerging trends: {str(e)}")
        return None
def extract_preferred_formats(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Render the preferred content formats as a single string.

    Args:
        source_data: Values keyed by dotted source path. Reads
            ``'research_preferences.content_types'`` (list or string).
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        List items joined with ``', '``, or the string unchanged;
        ``None`` for any other value, a missing key or an unexpected
        error.
    """
    try:
        content_types = source_data.get('research_preferences.content_types')
        if isinstance(content_types, list):
            # Stringify items (skipping None) so a non-string entry does
            # not make join() raise and lose the whole field.
            return ', '.join(str(item) for item in content_types if item is not None)
        if isinstance(content_types, str):
            return content_types
        return None
    except Exception as e:
        logger.error(f"Error extracting preferred formats: {str(e)}")
        return None
def extract_content_mix(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe the content mix from content types and content goals.

    Emits ``"Types: ..."`` for a truthy
    ``'research_preferences.content_types'`` and ``"Goals: ..."`` for a
    truthy ``'website_analysis.content_goals'``, joined with ``'; '``.
    Returns ``None`` when neither is available or an error occurs.
    """
    try:
        content_types = source_data.get('research_preferences.content_types')
        goals = source_data.get('website_analysis.content_goals')
        candidates = (
            (content_types, f"Types: {content_types}"),
            (goals, f"Goals: {goals}"),
        )
        mix_components = [text for value, text in candidates if value]
        if mix_components:
            return '; '.join(mix_components)
        return None
    except Exception as exc:
        logger.error(f"Error extracting content mix: {str(exc)}")
        return None
def extract_content_frequency(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the content calendar value as a string.

    Reads ``'research_preferences.content_calendar'`` from
    ``source_data``. A missing or falsy value, or any error, yields
    ``None``.
    """
    try:
        calendar = source_data.get('research_preferences.content_calendar')
        return str(calendar) if calendar else None
    except Exception as exc:
        logger.error(f"Error extracting content frequency: {str(exc)}")
        return None
def extract_optimal_timing(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the content calendar value as a timing string.

    Reads ``'research_preferences.content_calendar'`` from
    ``source_data``. A missing or falsy value, or any error, yields
    ``None``.
    """
    calendar_key = 'research_preferences.content_calendar'
    try:
        calendar = source_data.get(calendar_key)
        if not calendar:
            return None
        return str(calendar)
    except Exception as exc:
        logger.error(f"Error extracting optimal timing: {str(exc)}")
        return None
def extract_quality_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Pick the quality-related entries out of the performance metrics.

    Keeps the entries of the ``'website_analysis.performance_metrics'``
    dict whose key contains ``'quality'`` (case-insensitive) and renders
    them as ``"key: value"`` joined with ``', '``. Returns ``None`` when
    the metrics are not a dict, nothing matches, or an error occurs.
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if not isinstance(metrics, dict):
            return None
        rendered = []
        for name, value in metrics.items():
            if 'quality' not in name.lower():
                continue
            rendered.append(f"{name}: {value}")
        if not rendered:
            return None
        return ', '.join(rendered)
    except Exception as exc:
        logger.error(f"Error extracting quality metrics: {str(exc)}")
        return None
def extract_editorial_guidelines(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe editorial guidelines from business type and content types.

    Emits ``"Business Type: ..."`` for a truthy
    ``'website_analysis.business_type'`` and ``"Content Types: ..."`` for
    a truthy ``'research_preferences.content_types'``, joined with
    ``'; '``. Returns ``None`` when neither is available or an error
    occurs.
    """
    try:
        labelled = (
            ('Business Type', source_data.get('website_analysis.business_type')),
            ('Content Types', source_data.get('research_preferences.content_types')),
        )
        guidelines = [f"{label}: {value}" for label, value in labelled if value]
        return '; '.join(guidelines) if guidelines else None
    except Exception as exc:
        logger.error(f"Error extracting editorial guidelines: {str(exc)}")
        return None
def extract_brand_voice(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe brand voice from business type and session data.

    Emits ``"Business Type: ..."`` for a truthy
    ``'website_analysis.business_type'`` and ``"Brand Voice: ..."`` for a
    truthy ``'onboarding_session.session_data.brand_voice'``, joined with
    ``'; '``. Returns ``None`` when neither is available or an error
    occurs.
    """
    try:
        voice_indicators = []
        business_type = source_data.get('website_analysis.business_type')
        if business_type:
            voice_indicators.append(f"Business Type: {business_type}")
        brand_voice = source_data.get('onboarding_session.session_data.brand_voice')
        if brand_voice:
            voice_indicators.append(f"Brand Voice: {brand_voice}")
        if not voice_indicators:
            return None
        return '; '.join(voice_indicators)
    except Exception as exc:
        logger.error(f"Error extracting brand voice: {str(exc)}")
        return None
def extract_traffic_sources(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Pick the traffic-related entries out of the performance metrics.

    Keeps the entries of the ``'website_analysis.performance_metrics'``
    dict whose key contains ``'traffic'`` (case-insensitive) and renders
    them as ``"key: value"`` joined with ``', '``. Returns ``None`` when
    the metrics are not a dict, nothing matches, or an error occurs.
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if isinstance(metrics, dict):
            summary = ', '.join(
                f"{name}: {value}"
                for name, value in metrics.items()
                if 'traffic' in name.lower()
            )
            # Every rendered entry contains ": ", so an empty summary
            # means no key matched.
            if summary:
                return summary
        return None
    except Exception as exc:
        logger.error(f"Error extracting traffic sources: {str(exc)}")
        return None
def extract_conversion_rates(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Pick the conversion-related entries out of the performance metrics.

    Keeps the entries of the ``'website_analysis.performance_metrics'``
    dict whose key contains ``'conversion'`` (case-insensitive) and
    renders them as ``"key: value"`` joined with ``', '``. Returns
    ``None`` when the metrics are not a dict, nothing matches, or an
    error occurs.
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if not isinstance(metrics, dict):
            return None
        selected = list(filter(lambda pair: 'conversion' in pair[0].lower(), metrics.items()))
        if not selected:
            return None
        return ', '.join([f"{name}: {value}" for name, value in selected])
    except Exception as exc:
        logger.error(f"Error extracting conversion rates: {str(exc)}")
        return None
def extract_roi_targets(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe ROI targets from the budget and ROI-related metrics.

    Emits ``"Budget: ..."`` for a truthy
    ``'onboarding_session.session_data.budget'`` and ``"ROI Metrics: ..."``
    holding the sub-dict of ``'website_analysis.performance_metrics'``
    entries whose key contains ``'roi'`` (case-insensitive). Parts are
    joined with ``'; '``; ``None`` when there are none or an error occurs.
    """
    try:
        targets = []
        budget = source_data.get('onboarding_session.session_data.budget')
        if budget:
            targets.append(f"Budget: {budget}")
        metrics = source_data.get('website_analysis.performance_metrics')
        if isinstance(metrics, dict):
            roi_metrics = {}
            for name, value in metrics.items():
                if 'roi' in name.lower():
                    roi_metrics[name] = value
            if roi_metrics:
                targets.append(f"ROI Metrics: {roi_metrics}")
        if not targets:
            return None
        return '; '.join(targets)
    except Exception as exc:
        logger.error(f"Error extracting ROI targets: {str(exc)}")
        return None
def extract_ab_testing_capabilities(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Derive an A/B testing capability label from the team size.

    Args:
        source_data: Values keyed by dotted source path. Reads
            ``'onboarding_session.session_data.team_size'``, which may be
            a number or a numeric string (``"6"`` or ``"6.0"``).
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        ``"Advanced ..."`` for more than 5 people, ``"Basic ..."`` for
        more than 2, ``"Limited ..."`` otherwise. ``None`` when the team
        size is missing, falsy or cannot be read as a number, or when an
        unexpected error occurs.
    """
    try:
        team_size = source_data.get('onboarding_session.session_data.team_size')
        if not team_size:
            return None
        # Parse once. int() rejects float-like strings such as "6.0", so
        # fall back to float() and truncate, as int() does for floats.
        try:
            headcount = int(team_size)
        except (TypeError, ValueError):
            try:
                headcount = int(float(team_size))
            except (TypeError, ValueError, OverflowError):
                logger.warning(f"Unrecognised team size value: {team_size!r}")
                return None
        if headcount > 5:
            return "Advanced A/B testing capabilities"
        if headcount > 2:
            return "Basic A/B testing capabilities"
        return "Limited A/B testing capabilities"
    except Exception as e:
        logger.error(f"Error extracting A/B testing capabilities: {str(e)}")
        return None