ALwrity Version 0.5.0 (FastAPI + React)
This commit is contained in:
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
Onboarding Module
|
||||
Onboarding data integration and processing services.
|
||||
"""
|
||||
|
||||
from .data_integration import OnboardingDataIntegrationService
|
||||
from .field_transformation import FieldTransformationService
|
||||
from .data_quality import DataQualityService
|
||||
|
||||
__all__ = ['OnboardingDataIntegrationService', 'FieldTransformationService', 'DataQualityService']
|
||||
@@ -0,0 +1,381 @@
|
||||
"""
|
||||
Onboarding Data Integration Service
|
||||
Onboarding data integration and processing.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, List
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
# Import database models
|
||||
from models.enhanced_strategy_models import (
|
||||
OnboardingDataIntegration
|
||||
)
|
||||
from models.onboarding import (
|
||||
OnboardingSession,
|
||||
WebsiteAnalysis,
|
||||
ResearchPreferences,
|
||||
APIKey
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class OnboardingDataIntegrationService:
|
||||
"""Service for onboarding data integration and processing."""
|
||||
|
||||
def __init__(self):
|
||||
self.data_freshness_threshold = timedelta(hours=24)
|
||||
self.max_analysis_age = timedelta(days=7)
|
||||
|
||||
async def process_onboarding_data(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Collect every onboarding data source for a user, score it, and persist it.

    Args:
        user_id: Identifier of the user whose onboarding data is processed.
        db: Database session used for all reads and for the final write.

    Returns:
        A dict with the keys ``website_analysis``, ``research_preferences``,
        ``api_keys_data``, ``onboarding_session``, ``data_quality`` and
        ``processing_timestamp``. If anything raises, the result of
        ``_get_fallback_data()`` is returned instead.
    """
    try:
        logger.info(f"Processing onboarding data for user: {user_id}")

        # Each getter returns {} when it finds nothing or fails.
        site = self._get_website_analysis(user_id, db)
        prefs = self._get_research_preferences(user_id, db)
        keys = self._get_api_keys_data(user_id, db)
        session_info = self._get_onboarding_session(user_id, db)

        quality = self._assess_data_quality(site, prefs, keys)
        integrated_data = {
            'website_analysis': site,
            'research_preferences': prefs,
            'api_keys_data': keys,
            'onboarding_session': session_info,
            'data_quality': quality,
            'processing_timestamp': datetime.utcnow().isoformat(),
        }

        # Persisting logs and rolls back on failure rather than raising,
        # so the success message below is emitted either way.
        await self._store_integrated_data(user_id, integrated_data, db)

        logger.info(f"Onboarding data processed successfully for user: {user_id}")
        return integrated_data

    except Exception as exc:
        logger.error(f"Error processing onboarding data for user {user_id}: {str(exc)}")
        return self._get_fallback_data()
|
||||
|
||||
def _get_website_analysis(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Load the newest website analysis of the user's latest onboarding session.

    Returns:
        The analysis as produced by ``to_dict()`` plus two extra keys,
        ``data_freshness`` and ``confidence_level``. An empty dict is
        returned when the user has no session, the session has no analysis,
        or any error occurs.
    """
    try:
        # "Latest" session = most recently updated one for this user.
        latest_session = (
            db.query(OnboardingSession)
            .filter(OnboardingSession.user_id == user_id)
            .order_by(OnboardingSession.updated_at.desc())
            .first()
        )
        if not latest_session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return {}

        record = (
            db.query(WebsiteAnalysis)
            .filter(WebsiteAnalysis.session_id == latest_session.id)
            .order_by(WebsiteAnalysis.updated_at.desc())
            .first()
        )
        if not record:
            logger.warning(f"No website analysis found for user {user_id}")
            return {}

        payload = record.to_dict()
        payload['data_freshness'] = self._calculate_freshness(record.updated_at)
        # Only a 'completed' analysis gets the high confidence value.
        if record.status == 'completed':
            payload['confidence_level'] = 0.9
        else:
            payload['confidence_level'] = 0.5

        logger.info(f"Retrieved website analysis for user {user_id}: {record.website_url}")
        return payload

    except Exception as exc:
        logger.error(f"Error getting website analysis for user {user_id}: {str(exc)}")
        return {}
|
||||
|
||||
def _get_research_preferences(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Load the research preferences attached to the user's latest onboarding session.

    Returns:
        The preferences as produced by ``to_dict()`` plus ``data_freshness``
        and a fixed ``confidence_level`` of 0.9. An empty dict is returned
        when there is no session, no preferences row, or on any error.
    """
    try:
        latest_session = (
            db.query(OnboardingSession)
            .filter(OnboardingSession.user_id == user_id)
            .order_by(OnboardingSession.updated_at.desc())
            .first()
        )
        if not latest_session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return {}

        # Unlike the website analysis lookup, no ordering is applied here:
        # the first matching row is used.
        record = (
            db.query(ResearchPreferences)
            .filter(ResearchPreferences.session_id == latest_session.id)
            .first()
        )
        if not record:
            logger.warning(f"No research preferences found for user {user_id}")
            return {}

        payload = record.to_dict()
        payload.update(
            data_freshness=self._calculate_freshness(record.updated_at),
            confidence_level=0.9,
        )

        logger.info(f"Retrieved research preferences for user {user_id}")
        return payload

    except Exception as exc:
        logger.error(f"Error getting research preferences for user {user_id}: {str(exc)}")
        return {}
|
||||
|
||||
def _get_api_keys_data(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Summarise the API keys stored for the user's latest onboarding session.

    Returns:
        A dict with ``api_keys`` (each key's ``to_dict()``), ``total_keys``,
        ``providers``, ``data_freshness`` (derived from the session's
        ``updated_at``) and a fixed ``confidence_level`` of 0.8. An empty
        dict is returned when there is no session, no keys, or on any error.
    """
    try:
        latest_session = (
            db.query(OnboardingSession)
            .filter(OnboardingSession.user_id == user_id)
            .order_by(OnboardingSession.updated_at.desc())
            .first()
        )
        if not latest_session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return {}

        stored_keys = (
            db.query(APIKey)
            .filter(APIKey.session_id == latest_session.id)
            .all()
        )
        if not stored_keys:
            logger.warning(f"No API keys found for user {user_id}")
            return {}

        # NOTE(review): to_dict() output is passed along verbatim - confirm
        # it does not expose raw secret values.
        serialized = [entry.to_dict() for entry in stored_keys]
        key_count = len(stored_keys)
        provider_names = [entry.provider for entry in stored_keys]
        summary = {
            'api_keys': serialized,
            'total_keys': key_count,
            'providers': provider_names,
            'data_freshness': self._calculate_freshness(latest_session.updated_at),
            'confidence_level': 0.8,
        }

        logger.info(f"Retrieved {key_count} API keys for user {user_id}")
        return summary

    except Exception as exc:
        logger.error(f"Error getting API keys data for user {user_id}: {str(exc)}")
        return {}
|
||||
|
||||
def _get_onboarding_session(self, user_id: int, db: Session) -> Dict[str, Any]:
    """Return a plain-dict snapshot of the user's latest onboarding session.

    Returns:
        A dict with ``id``, ``user_id``, ``current_step``, ``progress``,
        ISO-formatted ``started_at`` / ``updated_at`` (``None`` when unset),
        ``data_freshness`` and a fixed ``confidence_level`` of 0.9. An empty
        dict is returned when no session exists or on any error.
    """
    try:
        latest_session = (
            db.query(OnboardingSession)
            .filter(OnboardingSession.user_id == user_id)
            .order_by(OnboardingSession.updated_at.desc())
            .first()
        )
        if not latest_session:
            logger.warning(f"No onboarding session found for user {user_id}")
            return {}

        snapshot = {
            'id': latest_session.id,
            'user_id': latest_session.user_id,
            'current_step': latest_session.current_step,
            'progress': latest_session.progress,
        }

        # Timestamps may be unset; serialise them only when present.
        started_iso = None
        if latest_session.started_at:
            started_iso = latest_session.started_at.isoformat()
        snapshot['started_at'] = started_iso

        updated_iso = None
        if latest_session.updated_at:
            updated_iso = latest_session.updated_at.isoformat()
        snapshot['updated_at'] = updated_iso

        snapshot['data_freshness'] = self._calculate_freshness(latest_session.updated_at)
        snapshot['confidence_level'] = 0.9

        logger.info(f"Retrieved onboarding session for user {user_id}: step {latest_session.current_step}, progress {latest_session.progress}%")
        return snapshot

    except Exception as exc:
        logger.error(f"Error getting onboarding session for user {user_id}: {str(exc)}")
        return {}
|
||||
|
||||
def _assess_data_quality(self, website_analysis: Dict, research_preferences: Dict, api_keys_data: Dict) -> Dict[str, Any]:
|
||||
"""Assess the quality and completeness of onboarding data."""
|
||||
try:
|
||||
quality_metrics = {
|
||||
'overall_score': 0.0,
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'relevance': 0.0,
|
||||
'confidence': 0.0
|
||||
}
|
||||
|
||||
# Calculate completeness
|
||||
total_fields = 0
|
||||
filled_fields = 0
|
||||
|
||||
# Website analysis completeness
|
||||
website_fields = ['domain', 'industry', 'business_type', 'target_audience', 'content_goals']
|
||||
for field in website_fields:
|
||||
total_fields += 1
|
||||
if website_analysis.get(field):
|
||||
filled_fields += 1
|
||||
|
||||
# Research preferences completeness
|
||||
research_fields = ['research_topics', 'content_types', 'target_audience', 'industry_focus']
|
||||
for field in research_fields:
|
||||
total_fields += 1
|
||||
if research_preferences.get(field):
|
||||
filled_fields += 1
|
||||
|
||||
# API keys completeness
|
||||
total_fields += 1
|
||||
if api_keys_data:
|
||||
filled_fields += 1
|
||||
|
||||
quality_metrics['completeness'] = filled_fields / total_fields if total_fields > 0 else 0.0
|
||||
|
||||
# Calculate freshness
|
||||
freshness_scores = []
|
||||
for data_source in [website_analysis, research_preferences]:
|
||||
if data_source.get('data_freshness'):
|
||||
freshness_scores.append(data_source['data_freshness'])
|
||||
|
||||
quality_metrics['freshness'] = sum(freshness_scores) / len(freshness_scores) if freshness_scores else 0.0
|
||||
|
||||
# Calculate relevance (based on data presence and quality)
|
||||
relevance_score = 0.0
|
||||
if website_analysis.get('domain'):
|
||||
relevance_score += 0.4
|
||||
if research_preferences.get('research_topics'):
|
||||
relevance_score += 0.3
|
||||
if api_keys_data:
|
||||
relevance_score += 0.3
|
||||
|
||||
quality_metrics['relevance'] = relevance_score
|
||||
|
||||
# Calculate confidence
|
||||
quality_metrics['confidence'] = (quality_metrics['completeness'] + quality_metrics['freshness'] + quality_metrics['relevance']) / 3
|
||||
|
||||
# Calculate overall score
|
||||
quality_metrics['overall_score'] = quality_metrics['confidence']
|
||||
|
||||
return quality_metrics
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error assessing data quality: {str(e)}")
|
||||
return {
|
||||
'overall_score': 0.0,
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'relevance': 0.0,
|
||||
'confidence': 0.0
|
||||
}
|
||||
|
||||
def _calculate_freshness(self, created_at: datetime) -> float:
|
||||
"""Calculate data freshness score (0.0 to 1.0)."""
|
||||
try:
|
||||
age = datetime.utcnow() - created_at
|
||||
|
||||
if age <= self.data_freshness_threshold:
|
||||
return 1.0
|
||||
elif age <= self.max_analysis_age:
|
||||
# Linear decay from 1.0 to 0.5
|
||||
decay_factor = 1.0 - (age - self.data_freshness_threshold) / (self.max_analysis_age - self.data_freshness_threshold) * 0.5
|
||||
return max(0.5, decay_factor)
|
||||
else:
|
||||
return 0.5 # Minimum freshness for old data
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating data freshness: {str(e)}")
|
||||
return 0.5
|
||||
|
||||
def _check_api_data_availability(self, api_key_data: Dict) -> bool:
|
||||
"""Check if API key has available data."""
|
||||
try:
|
||||
# Check if API key has been used recently and has data
|
||||
if api_key_data.get('last_used') and api_key_data.get('usage_count', 0) > 0:
|
||||
return api_key_data.get('data_available', False)
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking API data availability: {str(e)}")
|
||||
return False
|
||||
|
||||
async def _store_integrated_data(self, user_id: int, integrated_data: Dict[str, Any], db: Session) -> None:
    """Insert or update the user's OnboardingDataIntegration row and commit.

    Only the ``website_analysis``, ``research_preferences`` and
    ``api_keys_data`` entries of ``integrated_data`` are written. Failures
    are logged and the transaction is rolled back; nothing is raised to the
    caller.
    """
    try:
        record = (
            db.query(OnboardingDataIntegration)
            .filter(OnboardingDataIntegration.user_id == user_id)
            .first()
        )

        if not record:
            # First integration for this user: create the row.
            db.add(
                OnboardingDataIntegration(
                    user_id=user_id,
                    website_analysis_data=integrated_data.get('website_analysis', {}),
                    research_preferences_data=integrated_data.get('research_preferences', {}),
                    api_keys_data=integrated_data.get('api_keys_data', {}),
                    created_at=datetime.utcnow(),
                    updated_at=datetime.utcnow(),
                )
            )
        else:
            # Row exists: overwrite the payload columns and bump updated_at.
            record.website_analysis_data = integrated_data.get('website_analysis', {})
            record.research_preferences_data = integrated_data.get('research_preferences', {})
            record.api_keys_data = integrated_data.get('api_keys_data', {})
            record.updated_at = datetime.utcnow()

        db.commit()
        logger.info(f"Integrated onboarding data stored for user: {user_id}")

    except Exception as exc:
        logger.error(f"Error storing integrated data for user {user_id}: {str(exc)}")
        db.rollback()
|
||||
|
||||
def _get_fallback_data(self) -> Dict[str, Any]:
|
||||
"""Get fallback data when processing fails."""
|
||||
return {
|
||||
'website_analysis': {},
|
||||
'research_preferences': {},
|
||||
'api_keys_data': {},
|
||||
'onboarding_session': {},
|
||||
'data_quality': {
|
||||
'overall_score': 0.0,
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'relevance': 0.0,
|
||||
'confidence': 0.0
|
||||
},
|
||||
'processing_timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
async def get_integrated_data(self, user_id: int, db: Session) -> Optional[Dict[str, Any]]:
    """Return the stored integration result for a user, reprocessing it when stale.

    Returns:
        ``None`` when no stored record exists or an error occurs. Otherwise
        a dict rebuilt from the stored columns (with an empty
        ``onboarding_session`` and a freshly computed ``data_quality``), or,
        when the record is older than ``self.data_freshness_threshold``, the
        result of ``process_onboarding_data``.
    """
    try:
        record = (
            db.query(OnboardingDataIntegration)
            .filter(OnboardingDataIntegration.user_id == user_id)
            .first()
        )
        if not record:
            return None

        # Stored columns may be NULL; normalise them to empty dicts.
        site = record.website_analysis_data or {}
        prefs = record.research_preferences_data or {}
        keys = record.api_keys_data or {}

        cached = {
            'website_analysis': site,
            'research_preferences': prefs,
            'api_keys_data': keys,
            # Session data is not persisted with the record.
            'onboarding_session': {},
            'data_quality': self._assess_data_quality(site, prefs, keys),
            'processing_timestamp': record.updated_at.isoformat(),
        }

        record_age = datetime.utcnow() - record.updated_at
        if record_age > self.data_freshness_threshold:
            logger.info(f"Integrated data is stale for user {user_id}, reprocessing...")
            return await self.process_onboarding_data(user_id, db)

        return cached

    except Exception as exc:
        logger.error(f"Error getting integrated data for user {user_id}: {str(exc)}")
        return None
|
||||
@@ -0,0 +1,547 @@
|
||||
"""
|
||||
Data Quality Service
|
||||
Onboarding data quality assessment.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DataQualityService:
|
||||
"""Service for assessing data quality and validation."""
|
||||
|
||||
def __init__(self):
|
||||
self.quality_thresholds = {
|
||||
'excellent': 0.9,
|
||||
'good': 0.7,
|
||||
'fair': 0.5,
|
||||
'poor': 0.3
|
||||
}
|
||||
|
||||
self.data_freshness_threshold = timedelta(hours=24)
|
||||
self.max_data_age = timedelta(days=30)
|
||||
|
||||
def assess_onboarding_data_quality(self, integrated_data: Dict[str, Any]) -> Dict[str, Any]:
    """Assess the overall quality of integrated onboarding data.

    Args:
        integrated_data: Dict whose ``website_analysis``,
            ``research_preferences``, ``api_keys_data`` and
            ``onboarding_session`` entries are each scored; missing entries
            are treated as empty.

    Returns:
        A report dict with the five dimension scores (``completeness``,
        ``freshness``, ``accuracy``, ``relevance``, ``consistency``), their
        mean as ``confidence`` and ``overall_score``, plus ``quality_level``,
        ``recommendations``, ``issues`` and ``assessment_timestamp``. On any
        error the result of ``_get_fallback_quality_assessment()`` is
        returned.
    """
    try:
        logger.info("Assessing onboarding data quality")

        report = {
            'overall_score': 0.0,
            'completeness': 0.0,
            'freshness': 0.0,
            'accuracy': 0.0,
            'relevance': 0.0,
            'consistency': 0.0,
            'confidence': 0.0,
            'quality_level': 'poor',
            'recommendations': [],
            'issues': [],
            'assessment_timestamp': datetime.utcnow().isoformat(),
        }

        # Per-source metrics, in the order the aggregators expect them:
        # website, research, API keys, session.
        per_source = (
            self._assess_website_analysis_quality(integrated_data.get('website_analysis', {})),
            self._assess_research_preferences_quality(integrated_data.get('research_preferences', {})),
            self._assess_api_keys_quality(integrated_data.get('api_keys_data', {})),
            self._assess_onboarding_session_quality(integrated_data.get('onboarding_session', {})),
        )

        aggregators = (
            ('completeness', self._calculate_completeness_score),
            ('freshness', self._calculate_freshness_score),
            ('accuracy', self._calculate_accuracy_score),
            ('relevance', self._calculate_relevance_score),
            ('consistency', self._calculate_consistency_score),
        )
        for dimension, aggregate in aggregators:
            report[dimension] = aggregate(*per_source)

        # Confidence is the plain mean of the five dimensions, and the
        # overall score is defined as equal to it.
        dimension_total = sum(report[dimension] for dimension, _ in aggregators)
        report['confidence'] = dimension_total / 5
        report['overall_score'] = report['confidence']

        report['quality_level'] = self._determine_quality_level(report['overall_score'])
        report['recommendations'] = self._generate_quality_recommendations(report)
        report['issues'] = self._identify_quality_issues(report)

        logger.info(f"Data quality assessment completed. Overall score: {report['overall_score']:.2f}")
        return report

    except Exception as exc:
        logger.error(f"Error assessing data quality: {str(exc)}")
        return self._get_fallback_quality_assessment()
|
||||
|
||||
def _assess_website_analysis_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Assess quality of website analysis data."""
|
||||
try:
|
||||
quality_metrics = {
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'accuracy': 0.0,
|
||||
'relevance': 0.0,
|
||||
'consistency': 0.0
|
||||
}
|
||||
|
||||
if not website_data:
|
||||
return quality_metrics
|
||||
|
||||
# Completeness assessment
|
||||
required_fields = ['domain', 'industry', 'business_type', 'target_audience', 'content_goals']
|
||||
present_fields = sum(1 for field in required_fields if website_data.get(field))
|
||||
quality_metrics['completeness'] = present_fields / len(required_fields)
|
||||
|
||||
# Freshness assessment
|
||||
if website_data.get('created_at'):
|
||||
try:
|
||||
created_at = datetime.fromisoformat(website_data['created_at'].replace('Z', '+00:00'))
|
||||
age = datetime.utcnow() - created_at
|
||||
quality_metrics['freshness'] = self._calculate_freshness_score_from_age(age)
|
||||
except Exception:
|
||||
quality_metrics['freshness'] = 0.5
|
||||
|
||||
# Accuracy assessment (based on data presence and format)
|
||||
accuracy_score = 0.0
|
||||
if website_data.get('domain') and isinstance(website_data['domain'], str):
|
||||
accuracy_score += 0.2
|
||||
if website_data.get('industry') and isinstance(website_data['industry'], str):
|
||||
accuracy_score += 0.2
|
||||
if website_data.get('business_type') and isinstance(website_data['business_type'], str):
|
||||
accuracy_score += 0.2
|
||||
if website_data.get('target_audience') and isinstance(website_data['target_audience'], str):
|
||||
accuracy_score += 0.2
|
||||
if website_data.get('content_goals') and isinstance(website_data['content_goals'], (str, list)):
|
||||
accuracy_score += 0.2
|
||||
quality_metrics['accuracy'] = accuracy_score
|
||||
|
||||
# Relevance assessment
|
||||
relevance_score = 0.0
|
||||
if website_data.get('domain'):
|
||||
relevance_score += 0.3
|
||||
if website_data.get('industry'):
|
||||
relevance_score += 0.3
|
||||
if website_data.get('content_goals'):
|
||||
relevance_score += 0.4
|
||||
quality_metrics['relevance'] = relevance_score
|
||||
|
||||
# Consistency assessment
|
||||
consistency_score = 0.0
|
||||
if website_data.get('domain') and website_data.get('industry'):
|
||||
consistency_score += 0.5
|
||||
if website_data.get('target_audience') and website_data.get('content_goals'):
|
||||
consistency_score += 0.5
|
||||
quality_metrics['consistency'] = consistency_score
|
||||
|
||||
return quality_metrics
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error assessing website analysis quality: {str(e)}")
|
||||
return {'completeness': 0.0, 'freshness': 0.0, 'accuracy': 0.0, 'relevance': 0.0, 'consistency': 0.0}
|
||||
|
||||
def _assess_research_preferences_quality(self, research_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Assess quality of research preferences data."""
|
||||
try:
|
||||
quality_metrics = {
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'accuracy': 0.0,
|
||||
'relevance': 0.0,
|
||||
'consistency': 0.0
|
||||
}
|
||||
|
||||
if not research_data:
|
||||
return quality_metrics
|
||||
|
||||
# Completeness assessment
|
||||
required_fields = ['research_topics', 'content_types', 'target_audience', 'industry_focus']
|
||||
present_fields = sum(1 for field in required_fields if research_data.get(field))
|
||||
quality_metrics['completeness'] = present_fields / len(required_fields)
|
||||
|
||||
# Freshness assessment
|
||||
if research_data.get('created_at'):
|
||||
try:
|
||||
created_at = datetime.fromisoformat(research_data['created_at'].replace('Z', '+00:00'))
|
||||
age = datetime.utcnow() - created_at
|
||||
quality_metrics['freshness'] = self._calculate_freshness_score_from_age(age)
|
||||
except Exception:
|
||||
quality_metrics['freshness'] = 0.5
|
||||
|
||||
# Accuracy assessment
|
||||
accuracy_score = 0.0
|
||||
if research_data.get('research_topics') and isinstance(research_data['research_topics'], (str, list)):
|
||||
accuracy_score += 0.25
|
||||
if research_data.get('content_types') and isinstance(research_data['content_types'], (str, list)):
|
||||
accuracy_score += 0.25
|
||||
if research_data.get('target_audience') and isinstance(research_data['target_audience'], str):
|
||||
accuracy_score += 0.25
|
||||
if research_data.get('industry_focus') and isinstance(research_data['industry_focus'], str):
|
||||
accuracy_score += 0.25
|
||||
quality_metrics['accuracy'] = accuracy_score
|
||||
|
||||
# Relevance assessment
|
||||
relevance_score = 0.0
|
||||
if research_data.get('research_topics'):
|
||||
relevance_score += 0.4
|
||||
if research_data.get('content_types'):
|
||||
relevance_score += 0.3
|
||||
if research_data.get('target_audience'):
|
||||
relevance_score += 0.3
|
||||
quality_metrics['relevance'] = relevance_score
|
||||
|
||||
# Consistency assessment
|
||||
consistency_score = 0.0
|
||||
if research_data.get('research_topics') and research_data.get('content_types'):
|
||||
consistency_score += 0.5
|
||||
if research_data.get('target_audience') and research_data.get('industry_focus'):
|
||||
consistency_score += 0.5
|
||||
quality_metrics['consistency'] = consistency_score
|
||||
|
||||
return quality_metrics
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error assessing research preferences quality: {str(e)}")
|
||||
return {'completeness': 0.0, 'freshness': 0.0, 'accuracy': 0.0, 'relevance': 0.0, 'consistency': 0.0}
|
||||
|
||||
def _assess_api_keys_quality(self, api_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Assess quality of API keys data."""
|
||||
try:
|
||||
quality_metrics = {
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'accuracy': 0.0,
|
||||
'relevance': 0.0,
|
||||
'consistency': 0.0
|
||||
}
|
||||
|
||||
if not api_data:
|
||||
return quality_metrics
|
||||
|
||||
# Completeness assessment
|
||||
total_apis = len(api_data)
|
||||
active_apis = sum(1 for api_info in api_data.values() if api_info.get('is_active'))
|
||||
quality_metrics['completeness'] = active_apis / max(total_apis, 1)
|
||||
|
||||
# Freshness assessment
|
||||
freshness_scores = []
|
||||
for api_info in api_data.values():
|
||||
if api_info.get('last_used'):
|
||||
try:
|
||||
last_used = datetime.fromisoformat(api_info['last_used'].replace('Z', '+00:00'))
|
||||
age = datetime.utcnow() - last_used
|
||||
freshness_scores.append(self._calculate_freshness_score_from_age(age))
|
||||
except Exception:
|
||||
freshness_scores.append(0.5)
|
||||
|
||||
quality_metrics['freshness'] = sum(freshness_scores) / len(freshness_scores) if freshness_scores else 0.5
|
||||
|
||||
# Accuracy assessment
|
||||
accuracy_score = 0.0
|
||||
for api_info in api_data.values():
|
||||
if api_info.get('service_name') and api_info.get('is_active'):
|
||||
accuracy_score += 0.5
|
||||
if api_info.get('data_available'):
|
||||
accuracy_score += 0.5
|
||||
quality_metrics['accuracy'] = accuracy_score / max(len(api_data), 1)
|
||||
|
||||
# Relevance assessment
|
||||
relevant_apis = ['google_analytics', 'google_search_console', 'semrush', 'ahrefs', 'moz']
|
||||
relevant_count = sum(1 for api_name in api_data.keys() if api_name.lower() in relevant_apis)
|
||||
quality_metrics['relevance'] = relevant_count / max(len(api_data), 1)
|
||||
|
||||
# Consistency assessment
|
||||
consistency_score = 0.0
|
||||
if len(api_data) > 0:
|
||||
consistency_score = 0.5 # Basic consistency if APIs exist
|
||||
if any(api_info.get('data_available') for api_info in api_data.values()):
|
||||
consistency_score += 0.5
|
||||
quality_metrics['consistency'] = consistency_score
|
||||
|
||||
return quality_metrics
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error assessing API keys quality: {str(e)}")
|
||||
return {'completeness': 0.0, 'freshness': 0.0, 'accuracy': 0.0, 'relevance': 0.0, 'consistency': 0.0}
|
||||
|
||||
def _assess_onboarding_session_quality(self, session_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Assess quality of onboarding session data."""
|
||||
try:
|
||||
quality_metrics = {
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'accuracy': 0.0,
|
||||
'relevance': 0.0,
|
||||
'consistency': 0.0
|
||||
}
|
||||
|
||||
if not session_data:
|
||||
return quality_metrics
|
||||
|
||||
# Completeness assessment
|
||||
required_fields = ['session_id', 'completion_percentage', 'completed_steps', 'current_step']
|
||||
present_fields = sum(1 for field in required_fields if session_data.get(field))
|
||||
quality_metrics['completeness'] = present_fields / len(required_fields)
|
||||
|
||||
# Freshness assessment
|
||||
if session_data.get('updated_at'):
|
||||
try:
|
||||
updated_at = datetime.fromisoformat(session_data['updated_at'].replace('Z', '+00:00'))
|
||||
age = datetime.utcnow() - updated_at
|
||||
quality_metrics['freshness'] = self._calculate_freshness_score_from_age(age)
|
||||
except Exception:
|
||||
quality_metrics['freshness'] = 0.5
|
||||
|
||||
# Accuracy assessment
|
||||
accuracy_score = 0.0
|
||||
if session_data.get('session_id') and isinstance(session_data['session_id'], str):
|
||||
accuracy_score += 0.25
|
||||
if session_data.get('completion_percentage') and isinstance(session_data['completion_percentage'], (int, float)):
|
||||
accuracy_score += 0.25
|
||||
if session_data.get('completed_steps') and isinstance(session_data['completed_steps'], (list, int)):
|
||||
accuracy_score += 0.25
|
||||
if session_data.get('current_step') and isinstance(session_data['current_step'], (str, int)):
|
||||
accuracy_score += 0.25
|
||||
quality_metrics['accuracy'] = accuracy_score
|
||||
|
||||
# Relevance assessment
|
||||
relevance_score = 0.0
|
||||
if session_data.get('completion_percentage', 0) > 50:
|
||||
relevance_score += 0.5
|
||||
if session_data.get('session_data'):
|
||||
relevance_score += 0.5
|
||||
quality_metrics['relevance'] = relevance_score
|
||||
|
||||
# Consistency assessment
|
||||
consistency_score = 0.0
|
||||
if session_data.get('completion_percentage') and session_data.get('completed_steps'):
|
||||
consistency_score += 0.5
|
||||
if session_data.get('current_step') and session_data.get('session_id'):
|
||||
consistency_score += 0.5
|
||||
quality_metrics['consistency'] = consistency_score
|
||||
|
||||
return quality_metrics
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error assessing onboarding session quality: {str(e)}")
|
||||
return {'completeness': 0.0, 'freshness': 0.0, 'accuracy': 0.0, 'relevance': 0.0, 'consistency': 0.0}
|
||||
|
||||
def _calculate_completeness_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
|
||||
"""Calculate overall completeness score."""
|
||||
try:
|
||||
scores = [
|
||||
website_quality['completeness'],
|
||||
research_quality['completeness'],
|
||||
api_quality['completeness'],
|
||||
session_quality['completeness']
|
||||
]
|
||||
return sum(scores) / len(scores)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating completeness score: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
def _calculate_freshness_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
|
||||
"""Calculate overall freshness score."""
|
||||
try:
|
||||
scores = [
|
||||
website_quality['freshness'],
|
||||
research_quality['freshness'],
|
||||
api_quality['freshness'],
|
||||
session_quality['freshness']
|
||||
]
|
||||
return sum(scores) / len(scores)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating freshness score: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
def _calculate_accuracy_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
|
||||
"""Calculate overall accuracy score."""
|
||||
try:
|
||||
scores = [
|
||||
website_quality['accuracy'],
|
||||
research_quality['accuracy'],
|
||||
api_quality['accuracy'],
|
||||
session_quality['accuracy']
|
||||
]
|
||||
return sum(scores) / len(scores)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating accuracy score: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
def _calculate_relevance_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
|
||||
"""Calculate overall relevance score."""
|
||||
try:
|
||||
scores = [
|
||||
website_quality['relevance'],
|
||||
research_quality['relevance'],
|
||||
api_quality['relevance'],
|
||||
session_quality['relevance']
|
||||
]
|
||||
return sum(scores) / len(scores)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating relevance score: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
def _calculate_consistency_score(self, website_quality: Dict, research_quality: Dict, api_quality: Dict, session_quality: Dict) -> float:
|
||||
"""Calculate overall consistency score."""
|
||||
try:
|
||||
scores = [
|
||||
website_quality['consistency'],
|
||||
research_quality['consistency'],
|
||||
api_quality['consistency'],
|
||||
session_quality['consistency']
|
||||
]
|
||||
return sum(scores) / len(scores)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating consistency score: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
def _calculate_freshness_score_from_age(self, age: timedelta) -> float:
|
||||
"""Calculate freshness score based on data age."""
|
||||
try:
|
||||
if age <= self.data_freshness_threshold:
|
||||
return 1.0
|
||||
elif age <= self.max_data_age:
|
||||
# Linear decay from 1.0 to 0.5
|
||||
decay_factor = 1.0 - (age - self.data_freshness_threshold) / (self.max_data_age - self.data_freshness_threshold) * 0.5
|
||||
return max(0.5, decay_factor)
|
||||
else:
|
||||
return 0.5 # Minimum freshness for old data
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating freshness score from age: {str(e)}")
|
||||
return 0.5
|
||||
|
||||
def _determine_quality_level(self, overall_score: float) -> str:
|
||||
"""Determine quality level based on overall score."""
|
||||
try:
|
||||
if overall_score >= self.quality_thresholds['excellent']:
|
||||
return 'excellent'
|
||||
elif overall_score >= self.quality_thresholds['good']:
|
||||
return 'good'
|
||||
elif overall_score >= self.quality_thresholds['fair']:
|
||||
return 'fair'
|
||||
else:
|
||||
return 'poor'
|
||||
except Exception as e:
|
||||
logger.error(f"Error determining quality level: {str(e)}")
|
||||
return 'poor'
|
||||
|
||||
def _generate_quality_recommendations(self, quality_assessment: Dict[str, Any]) -> List[str]:
|
||||
"""Generate recommendations based on quality assessment."""
|
||||
try:
|
||||
recommendations = []
|
||||
|
||||
if quality_assessment['completeness'] < 0.7:
|
||||
recommendations.append("Complete missing onboarding data to improve strategy accuracy")
|
||||
|
||||
if quality_assessment['freshness'] < 0.7:
|
||||
recommendations.append("Update stale data to ensure current market insights")
|
||||
|
||||
if quality_assessment['accuracy'] < 0.7:
|
||||
recommendations.append("Verify data accuracy for better strategy recommendations")
|
||||
|
||||
if quality_assessment['relevance'] < 0.7:
|
||||
recommendations.append("Provide more relevant data for targeted strategy development")
|
||||
|
||||
if quality_assessment['consistency'] < 0.7:
|
||||
recommendations.append("Ensure data consistency across different sources")
|
||||
|
||||
if quality_assessment['overall_score'] < 0.5:
|
||||
recommendations.append("Consider re-running onboarding process for better data quality")
|
||||
|
||||
return recommendations
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating quality recommendations: {str(e)}")
|
||||
return ["Unable to generate recommendations due to assessment error"]
|
||||
|
||||
def _identify_quality_issues(self, quality_assessment: Dict[str, Any]) -> List[str]:
|
||||
"""Identify specific quality issues."""
|
||||
try:
|
||||
issues = []
|
||||
|
||||
if quality_assessment['completeness'] < 0.5:
|
||||
issues.append("Incomplete data: Missing critical onboarding information")
|
||||
|
||||
if quality_assessment['freshness'] < 0.5:
|
||||
issues.append("Stale data: Information may be outdated")
|
||||
|
||||
if quality_assessment['accuracy'] < 0.5:
|
||||
issues.append("Data accuracy concerns: Verify information validity")
|
||||
|
||||
if quality_assessment['relevance'] < 0.5:
|
||||
issues.append("Low relevance: Data may not align with current needs")
|
||||
|
||||
if quality_assessment['consistency'] < 0.5:
|
||||
issues.append("Inconsistent data: Conflicting information detected")
|
||||
|
||||
return issues
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error identifying quality issues: {str(e)}")
|
||||
return ["Unable to identify issues due to assessment error"]
|
||||
|
||||
def _get_fallback_quality_assessment(self) -> Dict[str, Any]:
|
||||
"""Get fallback quality assessment when assessment fails."""
|
||||
return {
|
||||
'overall_score': 0.0,
|
||||
'completeness': 0.0,
|
||||
'freshness': 0.0,
|
||||
'accuracy': 0.0,
|
||||
'relevance': 0.0,
|
||||
'consistency': 0.0,
|
||||
'confidence': 0.0,
|
||||
'quality_level': 'poor',
|
||||
'recommendations': ['Unable to assess data quality'],
|
||||
'issues': ['Quality assessment failed'],
|
||||
'assessment_timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
def validate_field_data(self, field_data: Dict[str, Any]) -> Dict[str, Any]:
    """Check each field value for emptiness and suspiciously short text.

    Args:
        field_data: Mapping of field name to field value.

    Returns:
        A dict with 'is_valid' (False as soon as any field is None or an
        empty string), 'errors', 'warnings' (strings shorter than three
        characters after stripping) and 'confidence' (1.0 multiplied by 0.9
        per warning). If validation itself raises, the error is logged and
        a failed result with confidence 0.0 is returned.
    """
    try:
        errors: List[str] = []
        warnings: List[str] = []
        confidence = 1.0

        for name, value in field_data.items():
            if value is None or value == '':
                errors.append(f"Field '{name}' is empty")
                continue
            if isinstance(value, str) and len(value.strip()) < 3:
                warnings.append(f"Field '{name}' may be too short")
                confidence *= 0.9

        return {
            'is_valid': not errors,
            'errors': errors,
            'warnings': warnings,
            'confidence': confidence,
        }
    except Exception as e:
        logger.error(f"Error validating field data: {str(e)}")
        return {
            'is_valid': False,
            'errors': ['Validation failed'],
            'warnings': [],
            'confidence': 0.0
        }
|
||||
@@ -0,0 +1,790 @@
|
||||
"""
|
||||
Field Transformation Service
|
||||
Onboarding data to field mapping.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class FieldTransformationService:
|
||||
"""Service for transforming onboarding data to strategic input fields."""
|
||||
|
||||
def __init__(self):
|
||||
# Define field mapping configurations
|
||||
self.field_mappings = {
|
||||
# Business Context mappings
|
||||
'business_objectives': {
|
||||
'sources': ['website_analysis.content_goals', 'research_preferences.research_topics'],
|
||||
'transformation': 'extract_business_objectives'
|
||||
},
|
||||
'target_metrics': {
|
||||
'sources': ['website_analysis.performance_metrics', 'research_preferences.performance_tracking'],
|
||||
'transformation': 'extract_target_metrics'
|
||||
},
|
||||
'content_budget': {
|
||||
'sources': ['onboarding_session.session_data.budget'],
|
||||
'transformation': 'extract_budget'
|
||||
},
|
||||
'team_size': {
|
||||
'sources': ['onboarding_session.session_data.team_size'],
|
||||
'transformation': 'extract_team_size'
|
||||
},
|
||||
'implementation_timeline': {
|
||||
'sources': ['onboarding_session.session_data.timeline'],
|
||||
'transformation': 'extract_timeline'
|
||||
},
|
||||
'market_share': {
|
||||
'sources': ['website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_market_share'
|
||||
},
|
||||
'competitive_position': {
|
||||
'sources': ['website_analysis.competitors', 'research_preferences.competitor_analysis'],
|
||||
'transformation': 'extract_competitive_position'
|
||||
},
|
||||
'performance_metrics': {
|
||||
'sources': ['website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_performance_metrics'
|
||||
},
|
||||
|
||||
# Audience Intelligence mappings
|
||||
'content_preferences': {
|
||||
'sources': ['research_preferences.content_types'],
|
||||
'transformation': 'extract_content_preferences'
|
||||
},
|
||||
'consumption_patterns': {
|
||||
'sources': ['website_analysis.target_audience', 'research_preferences.target_audience'],
|
||||
'transformation': 'extract_consumption_patterns'
|
||||
},
|
||||
'audience_pain_points': {
|
||||
'sources': ['website_analysis.content_gaps', 'research_preferences.research_topics'],
|
||||
'transformation': 'extract_pain_points'
|
||||
},
|
||||
'buying_journey': {
|
||||
'sources': ['website_analysis.target_audience', 'research_preferences.target_audience'],
|
||||
'transformation': 'extract_buying_journey'
|
||||
},
|
||||
'seasonal_trends': {
|
||||
'sources': ['research_preferences.trend_analysis'],
|
||||
'transformation': 'extract_seasonal_trends'
|
||||
},
|
||||
'engagement_metrics': {
|
||||
'sources': ['website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_engagement_metrics'
|
||||
},
|
||||
|
||||
# Competitive Intelligence mappings
|
||||
'top_competitors': {
|
||||
'sources': ['website_analysis.competitors'],
|
||||
'transformation': 'extract_competitors'
|
||||
},
|
||||
'competitor_content_strategies': {
|
||||
'sources': ['website_analysis.competitors', 'research_preferences.competitor_analysis'],
|
||||
'transformation': 'extract_competitor_strategies'
|
||||
},
|
||||
'market_gaps': {
|
||||
'sources': ['website_analysis.content_gaps', 'research_preferences.research_topics'],
|
||||
'transformation': 'extract_market_gaps'
|
||||
},
|
||||
'industry_trends': {
|
||||
'sources': ['website_analysis.industry', 'research_preferences.industry_focus'],
|
||||
'transformation': 'extract_industry_trends'
|
||||
},
|
||||
'emerging_trends': {
|
||||
'sources': ['research_preferences.trend_analysis'],
|
||||
'transformation': 'extract_emerging_trends'
|
||||
},
|
||||
|
||||
# Content Strategy mappings
|
||||
'preferred_formats': {
|
||||
'sources': ['research_preferences.content_types'],
|
||||
'transformation': 'extract_preferred_formats'
|
||||
},
|
||||
'content_mix': {
|
||||
'sources': ['research_preferences.content_types', 'website_analysis.content_goals'],
|
||||
'transformation': 'extract_content_mix'
|
||||
},
|
||||
'content_frequency': {
|
||||
'sources': ['research_preferences.content_calendar'],
|
||||
'transformation': 'extract_content_frequency'
|
||||
},
|
||||
'optimal_timing': {
|
||||
'sources': ['research_preferences.content_calendar'],
|
||||
'transformation': 'extract_optimal_timing'
|
||||
},
|
||||
'quality_metrics': {
|
||||
'sources': ['website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_quality_metrics'
|
||||
},
|
||||
'editorial_guidelines': {
|
||||
'sources': ['website_analysis.business_type', 'research_preferences.content_types'],
|
||||
'transformation': 'extract_editorial_guidelines'
|
||||
},
|
||||
'brand_voice': {
|
||||
'sources': ['website_analysis.business_type', 'onboarding_session.session_data.brand_voice'],
|
||||
'transformation': 'extract_brand_voice'
|
||||
},
|
||||
|
||||
# Performance Analytics mappings
|
||||
'traffic_sources': {
|
||||
'sources': ['website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_traffic_sources'
|
||||
},
|
||||
'conversion_rates': {
|
||||
'sources': ['website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_conversion_rates'
|
||||
},
|
||||
'content_roi_targets': {
|
||||
'sources': ['onboarding_session.session_data.budget', 'website_analysis.performance_metrics'],
|
||||
'transformation': 'extract_roi_targets'
|
||||
},
|
||||
'ab_testing_capabilities': {
|
||||
'sources': ['onboarding_session.session_data.team_size'],
|
||||
'transformation': 'extract_ab_testing_capabilities'
|
||||
}
|
||||
}
|
||||
|
||||
def transform_onboarding_data_to_fields(self, integrated_data: Dict[str, Any]) -> Dict[str, Any]:
    """Run every configured field mapping against the integrated data.

    For each entry in ``self.field_mappings`` the source values are
    extracted, the configured transformation method is looked up by name
    and called, and truthy results are recorded together with source
    information. A failure in one field is logged and skipped so the
    remaining fields are still processed.

    Args:
        integrated_data: Nested dict of integrated onboarding data.

    Returns:
        A dict with 'fields' (field id -> transformed value), 'sources'
        (field id -> source info) and 'transformation_metadata'. If the
        whole run fails, empty 'fields'/'sources' and metadata holding the
        error text are returned.
    """
    try:
        logger.info("Transforming onboarding data to strategic fields")

        fields_out: Dict[str, Any] = {}
        provenance: Dict[str, Any] = {}

        for field_id, mapping_config in self.field_mappings.items():
            try:
                # Extract data from sources
                inputs = self._extract_source_data(integrated_data, mapping_config['sources'])
                if not inputs:
                    continue

                # Apply transformation
                transform = getattr(self, mapping_config['transformation'])
                value = transform(inputs, integrated_data)
                if not value:
                    continue

                fields_out[field_id] = value
                provenance[field_id] = self._get_data_source_info(mapping_config['sources'], integrated_data)
            except Exception as e:
                logger.warning(f"Error transforming field {field_id}: {str(e)}")
                continue

        metadata = {
            'total_fields_processed': len(self.field_mappings),
            'successful_transformations': len(fields_out),
            'transformation_timestamp': datetime.utcnow().isoformat()
        }

        logger.info(f"Successfully transformed {len(fields_out)} fields from onboarding data")
        return {
            'fields': fields_out,
            'sources': provenance,
            'transformation_metadata': metadata
        }
    except Exception as e:
        logger.error(f"Error transforming onboarding data to fields: {str(e)}")
        return {'fields': {}, 'sources': {}, 'transformation_metadata': {'error': str(e)}}
|
||||
|
||||
def _extract_source_data(self, integrated_data: Dict[str, Any], sources: List[str]) -> Dict[str, Any]:
|
||||
"""Extract data from specified sources."""
|
||||
source_data = {}
|
||||
|
||||
for source_path in sources:
|
||||
try:
|
||||
# Navigate nested dictionary structure
|
||||
keys = source_path.split('.')
|
||||
value = integrated_data
|
||||
|
||||
for key in keys:
|
||||
if isinstance(value, dict) and key in value:
|
||||
value = value[key]
|
||||
else:
|
||||
value = None
|
||||
break
|
||||
|
||||
if value is not None:
|
||||
source_data[source_path] = value
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Error extracting data from {source_path}: {str(e)}")
|
||||
continue
|
||||
|
||||
return source_data
|
||||
|
||||
def _get_data_source_info(self, sources: List[str], integrated_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Get information about data sources for a field."""
|
||||
source_info = {
|
||||
'sources': sources,
|
||||
'data_quality': self._assess_source_quality(sources, integrated_data),
|
||||
'last_updated': datetime.utcnow().isoformat()
|
||||
}
|
||||
return source_info
|
||||
|
||||
def _assess_source_quality(self, sources: List[str], integrated_data: Dict[str, Any]) -> float:
|
||||
"""Assess the quality of data sources."""
|
||||
try:
|
||||
quality_scores = []
|
||||
|
||||
for source in sources:
|
||||
# Check if source exists and has data
|
||||
keys = source.split('.')
|
||||
value = integrated_data
|
||||
|
||||
for key in keys:
|
||||
if isinstance(value, dict) and key in value:
|
||||
value = value[key]
|
||||
else:
|
||||
value = None
|
||||
break
|
||||
|
||||
if value:
|
||||
# Basic quality assessment
|
||||
if isinstance(value, (list, dict)) and len(value) > 0:
|
||||
quality_scores.append(1.0)
|
||||
elif isinstance(value, str) and len(value.strip()) > 0:
|
||||
quality_scores.append(0.8)
|
||||
else:
|
||||
quality_scores.append(0.5)
|
||||
else:
|
||||
quality_scores.append(0.0)
|
||||
|
||||
return sum(quality_scores) / len(quality_scores) if quality_scores else 0.0
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error assessing source quality: {str(e)}")
|
||||
return 0.0
|
||||
|
||||
# Transformation methods for each field type
|
||||
def extract_business_objectives(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract business objectives from content goals and research topics.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        The content goals followed by the research topics, joined with
        ', ', or None when neither source yields a list or string.
        Errors are logged and yield None.
    """
    try:
        objectives: List[str] = []

        for source_key in ('website_analysis.content_goals', 'research_preferences.research_topics'):
            value = source_data.get(source_key)
            if isinstance(value, list):
                # Stringify items: str.join raises TypeError on non-string
                # entries, which previously discarded the whole field.
                objectives.extend(str(item) for item in value)
            elif isinstance(value, str):
                objectives.append(value)

        return ', '.join(objectives) if objectives else None
    except Exception as e:
        logger.error(f"Error extracting business objectives: {str(e)}")
        return None
|
||||
|
||||
def extract_target_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract target metrics from performance data and tracking preferences.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        Performance metrics (rendered as 'name: value' for a dict, or the
        raw string) followed by the performance-tracking entries, joined
        with ', '; None when nothing usable is present. Errors are logged
        and yield None.
    """
    try:
        metrics: List[str] = []

        perf_metrics = source_data.get('website_analysis.performance_metrics')
        if isinstance(perf_metrics, dict):
            metrics.extend(f"{k}: {v}" for k, v in perf_metrics.items())
        elif isinstance(perf_metrics, str):
            metrics.append(perf_metrics)

        tracking = source_data.get('research_preferences.performance_tracking')
        if isinstance(tracking, list):
            # Stringify items: str.join raises TypeError on non-string
            # entries, which previously discarded the whole field.
            metrics.extend(str(item) for item in tracking)
        elif isinstance(tracking, str):
            metrics.append(tracking)

        return ', '.join(metrics) if metrics else None
    except Exception as e:
        logger.error(f"Error extracting target metrics: {str(e)}")
        return None
|
||||
|
||||
def extract_budget(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the onboarding-session budget as a string.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        ``str()`` of the 'onboarding_session.session_data.budget' entry
        when it is present and truthy, otherwise None. Errors are logged
        and yield None.
    """
    budget_key = 'onboarding_session.session_data.budget'
    try:
        if budget_key not in source_data:
            return None
        raw_budget = source_data[budget_key]
        return str(raw_budget) if raw_budget else None
    except Exception as e:
        logger.error(f"Error extracting budget: {str(e)}")
        return None
|
||||
|
||||
def extract_team_size(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the onboarding-session team size as a string.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        ``str()`` of the 'onboarding_session.session_data.team_size' entry
        when it is present and truthy, otherwise None. Errors are logged
        and yield None.
    """
    team_key = 'onboarding_session.session_data.team_size'
    try:
        if team_key not in source_data:
            return None
        headcount = source_data[team_key]
        return str(headcount) if headcount else None
    except Exception as e:
        logger.error(f"Error extracting team size: {str(e)}")
        return None
|
||||
|
||||
def extract_timeline(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the onboarding-session implementation timeline as a string.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        ``str()`` of the 'onboarding_session.session_data.timeline' entry
        when it is present and truthy, otherwise None. Errors are logged
        and yield None.
    """
    timeline_key = 'onboarding_session.session_data.timeline'
    try:
        if timeline_key not in source_data:
            return None
        schedule = source_data[timeline_key]
        return str(schedule) if schedule else None
    except Exception as e:
        logger.error(f"Error extracting timeline: {str(e)}")
        return None
|
||||
|
||||
def extract_market_share(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract market share from performance metrics.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        ``str()`` of the 'market_share' entry of the performance-metrics
        dict, or None when the metrics are not a dict, the entry is
        missing, or its value is None. Errors are logged and yield None.
    """
    try:
        metrics = source_data.get('website_analysis.performance_metrics')
        if not isinstance(metrics, dict):
            return None

        share = metrics.get('market_share')
        # A stored None must not leak out as the literal string 'None'.
        return str(share) if share is not None else None
    except Exception as e:
        logger.error(f"Error extracting market share: {str(e)}")
        return None
|
||||
|
||||
def extract_competitive_position(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe the competitive position from competitor data.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        'Competitors: ...' and/or 'Analysis: ...' segments (each only when
        its source is present and truthy) joined with '; ', or None when
        neither applies. Errors are logged and yield None.
    """
    competitors_key = 'website_analysis.competitors'
    analysis_key = 'research_preferences.competitor_analysis'
    try:
        segments = []
        if competitors_key in source_data and source_data[competitors_key]:
            segments.append(f"Competitors: {source_data[competitors_key]}")
        if analysis_key in source_data and source_data[analysis_key]:
            segments.append(f"Analysis: {source_data[analysis_key]}")

        if not segments:
            return None
        return '; '.join(segments)
    except Exception as e:
        logger.error(f"Error extracting competitive position: {str(e)}")
        return None
|
||||
|
||||
def extract_performance_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Render the website performance metrics as text.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        'name: value' pairs joined with ', ' for a dict, the value itself
        for a string, and None for a missing source or any other type.
        Errors are logged and yield None.
    """
    metrics_key = 'website_analysis.performance_metrics'
    try:
        if metrics_key not in source_data:
            return None

        metrics = source_data[metrics_key]
        if isinstance(metrics, str):
            return metrics
        if isinstance(metrics, dict):
            return ', '.join(f"{name}: {value}" for name, value in metrics.items())
        return None
    except Exception as e:
        logger.error(f"Error extracting performance metrics: {str(e)}")
        return None
|
||||
|
||||
def extract_content_preferences(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract content preferences from research preferences.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        The content types joined with ', ' for a list, the value itself
        for a string, and None for a missing source or any other type.
        Errors are logged and yield None.
    """
    try:
        content_types = source_data.get('research_preferences.content_types')
        if isinstance(content_types, list):
            # Stringify items: str.join raises TypeError on non-string
            # entries, which previously discarded the whole field.
            return ', '.join(str(item) for item in content_types)
        if isinstance(content_types, str):
            return content_types
        return None
    except Exception as e:
        logger.error(f"Error extracting content preferences: {str(e)}")
        return None
|
||||
|
||||
def extract_consumption_patterns(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe consumption patterns from the two target-audience sources.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        'Website Audience: ...' and/or 'Research Audience: ...' segments
        (each only when its source is present and truthy) joined with
        '; ', or None when neither applies. Errors are logged and yield
        None.
    """
    website_key = 'website_analysis.target_audience'
    research_key = 'research_preferences.target_audience'
    try:
        segments = []
        if website_key in source_data and source_data[website_key]:
            segments.append(f"Website Audience: {source_data[website_key]}")
        if research_key in source_data and source_data[research_key]:
            segments.append(f"Research Audience: {source_data[research_key]}")

        if not segments:
            return None
        return '; '.join(segments)
    except Exception as e:
        logger.error(f"Error extracting consumption patterns: {str(e)}")
        return None
|
||||
|
||||
def extract_pain_points(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract audience pain points from content gaps and research topics.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        The content gaps followed by the research topics, joined with
        ', ', or None when neither source yields a list or string.
        Errors are logged and yield None.
    """
    try:
        pain_points: List[str] = []

        for source_key in ('website_analysis.content_gaps', 'research_preferences.research_topics'):
            value = source_data.get(source_key)
            if isinstance(value, list):
                # Stringify items: str.join raises TypeError on non-string
                # entries, which previously discarded the whole field.
                pain_points.extend(str(item) for item in value)
            elif isinstance(value, str):
                pain_points.append(value)

        return ', '.join(pain_points) if pain_points else None
    except Exception as e:
        logger.error(f"Error extracting pain points: {str(e)}")
        return None
|
||||
|
||||
def extract_buying_journey(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract buying journey from audience data.

    The website target audience is preferred; the research target audience
    (also configured as a source for this field) is used as a fallback when
    the website value is missing or empty.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        'Journey based on: <audience>' for the first truthy audience value,
        or None when neither source has one. Errors are logged and yield
        None.
    """
    try:
        audience_keys = (
            'website_analysis.target_audience',
            'research_preferences.target_audience',
        )
        for audience_key in audience_keys:
            audience = source_data.get(audience_key)
            if audience:
                return f"Journey based on: {audience}"
        return None
    except Exception as e:
        logger.error(f"Error extracting buying journey: {str(e)}")
        return None
|
||||
|
||||
def extract_seasonal_trends(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract seasonal trends from trend analysis.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        The trend entries joined with ', ' for a list, the value itself
        for a string, and None for a missing source or any other type.
        Errors are logged and yield None.
    """
    try:
        trends = source_data.get('research_preferences.trend_analysis')
        if isinstance(trends, list):
            # Stringify items: str.join raises TypeError on non-string
            # entries, which previously discarded the whole field.
            return ', '.join(str(item) for item in trends)
        if isinstance(trends, str):
            return trends
        return None
    except Exception as e:
        logger.error(f"Error extracting seasonal trends: {str(e)}")
        return None
|
||||
|
||||
def extract_engagement_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Pick the engagement-related entries out of the performance metrics.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        'name: value' pairs, joined with ', ', for every metric whose name
        contains 'engagement' (case-insensitive); None when the metrics
        are missing, not a dict, or contain no such entry. Errors are
        logged and yield None.
    """
    metrics_key = 'website_analysis.performance_metrics'
    try:
        if metrics_key not in source_data:
            return None
        metrics = source_data[metrics_key]
        if not isinstance(metrics, dict):
            return None

        rendered = [
            f"{name}: {value}"
            for name, value in metrics.items()
            if 'engagement' in name.lower()
        ]
        return ', '.join(rendered) if rendered else None
    except Exception as e:
        logger.error(f"Error extracting engagement metrics: {str(e)}")
        return None
|
||||
|
||||
def extract_competitors(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract top competitors from competitor data.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        The competitor entries joined with ', ' for a list, the value
        itself for a string, and None for a missing source or any other
        type. Errors are logged and yield None.
    """
    try:
        competitors = source_data.get('website_analysis.competitors')
        if isinstance(competitors, list):
            # Stringify items: str.join raises TypeError on non-string
            # entries (e.g. dict records), which previously discarded the
            # whole field.
            return ', '.join(str(item) for item in competitors)
        if isinstance(competitors, str):
            return competitors
        return None
    except Exception as e:
        logger.error(f"Error extracting competitors: {str(e)}")
        return None
|
||||
|
||||
def extract_competitor_strategies(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe competitor content strategies from competitor data.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        'Competitors: ...' and/or 'Analysis: ...' segments (each only when
        its source is present and truthy) joined with '; ', or None when
        neither applies. Errors are logged and yield None.
    """
    competitors_key = 'website_analysis.competitors'
    analysis_key = 'research_preferences.competitor_analysis'
    try:
        segments = []
        if competitors_key in source_data and source_data[competitors_key]:
            segments.append(f"Competitors: {source_data[competitors_key]}")
        if analysis_key in source_data and source_data[analysis_key]:
            segments.append(f"Analysis: {source_data[analysis_key]}")

        if not segments:
            return None
        return '; '.join(segments)
    except Exception as e:
        logger.error(f"Error extracting competitor strategies: {str(e)}")
        return None
|
||||
|
||||
def extract_market_gaps(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract market gaps from content gaps and research topics.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        The content gaps followed by the research topics, joined with
        ', ', or None when neither source yields a list or string.
        Errors are logged and yield None.
    """
    try:
        gaps: List[str] = []

        for source_key in ('website_analysis.content_gaps', 'research_preferences.research_topics'):
            value = source_data.get(source_key)
            if isinstance(value, list):
                # Stringify items: str.join raises TypeError on non-string
                # entries, which previously discarded the whole field.
                gaps.extend(str(item) for item in value)
            elif isinstance(value, str):
                gaps.append(value)

        return ', '.join(gaps) if gaps else None
    except Exception as e:
        logger.error(f"Error extracting market gaps: {str(e)}")
        return None
|
||||
|
||||
def extract_industry_trends(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe industry trends from the industry and industry-focus sources.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        'Industry: ...' and/or 'Focus: ...' segments (each only when its
        source is present and truthy) joined with '; ', or None when
        neither applies. Errors are logged and yield None.
    """
    industry_key = 'website_analysis.industry'
    focus_key = 'research_preferences.industry_focus'
    try:
        segments = []
        if industry_key in source_data and source_data[industry_key]:
            segments.append(f"Industry: {source_data[industry_key]}")
        if focus_key in source_data and source_data[focus_key]:
            segments.append(f"Focus: {source_data[focus_key]}")

        if not segments:
            return None
        return '; '.join(segments)
    except Exception as e:
        logger.error(f"Error extracting industry trends: {str(e)}")
        return None
|
||||
|
||||
def extract_emerging_trends(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract emerging trends from trend analysis.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        The trend entries joined with ', ' for a list, the value itself
        for a string, and None for a missing source or any other type.
        Errors are logged and yield None.
    """
    try:
        trends = source_data.get('research_preferences.trend_analysis')
        if isinstance(trends, list):
            # Stringify items: str.join raises TypeError on non-string
            # entries, which previously discarded the whole field.
            return ', '.join(str(item) for item in trends)
        if isinstance(trends, str):
            return trends
        return None
    except Exception as e:
        logger.error(f"Error extracting emerging trends: {str(e)}")
        return None
|
||||
|
||||
def extract_preferred_formats(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Extract preferred content formats.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        The content types joined with ', ' for a list, the value itself
        for a string, and None for a missing source or any other type.
        Errors are logged and yield None.
    """
    try:
        content_types = source_data.get('research_preferences.content_types')
        if isinstance(content_types, list):
            # Stringify items: str.join raises TypeError on non-string
            # entries, which previously discarded the whole field.
            return ', '.join(str(item) for item in content_types)
        if isinstance(content_types, str):
            return content_types
        return None
    except Exception as e:
        logger.error(f"Error extracting preferred formats: {str(e)}")
        return None
|
||||
|
||||
def extract_content_mix(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Describe the content mix from content types and content goals.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        'Types: ...' and/or 'Goals: ...' segments (each only when its
        source is present and truthy) joined with '; ', or None when
        neither applies. Errors are logged and yield None.
    """
    types_key = 'research_preferences.content_types'
    goals_key = 'website_analysis.content_goals'
    try:
        segments = []
        if types_key in source_data and source_data[types_key]:
            segments.append(f"Types: {source_data[types_key]}")
        if goals_key in source_data and source_data[goals_key]:
            segments.append(f"Goals: {source_data[goals_key]}")

        if not segments:
            return None
        return '; '.join(segments)
    except Exception as e:
        logger.error(f"Error extracting content mix: {str(e)}")
        return None
|
||||
|
||||
def extract_content_frequency(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the content-calendar preference as the content frequency.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        ``str()`` of the 'research_preferences.content_calendar' entry
        when it is present and truthy, otherwise None. Errors are logged
        and yield None.
    """
    calendar_key = 'research_preferences.content_calendar'
    try:
        if calendar_key not in source_data:
            return None
        calendar_value = source_data[calendar_key]
        return str(calendar_value) if calendar_value else None
    except Exception as e:
        logger.error(f"Error extracting content frequency: {str(e)}")
        return None
|
||||
|
||||
def extract_optimal_timing(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Return the content-calendar preference as the optimal timing.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        ``str()`` of the 'research_preferences.content_calendar' entry
        when it is present and truthy, otherwise None. Errors are logged
        and yield None.
    """
    calendar_key = 'research_preferences.content_calendar'
    try:
        if calendar_key not in source_data:
            return None
        calendar_value = source_data[calendar_key]
        return str(calendar_value) if calendar_value else None
    except Exception as e:
        logger.error(f"Error extracting optimal timing: {str(e)}")
        return None
|
||||
|
||||
def extract_quality_metrics(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Pick the quality-related entries out of the performance metrics.

    Args:
        source_data: Extracted values keyed by dotted source path.
        integrated_data: Full integrated onboarding data (not used here).

    Returns:
        'name: value' pairs, joined with ', ', for every metric whose name
        contains 'quality' (case-insensitive); None when the metrics are
        missing, not a dict, or contain no such entry. Errors are logged
        and yield None.
    """
    metrics_key = 'website_analysis.performance_metrics'
    try:
        if metrics_key not in source_data:
            return None
        metrics = source_data[metrics_key]
        if not isinstance(metrics, dict):
            return None

        rendered = [
            f"{name}: {value}"
            for name, value in metrics.items()
            if 'quality' in name.lower()
        ]
        return ', '.join(rendered) if rendered else None
    except Exception as e:
        logger.error(f"Error extracting quality metrics: {str(e)}")
        return None
|
||||
|
||||
def extract_editorial_guidelines(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Build an editorial-guidelines summary from business type and content types.

    Args:
        source_data: Mapping keyed by dotted source-field names.
        integrated_data: Part of the shared extractor signature; not read here.

    Returns:
        ``"Business Type: ..."`` and/or ``"Content Types: ..."`` joined by
        ``"; "``, including only the fields that are present and truthy.
        ``None`` when neither contributes or an exception is logged.
    """
    # (label, source key) pairs, in the order they appear in the output.
    labelled_fields = (
        ('Business Type', 'website_analysis.business_type'),
        ('Content Types', 'research_preferences.content_types'),
    )
    try:
        parts = []
        for label, field in labelled_fields:
            if field not in source_data:
                continue
            value = source_data[field]
            if value:
                parts.append(f"{label}: {value}")

        if not parts:
            return None
        return '; '.join(parts)
    except Exception as e:
        logger.error(f"Error extracting editorial guidelines: {str(e)}")
        return None
|
||||
|
||||
def extract_brand_voice(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Build a brand-voice summary from business type and session brand voice.

    Args:
        source_data: Mapping keyed by dotted source-field names.
        integrated_data: Part of the shared extractor signature; not read here.

    Returns:
        ``"Business Type: ..."`` and/or ``"Brand Voice: ..."`` joined by
        ``"; "``, including only the fields that are present and truthy.
        ``None`` when neither contributes or an exception is logged.
    """
    # (label, source key) pairs, in the order they appear in the output.
    labelled_fields = (
        ('Business Type', 'website_analysis.business_type'),
        ('Brand Voice', 'onboarding_session.session_data.brand_voice'),
    )
    try:
        indicators = []
        for label, field in labelled_fields:
            if field in source_data:
                value = source_data[field]
                if value:
                    indicators.append(f"{label}: {value}")

        return '; '.join(indicators) if indicators else None
    except Exception as e:
        logger.error(f"Error extracting brand voice: {str(e)}")
        return None
|
||||
|
||||
def extract_traffic_sources(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Summarise the performance metrics whose names mention "traffic".

    Args:
        source_data: Mapping keyed by dotted source-field names.
        integrated_data: Part of the shared extractor signature; not read here.

    Returns:
        A ``"name: value"`` list joined by ``", "`` for every entry of
        ``website_analysis.performance_metrics`` whose key contains
        ``traffic`` (case-insensitive). ``None`` when the key is absent, the
        value is not a dict, nothing matches, or an exception is logged.
    """
    metrics_key = 'website_analysis.performance_metrics'
    try:
        if metrics_key not in source_data:
            return None
        perf = source_data[metrics_key]
        if not isinstance(perf, dict):
            return None

        # Select first, format afterwards, so filtering completes before
        # any value is rendered.
        selected = [(name, value) for name, value in perf.items() if 'traffic' in name.lower()]
        if selected:
            return ', '.join(f"{name}: {value}" for name, value in selected)
        return None
    except Exception as e:
        logger.error(f"Error extracting traffic sources: {str(e)}")
        return None
|
||||
|
||||
def extract_conversion_rates(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Summarise the performance metrics whose names mention "conversion".

    Args:
        source_data: Mapping keyed by dotted source-field names.
        integrated_data: Part of the shared extractor signature; not read here.

    Returns:
        A ``"name: value"`` list joined by ``", "`` for every entry of
        ``website_analysis.performance_metrics`` whose key contains
        ``conversion`` (case-insensitive). ``None`` when the key is absent,
        the value is not a dict, nothing matches, or an exception is logged.
    """
    metrics_key = 'website_analysis.performance_metrics'
    try:
        if metrics_key not in source_data:
            return None
        perf = source_data[metrics_key]
        if not isinstance(perf, dict):
            return None

        # Select first, format afterwards, so filtering completes before
        # any value is rendered.
        selected = [(name, value) for name, value in perf.items() if 'conversion' in name.lower()]
        if selected:
            return ', '.join(f"{name}: {value}" for name, value in selected)
        return None
    except Exception as e:
        logger.error(f"Error extracting conversion rates: {str(e)}")
        return None
|
||||
|
||||
def extract_roi_targets(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Build an ROI-targets summary from the session budget and ROI metrics.

    Args:
        source_data: Mapping keyed by dotted source-field names.
        integrated_data: Part of the shared extractor signature; not read here.

    Returns:
        ``"Budget: ..."`` and/or ``"ROI Metrics: {...}"`` joined by ``"; "``.
        The ROI part is the dict of ``website_analysis.performance_metrics``
        entries whose key contains ``roi`` (case-insensitive), rendered with
        its default dict formatting. ``None`` when neither part contributes
        or an exception is logged.
    """
    budget_key = 'onboarding_session.session_data.budget'
    metrics_key = 'website_analysis.performance_metrics'
    try:
        pieces = []

        # Budget is rendered first so it always leads the summary.
        if budget_key in source_data:
            allocated = source_data[budget_key]
            if allocated:
                pieces.append(f"Budget: {allocated}")

        if metrics_key in source_data:
            perf = source_data[metrics_key]
            if isinstance(perf, dict):
                roi_only = {name: value for name, value in perf.items() if 'roi' in name.lower()}
                if roi_only:
                    pieces.append(f"ROI Metrics: {roi_only}")

        if not pieces:
            return None
        return '; '.join(pieces)
    except Exception as e:
        logger.error(f"Error extracting ROI targets: {str(e)}")
        return None
|
||||
|
||||
def extract_ab_testing_capabilities(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
    """Derive an A/B testing capability label from the team size.

    Args:
        source_data: Mapping keyed by dotted source-field names.
        integrated_data: Part of the shared extractor signature; not read here.

    Returns:
        ``"Advanced A/B testing capabilities"`` for a team size above 5,
        ``"Basic A/B testing capabilities"`` above 2, and
        ``"Limited A/B testing capabilities"`` otherwise. ``None`` when
        ``onboarding_session.session_data.team_size`` is absent or falsy, or
        when the value cannot be interpreted as a number (the failure is
        logged).
    """
    size_key = 'onboarding_session.session_data.team_size'
    try:
        if size_key not in source_data:
            return None
        team_size = source_data[size_key]
        if not team_size:
            return None

        # Convert exactly once. int() rejects decimal-formatted strings such
        # as "6.0", so fall back to float parsing before giving up; anything
        # still unparsable propagates to the handler below.
        try:
            size = int(team_size)
        except ValueError:
            size = int(float(team_size))

        # Simple tiering based on team size.
        if size > 5:
            return "Advanced A/B testing capabilities"
        if size > 2:
            return "Basic A/B testing capabilities"
        return "Limited A/B testing capabilities"
    except Exception as e:
        logger.error(f"Error extracting A/B testing capabilities: {str(e)}")
        return None
|
||||
Reference in New Issue
Block a user