ALwrity version 0.5.4

This commit is contained in:
ajaysi
2025-08-09 23:14:16 +05:30
parent 01fe1e0a9c
commit 5c08b6e007
42 changed files with 3514 additions and 2148 deletions

View File

@@ -1,10 +1,16 @@
"""
Onboarding Module
Onboarding data integration and processing services.
Onboarding data integration and processing.
"""
from .data_integration import OnboardingDataIntegrationService
from .field_transformation import FieldTransformationService
from .data_quality import DataQualityService
from .field_transformation import FieldTransformationService
from .data_processor import OnboardingDataProcessor
__all__ = ['OnboardingDataIntegrationService', 'FieldTransformationService', 'DataQualityService']
__all__ = [
'OnboardingDataIntegrationService',
'DataQualityService',
'FieldTransformationService',
'OnboardingDataProcessor'
]

View File

@@ -305,19 +305,28 @@ class OnboardingDataIntegrationService:
).first()
if existing_record:
existing_record.website_analysis_data = integrated_data.get('website_analysis', {})
existing_record.research_preferences_data = integrated_data.get('research_preferences', {})
existing_record.api_keys_data = integrated_data.get('api_keys_data', {})
# Use legacy columns that are known to exist
if hasattr(existing_record, 'website_analysis_data'):
existing_record.website_analysis_data = integrated_data.get('website_analysis', {})
if hasattr(existing_record, 'research_preferences_data'):
existing_record.research_preferences_data = integrated_data.get('research_preferences', {})
if hasattr(existing_record, 'api_keys_data'):
existing_record.api_keys_data = integrated_data.get('api_keys_data', {})
existing_record.updated_at = datetime.utcnow()
else:
new_record = OnboardingDataIntegration(
user_id=user_id,
website_analysis_data=integrated_data.get('website_analysis', {}),
research_preferences_data=integrated_data.get('research_preferences', {}),
api_keys_data=integrated_data.get('api_keys_data', {}),
created_at=datetime.utcnow(),
updated_at=datetime.utcnow()
)
new_kwargs = {
'user_id': user_id,
'created_at': datetime.utcnow(),
'updated_at': datetime.utcnow()
}
if 'website_analysis' in integrated_data:
new_kwargs['website_analysis_data'] = integrated_data.get('website_analysis', {})
if 'research_preferences' in integrated_data:
new_kwargs['research_preferences_data'] = integrated_data.get('research_preferences', {})
if 'api_keys_data' in integrated_data:
new_kwargs['api_keys_data'] = integrated_data.get('api_keys_data', {})
new_record = OnboardingDataIntegration(**new_kwargs)
db.add(new_record)
db.commit()
@@ -326,6 +335,8 @@ class OnboardingDataIntegrationService:
except Exception as e:
logger.error(f"Error storing integrated data for user {user_id}: {str(e)}")
db.rollback()
# Soft-fail storage: do not break the refresh path
return
def _get_fallback_data(self) -> Dict[str, Any]:
"""Get fallback data when processing fails."""

View File

@@ -0,0 +1,301 @@
"""
Onboarding Data Processor
Handles processing and transformation of onboarding data for strategic intelligence.
"""
import logging
from typing import Dict, List, Any, Optional, Union
from datetime import datetime
from sqlalchemy.orm import Session
# Import database models
from models.onboarding import OnboardingSession, WebsiteAnalysis, ResearchPreferences, APIKey
logger = logging.getLogger(__name__)
class OnboardingDataProcessor:
"""Processes and transforms onboarding data for strategic intelligence generation."""
def __init__(self):
pass
async def process_onboarding_data(self, user_id: int, db: Session) -> Optional[Dict[str, Any]]:
"""Process onboarding data for a user and return structured data for strategic intelligence."""
try:
logger.info(f"Processing onboarding data for user {user_id}")
# Get onboarding session
onboarding_session = db.query(OnboardingSession).filter(
OnboardingSession.user_id == user_id
).first()
if not onboarding_session:
logger.warning(f"No onboarding session found for user {user_id}")
return None
# Get website analysis data
website_analysis = db.query(WebsiteAnalysis).filter(
WebsiteAnalysis.session_id == onboarding_session.id
).first()
# Get research preferences data
research_preferences = db.query(ResearchPreferences).filter(
ResearchPreferences.session_id == onboarding_session.id
).first()
# Get API keys data
api_keys = db.query(APIKey).filter(
APIKey.session_id == onboarding_session.id
).all()
# Process each data type
processed_data = {
'website_analysis': await self._process_website_analysis(website_analysis),
'research_preferences': await self._process_research_preferences(research_preferences),
'api_keys_data': await self._process_api_keys_data(api_keys),
'session_data': self._process_session_data(onboarding_session)
}
# Transform into strategic intelligence format
strategic_data = self._transform_to_strategic_format(processed_data)
logger.info(f"Successfully processed onboarding data for user {user_id}")
return strategic_data
except Exception as e:
logger.error(f"Error processing onboarding data for user {user_id}: {str(e)}")
return None
async def _process_website_analysis(self, website_analysis: Optional[WebsiteAnalysis]) -> Dict[str, Any]:
"""Process website analysis data."""
if not website_analysis:
return {}
try:
return {
'website_url': getattr(website_analysis, 'website_url', ''),
'industry': getattr(website_analysis, 'industry', 'Technology'), # Default value if attribute doesn't exist
'content_goals': getattr(website_analysis, 'content_goals', []),
'performance_metrics': getattr(website_analysis, 'performance_metrics', {}),
'traffic_sources': getattr(website_analysis, 'traffic_sources', []),
'content_gaps': getattr(website_analysis, 'content_gaps', []),
'topics': getattr(website_analysis, 'topics', []),
'content_quality_score': getattr(website_analysis, 'content_quality_score', 0),
'seo_opportunities': getattr(website_analysis, 'seo_opportunities', []),
'competitors': getattr(website_analysis, 'competitors', []),
'competitive_advantages': getattr(website_analysis, 'competitive_advantages', []),
'market_gaps': getattr(website_analysis, 'market_gaps', []),
'last_updated': website_analysis.updated_at.isoformat() if hasattr(website_analysis, 'updated_at') and website_analysis.updated_at else None
}
except Exception as e:
logger.error(f"Error processing website analysis: {str(e)}")
return {}
async def _process_research_preferences(self, research_preferences: Optional[ResearchPreferences]) -> Dict[str, Any]:
"""Process research preferences data."""
if not research_preferences:
return {}
try:
return {
'content_preferences': {
'preferred_formats': research_preferences.content_types,
'content_topics': research_preferences.research_topics,
'content_style': research_preferences.writing_style.get('tone', []) if research_preferences.writing_style else [],
'content_length': research_preferences.content_length,
'visual_preferences': research_preferences.visual_preferences
},
'audience_research': {
'target_audience': research_preferences.target_audience.get('demographics', []) if research_preferences.target_audience else [],
'audience_pain_points': research_preferences.target_audience.get('pain_points', []) if research_preferences.target_audience else [],
'buying_journey': research_preferences.target_audience.get('buying_journey', {}) if research_preferences.target_audience else {},
'consumption_patterns': research_preferences.target_audience.get('consumption_patterns', {}) if research_preferences.target_audience else {}
},
'research_goals': {
'primary_goals': research_preferences.research_topics,
'secondary_goals': research_preferences.content_types,
'success_metrics': research_preferences.success_metrics
},
'last_updated': research_preferences.updated_at.isoformat() if research_preferences.updated_at else None
}
except Exception as e:
logger.error(f"Error processing research preferences: {str(e)}")
return {}
async def _process_api_keys_data(self, api_keys: List[APIKey]) -> Dict[str, Any]:
"""Process API keys data."""
try:
processed_data = {
'analytics_data': {},
'social_media_data': {},
'competitor_data': {},
'last_updated': None
}
for api_key in api_keys:
if api_key.provider == 'google_analytics':
processed_data['analytics_data']['google_analytics'] = {
'connected': True,
'data_available': True,
'metrics': api_key.metrics if api_key.metrics else {}
}
elif api_key.provider == 'google_search_console':
processed_data['analytics_data']['google_search_console'] = {
'connected': True,
'data_available': True,
'metrics': api_key.metrics if api_key.metrics else {}
}
elif api_key.provider in ['linkedin', 'twitter', 'facebook']:
processed_data['social_media_data'][api_key.provider] = {
'connected': True,
'followers': api_key.metrics.get('followers', 0) if api_key.metrics else 0
}
elif api_key.provider in ['semrush', 'ahrefs', 'moz']:
processed_data['competitor_data'][api_key.provider] = {
'connected': True,
'competitors_analyzed': api_key.metrics.get('competitors_analyzed', 0) if api_key.metrics else 0
}
# Update last_updated if this key is more recent
if api_key.updated_at and (not processed_data['last_updated'] or api_key.updated_at > datetime.fromisoformat(processed_data['last_updated'])):
processed_data['last_updated'] = api_key.updated_at.isoformat()
return processed_data
except Exception as e:
logger.error(f"Error processing API keys data: {str(e)}")
return {}
def _process_session_data(self, onboarding_session: OnboardingSession) -> Dict[str, Any]:
"""Process onboarding session data."""
try:
return {
'session_id': getattr(onboarding_session, 'id', None),
'user_id': getattr(onboarding_session, 'user_id', None),
'created_at': onboarding_session.created_at.isoformat() if hasattr(onboarding_session, 'created_at') and onboarding_session.created_at else None,
'updated_at': onboarding_session.updated_at.isoformat() if hasattr(onboarding_session, 'updated_at') and onboarding_session.updated_at else None,
'completion_status': getattr(onboarding_session, 'completion_status', 'in_progress'),
'session_data': getattr(onboarding_session, 'session_data', {}),
'progress_percentage': getattr(onboarding_session, 'progress_percentage', 0),
'last_activity': getattr(onboarding_session, 'last_activity', None)
}
except Exception as e:
logger.error(f"Error processing session data: {str(e)}")
return {}
def _transform_to_strategic_format(self, processed_data: Dict[str, Any]) -> Dict[str, Any]:
"""Transform processed onboarding data into strategic intelligence format."""
try:
website_data = processed_data.get('website_analysis', {})
research_data = processed_data.get('research_preferences', {})
api_data = processed_data.get('api_keys_data', {})
session_data = processed_data.get('session_data', {})
# Return data in nested format that field transformation service expects
return {
'website_analysis': {
'content_goals': website_data.get('content_goals', []),
'performance_metrics': website_data.get('performance_metrics', {}),
'competitors': website_data.get('competitors', []),
'content_gaps': website_data.get('content_gaps', []),
'industry': website_data.get('industry', 'Technology'),
'target_audience': website_data.get('target_audience', {}),
'business_type': website_data.get('business_type', 'Technology')
},
'research_preferences': {
'content_types': research_data.get('content_preferences', {}).get('preferred_formats', []),
'research_topics': research_data.get('research_topics', []),
'performance_tracking': research_data.get('performance_tracking', []),
'competitor_analysis': research_data.get('competitor_analysis', []),
'target_audience': research_data.get('audience_research', {}).get('target_audience', {}),
'industry_focus': research_data.get('industry_focus', []),
'trend_analysis': research_data.get('trend_analysis', []),
'content_calendar': research_data.get('content_calendar', {})
},
'onboarding_session': {
'session_data': {
'budget': session_data.get('budget', 3000),
'team_size': session_data.get('team_size', 2),
'timeline': session_data.get('timeline', '3 months'),
'brand_voice': session_data.get('brand_voice', 'Professional yet approachable')
}
}
}
except Exception as e:
logger.error(f"Error transforming to strategic format: {str(e)}")
return {}
def calculate_data_quality_scores(self, processed_data: Dict[str, Any]) -> Dict[str, float]:
"""Calculate quality scores for each data source."""
scores = {}
for source, data in processed_data.items():
if data and isinstance(data, dict):
# Simple scoring based on data completeness
total_fields = len(data)
present_fields = len([v for v in data.values() if v is not None and v != {}])
completeness = present_fields / total_fields if total_fields > 0 else 0.0
scores[source] = completeness * 100
else:
scores[source] = 0.0
return scores
def calculate_confidence_levels(self, processed_data: Dict[str, Any]) -> Dict[str, float]:
"""Calculate confidence levels for processed data."""
confidence_levels = {}
# Base confidence on data source quality
base_confidence = {
'website_analysis': 0.8,
'research_preferences': 0.7,
'api_keys_data': 0.6,
'session_data': 0.9
}
for source, data in processed_data.items():
if data and isinstance(data, dict):
# Adjust confidence based on data completeness
quality_score = self.calculate_data_quality_scores({source: data})[source] / 100
base_conf = base_confidence.get(source, 0.5)
confidence_levels[source] = base_conf * quality_score
else:
confidence_levels[source] = 0.0
return confidence_levels
def calculate_data_freshness(self, session_data: Dict[str, Any]) -> Dict[str, Any]:
"""Calculate data freshness for onboarding data."""
try:
updated_at = session_data.get('updated_at')
if not updated_at:
return {'status': 'unknown', 'age_days': 'unknown'}
# Convert string to datetime if needed
if isinstance(updated_at, str):
try:
updated_at = datetime.fromisoformat(updated_at.replace('Z', '+00:00'))
except ValueError:
return {'status': 'unknown', 'age_days': 'unknown'}
age_days = (datetime.utcnow() - updated_at).days
if age_days <= 7:
status = 'fresh'
elif age_days <= 30:
status = 'recent'
elif age_days <= 90:
status = 'aging'
else:
status = 'stale'
return {
'status': status,
'age_days': age_days,
'last_updated': updated_at.isoformat() if hasattr(updated_at, 'isoformat') else str(updated_at)
}
except Exception as e:
logger.error(f"Error calculating data freshness: {str(e)}")
return {'status': 'unknown', 'age_days': 'unknown'}

View File

@@ -92,7 +92,8 @@ class DataQualityService:
except Exception as e:
logger.error(f"Error assessing data quality: {str(e)}")
return self._get_fallback_quality_assessment()
# Raise exception instead of returning fallback data
raise Exception(f"Failed to assess data quality: {str(e)}")
def _assess_website_analysis_quality(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess quality of website analysis data."""
@@ -501,22 +502,6 @@ class DataQualityService:
logger.error(f"Error identifying quality issues: {str(e)}")
return ["Unable to identify issues due to assessment error"]
def _get_fallback_quality_assessment(self) -> Dict[str, Any]:
"""Get fallback quality assessment when assessment fails."""
return {
'overall_score': 0.0,
'completeness': 0.0,
'freshness': 0.0,
'accuracy': 0.0,
'relevance': 0.0,
'consistency': 0.0,
'confidence': 0.0,
'quality_level': 'poor',
'recommendations': ['Unable to assess data quality'],
'issues': ['Quality assessment failed'],
'assessment_timestamp': datetime.utcnow().isoformat()
}
def validate_field_data(self, field_data: Dict[str, Any]) -> Dict[str, Any]:
"""Validate individual field data."""
try:

View File

@@ -147,48 +147,108 @@ class FieldTransformationService:
}
def transform_onboarding_data_to_fields(self, integrated_data: Dict[str, Any]) -> Dict[str, Any]:
"""Transform integrated onboarding data to strategic input fields."""
"""Transform onboarding data to strategic input fields."""
try:
logger.info("Transforming onboarding data to strategic fields")
transformed_fields = {}
data_sources = {}
for field_id, mapping_config in self.field_mappings.items():
try:
# Extract data from sources
source_data = self._extract_source_data(integrated_data, mapping_config['sources'])
if source_data:
# Apply transformation
transformation_method = getattr(self, mapping_config['transformation'])
transformed_value = transformation_method(source_data, integrated_data)
if transformed_value:
transformed_fields[field_id] = transformed_value
data_sources[field_id] = self._get_data_source_info(mapping_config['sources'], integrated_data)
except Exception as e:
logger.warning(f"Error transforming field {field_id}: {str(e)}")
continue
result = {
'fields': transformed_fields,
'sources': data_sources,
'transformation_metadata': {
'total_fields_processed': len(self.field_mappings),
'successful_transformations': len(transformed_fields),
'transformation_timestamp': datetime.utcnow().isoformat()
}
transformation_metadata = {
'total_fields': 0,
'populated_fields': 0,
'data_sources_used': [],
'confidence_scores': {}
}
logger.info(f"Successfully transformed {len(transformed_fields)} fields from onboarding data")
return result
# Process each field mapping
for field_name, mapping in self.field_mappings.items():
try:
sources = mapping.get('sources', [])
transformation_method = mapping.get('transformation')
# Extract source data
source_data = self._extract_source_data(integrated_data, sources)
# Apply transformation if method exists
if transformation_method and hasattr(self, transformation_method):
transform_func = getattr(self, transformation_method)
field_value = transform_func(source_data, integrated_data)
else:
# Default transformation - use first available source data
field_value = self._default_transformation(source_data, field_name)
# If no value found, provide default based on field type
if field_value is None or field_value == "":
field_value = self._get_default_value_for_field(field_name)
if field_value is not None:
transformed_fields[field_name] = {
'value': field_value,
'source': sources[0] if sources else 'default',
'confidence': self._calculate_field_confidence(source_data, sources),
'auto_populated': True
}
transformation_metadata['populated_fields'] += 1
transformation_metadata['total_fields'] += 1
except Exception as e:
logger.error(f"Error transforming field {field_name}: {str(e)}")
# Don't provide fallback data - let the error propagate
transformation_metadata['total_fields'] += 1
logger.info(f"Successfully transformed {transformation_metadata['populated_fields']} fields from onboarding data")
return {
'fields': transformed_fields,
'sources': self._get_data_source_info(list(self.field_mappings.keys()), integrated_data),
'transformation_metadata': transformation_metadata
}
except Exception as e:
logger.error(f"Error transforming onboarding data to fields: {str(e)}")
logger.error(f"Error in transform_onboarding_data_to_fields: {str(e)}")
return {'fields': {}, 'sources': {}, 'transformation_metadata': {'error': str(e)}}
def get_data_sources(self, integrated_data: Dict[str, Any]) -> Dict[str, Any]:
"""Get data sources information for the transformed fields."""
try:
sources_info = {}
for field_name, mapping in self.field_mappings.items():
sources = mapping.get('sources', [])
sources_info[field_name] = {
'sources': sources,
'source_count': len(sources),
'has_data': any(self._has_source_data(integrated_data, source) for source in sources)
}
return sources_info
except Exception as e:
logger.error(f"Error getting data sources: {str(e)}")
return {}
def get_detailed_input_data_points(self, integrated_data: Dict[str, Any]) -> Dict[str, Any]:
"""Get detailed input data points for debugging and analysis."""
try:
data_points = {}
for field_name, mapping in self.field_mappings.items():
sources = mapping.get('sources', [])
source_data = {}
for source in sources:
source_data[source] = {
'exists': self._has_source_data(integrated_data, source),
'value': self._get_nested_value(integrated_data, source),
'type': type(self._get_nested_value(integrated_data, source)).__name__
}
data_points[field_name] = {
'sources': source_data,
'transformation_method': mapping.get('transformation'),
'has_data': any(source_data[source]['exists'] for source in sources)
}
return data_points
except Exception as e:
logger.error(f"Error getting detailed input data points: {str(e)}")
return {}
def _extract_source_data(self, integrated_data: Dict[str, Any], sources: List[str]) -> Dict[str, Any]:
"""Extract data from specified sources."""
source_data = {}
@@ -362,22 +422,34 @@ class FieldTransformationService:
return None
def extract_competitive_position(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
"""Extract competitive position from competitor data."""
"""Extract and normalize competitive position to one of Leader, Challenger, Niche, Emerging."""
try:
position_indicators = []
text_blobs: list[str] = []
if 'website_analysis.competitors' in source_data:
competitors = source_data['website_analysis.competitors']
if competitors:
position_indicators.append(f"Competitors: {competitors}")
if isinstance(competitors, (str, list, dict)):
text_blobs.append(str(competitors))
if 'research_preferences.competitor_analysis' in source_data:
analysis = source_data['research_preferences.competitor_analysis']
if analysis:
position_indicators.append(f"Analysis: {analysis}")
return '; '.join(position_indicators) if position_indicators else None
if isinstance(analysis, (str, list, dict)):
text_blobs.append(str(analysis))
blob = ' '.join(text_blobs).lower()
# Simple keyword heuristics
if any(kw in blob for kw in ['leader', 'market leader', 'category leader', 'dominant']):
return 'Leader'
if any(kw in blob for kw in ['challenger', 'fast follower', 'aggressive']):
return 'Challenger'
if any(kw in blob for kw in ['niche', 'niche player', 'specialized']):
return 'Niche'
if any(kw in blob for kw in ['emerging', 'new entrant', 'startup', 'growing']):
return 'Emerging'
# No clear signal; let default take over
return None
except Exception as e:
logger.error(f"Error extracting competitive position: {str(e)}")
return None
@@ -427,6 +499,15 @@ class FieldTransformationService:
if research_audience:
patterns.append(f"Research Audience: {research_audience}")
# If we have consumption data as a dict, format it nicely
if isinstance(integrated_data.get('consumption_patterns'), dict):
consumption_data = integrated_data['consumption_patterns']
if isinstance(consumption_data, dict):
formatted_patterns = []
for platform, percentage in consumption_data.items():
formatted_patterns.append(f"{platform.title()}: {percentage}%")
patterns.append(', '.join(formatted_patterns))
return '; '.join(patterns) if patterns else None
except Exception as e:
@@ -465,6 +546,16 @@ class FieldTransformationService:
audience = source_data['website_analysis.target_audience']
if audience:
return f"Journey based on: {audience}"
# If we have buying journey data as a dict, format it nicely
if isinstance(integrated_data.get('buying_journey'), dict):
journey_data = integrated_data['buying_journey']
if isinstance(journey_data, dict):
formatted_journey = []
for stage, percentage in journey_data.items():
formatted_journey.append(f"{stage.title()}: {percentage}%")
return ', '.join(formatted_journey)
return None
except Exception as e:
@@ -599,16 +690,51 @@ class FieldTransformationService:
return None
def extract_preferred_formats(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
"""Extract preferred content formats."""
"""Extract preferred content formats and normalize to UI option labels array."""
try:
def to_canonical(label: str) -> Optional[str]:
normalized = label.strip().lower()
mapping = {
'blog': 'Blog Posts',
'blog post': 'Blog Posts',
'blog posts': 'Blog Posts',
'article': 'Blog Posts',
'articles': 'Blog Posts',
'video': 'Videos',
'videos': 'Videos',
'infographic': 'Infographics',
'infographics': 'Infographics',
'webinar': 'Webinars',
'webinars': 'Webinars',
'podcast': 'Podcasts',
'podcasts': 'Podcasts',
'case study': 'Case Studies',
'case studies': 'Case Studies',
'whitepaper': 'Whitepapers',
'whitepapers': 'Whitepapers',
'social': 'Social Media Posts',
'social media': 'Social Media Posts',
'social media posts': 'Social Media Posts'
}
return mapping.get(normalized, None)
if 'research_preferences.content_types' in source_data:
content_types = source_data['research_preferences.content_types']
canonical: list[str] = []
if isinstance(content_types, list):
return ', '.join(content_types)
for item in content_types:
if isinstance(item, str):
canon = to_canonical(item)
if canon and canon not in canonical:
canonical.append(canon)
elif isinstance(content_types, str):
return content_types
for part in content_types.split(','):
canon = to_canonical(part)
if canon and canon not in canonical:
canonical.append(canon)
if canonical:
return canonical
return None
except Exception as e:
logger.error(f"Error extracting preferred formats: {str(e)}")
return None
@@ -654,6 +780,20 @@ class FieldTransformationService:
calendar = source_data['research_preferences.content_calendar']
if calendar:
return str(calendar)
# If we have optimal timing data as a dict, format it nicely
if isinstance(integrated_data.get('optimal_timing'), dict):
timing_data = integrated_data['optimal_timing']
if isinstance(timing_data, dict):
formatted_timing = []
if 'best_days' in timing_data:
days = timing_data['best_days']
if isinstance(days, list):
formatted_timing.append(f"Best Days: {', '.join(days)}")
if 'best_time' in timing_data:
formatted_timing.append(f"Best Time: {timing_data['best_time']}")
return ', '.join(formatted_timing)
return None
except Exception as e:
@@ -668,7 +808,19 @@ class FieldTransformationService:
if isinstance(metrics, dict):
quality_metrics = {k: v for k, v in metrics.items() if 'quality' in k.lower()}
if quality_metrics:
return ', '.join([f"{k}: {v}" for k, v in quality_metrics.items()])
return ', '.join([f"{k.title()}: {v}" for k, v in quality_metrics.items()])
elif isinstance(metrics, str):
return metrics
# If we have quality metrics data as a dict, format it nicely
if isinstance(integrated_data.get('quality_metrics'), dict):
quality_data = integrated_data['quality_metrics']
if isinstance(quality_data, dict):
formatted_metrics = []
for metric, value in quality_data.items():
formatted_metrics.append(f"{metric.title()}: {value}")
return ', '.join(formatted_metrics)
return None
except Exception as e:
@@ -725,7 +877,9 @@ class FieldTransformationService:
if isinstance(metrics, dict):
traffic_metrics = {k: v for k, v in metrics.items() if 'traffic' in k.lower()}
if traffic_metrics:
return ', '.join([f"{k}: {v}" for k, v in traffic_metrics.items()])
return ', '.join([f"{k.title()}: {v}%" for k, v in traffic_metrics.items()])
elif isinstance(metrics, str):
return metrics
return None
except Exception as e:
@@ -740,7 +894,9 @@ class FieldTransformationService:
if isinstance(metrics, dict):
conversion_metrics = {k: v for k, v in metrics.items() if 'conversion' in k.lower()}
if conversion_metrics:
return ', '.join([f"{k}: {v}" for k, v in conversion_metrics.items()])
return ', '.join([f"{k.title()}: {v}%" for k, v in conversion_metrics.items()])
elif isinstance(metrics, str):
return metrics
return None
except Exception as e:
@@ -770,21 +926,135 @@ class FieldTransformationService:
logger.error(f"Error extracting ROI targets: {str(e)}")
return None
def extract_ab_testing_capabilities(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[str]:
def extract_ab_testing_capabilities(self, source_data: Dict[str, Any], integrated_data: Dict[str, Any]) -> Optional[bool]:
"""Extract A/B testing capabilities from team size."""
try:
if 'onboarding_session.session_data.team_size' in source_data:
team_size = source_data['onboarding_session.session_data.team_size']
if team_size:
# Simple logic based on team size
if int(team_size) > 5:
return "Advanced A/B testing capabilities"
elif int(team_size) > 2:
return "Basic A/B testing capabilities"
else:
return "Limited A/B testing capabilities"
return None
# Return boolean based on team size
team_size_int = int(team_size) if isinstance(team_size, (str, int, float)) else 1
return team_size_int > 2 # True if team size > 2, False otherwise
# Default to False if no team size data
return False
except Exception as e:
logger.error(f"Error extracting A/B testing capabilities: {str(e)}")
return False
def _get_default_value_for_field(self, field_name: str) -> Any:
"""Get default value for a field when no data is available."""
# Provide sensible defaults for required fields
default_values = {
'business_objectives': 'Lead Generation, Brand Awareness',
'target_metrics': 'Traffic Growth: 30%, Engagement Rate: 5%, Conversion Rate: 2%',
'content_budget': 1000,
'team_size': 1,
'implementation_timeline': '3 months',
'market_share': 'Small but growing',
'competitive_position': 'Niche',
'performance_metrics': 'Current Traffic: 1000, Current Engagement: 3%',
'content_preferences': 'Blog posts, Social media content',
'consumption_patterns': 'Mobile: 60%, Desktop: 40%',
'audience_pain_points': 'Time constraints, Content quality',
'buying_journey': 'Awareness: 40%, Consideration: 35%, Decision: 25%',
'seasonal_trends': 'Q4 peak, Summer slowdown',
'engagement_metrics': 'Likes: 100, Shares: 20, Comments: 15',
'top_competitors': 'Competitor A, Competitor B',
'competitor_content_strategies': 'Blog-focused, Video-heavy',
'market_gaps': 'Underserved niche, Content gap',
'industry_trends': 'AI integration, Video content',
'emerging_trends': 'Voice search, Interactive content',
'preferred_formats': ['Blog Posts', 'Videos', 'Infographics'],
'content_mix': 'Educational: 40%, Entertaining: 30%, Promotional: 30%',
'content_frequency': 'Weekly',
'optimal_timing': 'Best Days: Tuesday, Thursday, Best Time: 10 AM',
'quality_metrics': 'Readability: 8, Engagement: 7, SEO Score: 6',
'editorial_guidelines': 'Professional tone, Clear structure',
'brand_voice': 'Professional yet approachable',
'traffic_sources': 'Organic: 60%, Social: 25%, Direct: 15%',
'conversion_rates': 'Overall: 2%, Blog: 3%, Landing Pages: 5%',
'content_roi_targets': 'Target ROI: 300%, Break Even: 6 months',
'ab_testing_capabilities': False
}
return default_values.get(field_name, None)
def _default_transformation(self, source_data: Dict[str, Any], field_name: str) -> Any:
"""Default transformation when no specific method is available."""
try:
# Try to find any non-empty value in source data
for key, value in source_data.items():
if value is not None and value != "":
# For budget and team_size, try to convert to number
if field_name in ['content_budget', 'team_size'] and isinstance(value, (str, int, float)):
try:
return int(value) if field_name == 'team_size' else float(value)
except (ValueError, TypeError):
continue
# For other fields, return the first non-empty value
return value
# If no value found, return None
return None
except Exception as e:
logger.error(f"Error in default transformation for {field_name}: {str(e)}")
return None
def _calculate_field_confidence(self, source_data: Dict[str, Any], sources: List[str]) -> float:
"""Calculate confidence score for a field based on data quality and source availability."""
try:
if not source_data:
return 0.3 # Low confidence when no data
# Check data quality indicators
data_quality_score = 0.0
total_indicators = 0
# Check if data is not empty
for key, value in source_data.items():
if value is not None and value != "":
data_quality_score += 1.0
total_indicators += 1
# Check source availability
source_availability = len([s for s in sources if self._has_source_data(source_data, s)]) / max(len(sources), 1)
# Calculate final confidence
if total_indicators > 0:
data_quality = data_quality_score / total_indicators
confidence = (data_quality + source_availability) / 2
return min(confidence, 1.0) # Cap at 1.0
else:
return 0.3 # Default low confidence
except Exception as e:
logger.error(f"Error calculating field confidence: {str(e)}")
return 0.3 # Default low confidence
def _has_source_data(self, integrated_data: Dict[str, Any], source_path: str) -> bool:
"""Check if source data exists in integrated data."""
try:
value = self._get_nested_value(integrated_data, source_path)
return value is not None and value != ""
except Exception as e:
logger.debug(f"Error checking source data for {source_path}: {str(e)}")
return False
def _get_nested_value(self, data: Dict[str, Any], path: str) -> Any:
"""Get nested value from dictionary using dot notation."""
try:
keys = path.split('.')
value = data
for key in keys:
if isinstance(value, dict) and key in value:
value = value[key]
else:
return None
return value
except Exception as e:
logger.debug(f"Error getting nested value for {path}: {str(e)}")
return None