# Files
# moreminimore-marketing/backend/services/onboarding/data_service.py
# Kunthawat Greethong c35fa52117 Base code
# 2026-01-08 22:39:53 +07:00
#
# 292 lines
# 13 KiB
# Python

"""
Onboarding Data Service
Extracts real user data from onboarding to personalize AI inputs
"""
from typing import Dict, Any, List, Optional
from sqlalchemy.orm import Session
from loguru import logger
from datetime import datetime
import json
from services.database import get_db_session
from models.onboarding import OnboardingSession, WebsiteAnalysis, ResearchPreferences
class OnboardingDataService:
    """Service to extract and use real onboarding data for AI personalization.

    Lookups run on the session passed to the constructor when there is one.
    Otherwise each lookup obtains its own session from ``get_db_session()``
    and closes it once the lookup has finished.
    """

    def __init__(self, db: Optional["Session"] = None):
        """Initialize the onboarding data service.

        Args:
            db: Optional database session to run queries on. A session passed
                in here is owned by the caller and is never closed by this
                service.
        """
        self.db = db
        logger.info("OnboardingDataService initialized")

    def _fetch_onboarding_record(
        self, user_id: int, model: Any, label: str
    ) -> Optional[Dict[str, Any]]:
        """Load the ``model`` row attached to the user's onboarding session.

        Args:
            user_id: User ID to get data for
            model: Mapped class to query; it is filtered on ``session_id`` and
                the matching row is serialized with ``to_dict()``
            label: Human-readable record name used in log messages

        Returns:
            The row as a dictionary, or None if the user has no onboarding
            session, no matching row exists, or the lookup failed
        """
        # Only a session created here may be closed here; a caller-provided
        # session stays under the caller's control.
        owns_session = self.db is None
        session = None
        try:
            session = get_db_session() if owns_session else self.db

            # Find onboarding session for user
            onboarding_session = session.query(OnboardingSession).filter(
                OnboardingSession.user_id == user_id
            ).first()
            if not onboarding_session:
                logger.warning(f"No onboarding session found for user {user_id}")
                return None

            # Get the requested record for this session
            record = session.query(model).filter(
                model.session_id == onboarding_session.id
            ).first()
            if not record:
                logger.warning(f"No {label} found for user {user_id}")
                return None

            # Serialize before the session is released in ``finally``.
            return record.to_dict()
        except Exception as e:
            logger.error(f"Error getting {label} for user {user_id}: {str(e)}")
            return None
        finally:
            if owns_session and session is not None:
                try:
                    session.close()
                except Exception as close_error:
                    # Cleanup failures must not turn a finished lookup into
                    # an exception for the caller.
                    logger.warning(f"Error closing database session: {str(close_error)}")

    def get_user_website_analysis(self, user_id: int) -> Optional[Dict[str, Any]]:
        """
        Get website analysis data for a specific user.

        Args:
            user_id: User ID to get data for

        Returns:
            Website analysis data or None if not found
        """
        return self._fetch_onboarding_record(user_id, WebsiteAnalysis, "website analysis")

    def get_user_research_preferences(self, user_id: int) -> Optional[Dict[str, Any]]:
        """
        Get research preferences for a specific user.

        Args:
            user_id: User ID to get data for

        Returns:
            Research preferences data or None if not found
        """
        return self._fetch_onboarding_record(user_id, ResearchPreferences, "research preferences")

    def get_personalized_ai_inputs(self, user_id: int) -> Dict[str, Any]:
        """
        Get personalized AI inputs based on user's onboarding data.

        Args:
            user_id: User ID to get personalized data for

        Returns:
            Personalized data for AI analysis. Falls back to the default
            inputs when no website analysis exists or when building the
            personalized inputs fails.
        """
        try:
            logger.info(f"Getting personalized AI inputs for user {user_id}")

            website_analysis = self.get_user_website_analysis(user_id)
            if not website_analysis:
                logger.warning(f"No onboarding data found for user {user_id}, using defaults")
                return self._get_default_ai_inputs()

            # Only query research preferences once we know they will be used.
            research_prefs = self.get_user_research_preferences(user_id)

            personalized_inputs = self._build_personalized_inputs(website_analysis, research_prefs)
            logger.info(f"✅ Generated personalized AI inputs for user {user_id}")
            return personalized_inputs
        except Exception as e:
            logger.error(f"Error generating personalized AI inputs for user {user_id}: {str(e)}")
            return self._get_default_ai_inputs()

    def _build_personalized_inputs(
        self,
        website_analysis: Dict[str, Any],
        research_prefs: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Assemble the AI input payload from already-loaded onboarding data.

        Args:
            website_analysis: Website analysis dictionary
            research_prefs: Research preferences dictionary, if available

        Returns:
            Dictionary with ``website_analysis``, ``competitor_analysis``,
            ``gap_analysis`` and ``keyword_analysis`` sections, plus
            ``research_preferences`` when research preferences were given
        """
        # ``or {}`` rather than a ``.get`` default: a key that is present but
        # holds None would otherwise break every lookup below.
        writing_style = website_analysis.get('writing_style') or {}
        target_audience = website_analysis.get('target_audience') or {}
        content_type = website_analysis.get('content_type') or {}

        personalized_inputs = {
            "website_analysis": {
                "website_url": website_analysis.get('website_url', ''),
                "content_types": self._extract_content_types(content_type),
                "writing_style": writing_style.get('tone', 'professional'),
                "target_audience": target_audience.get('demographics', ['professionals']),
                "industry_focus": target_audience.get('industry_focus', 'general'),
                "expertise_level": target_audience.get('expertise_level', 'intermediate')
            },
            "competitor_analysis": {
                "top_performers": self._generate_competitor_suggestions(target_audience),
                "industry": target_audience.get('industry_focus', 'general'),
                "target_demographics": target_audience.get('demographics', [])
            },
            "gap_analysis": {
                "content_gaps": self._identify_content_gaps(content_type, writing_style),
                "target_keywords": self._generate_target_keywords(target_audience),
                "content_opportunities": self._identify_opportunities(content_type)
            },
            "keyword_analysis": {
                "high_value_keywords": self._generate_high_value_keywords(target_audience),
                "content_topics": self._generate_content_topics(content_type),
                "search_intent": self._analyze_search_intent(target_audience)
            }
        }

        # Add research preferences if available
        if research_prefs:
            personalized_inputs["research_preferences"] = {
                "research_depth": research_prefs.get('research_depth', 'Standard'),
                "content_types": research_prefs.get('content_types', []),
                "auto_research": research_prefs.get('auto_research', True),
                "factual_content": research_prefs.get('factual_content', True)
            }

        return personalized_inputs

    def _extract_content_types(self, content_type: Dict[str, Any]) -> List[str]:
        """Extract content types from content type analysis.

        Returns the primary type followed by the secondary types, or
        ``['blog', 'article']`` when neither is set.
        """
        types = []
        primary = content_type.get('primary_type')
        if primary:
            types.append(primary)
        secondary = content_type.get('secondary_types')
        if secondary:
            if isinstance(secondary, str):
                # A bare string is one type; extend() would split it into
                # single characters.
                types.append(secondary)
            else:
                types.extend(secondary)
        return types if types else ['blog', 'article']

    def _generate_competitor_suggestions(self, target_audience: Dict[str, Any]) -> List[str]:
        """Generate competitor suggestions based on target audience.

        Only ``industry_focus`` is consulted; unknown industries get
        placeholder competitor names.
        """
        industry = target_audience.get('industry_focus', 'general')
        if industry == 'technology':
            return ['techcrunch.com', 'wired.com', 'theverge.com']
        elif industry == 'marketing':
            return ['hubspot.com', 'marketingland.com', 'moz.com']
        else:
            return ['competitor1.com', 'competitor2.com', 'competitor3.com']

    def _identify_content_gaps(self, content_type: Dict[str, Any], writing_style: Dict[str, Any]) -> List[str]:
        """Identify content gaps based on current content type and style.

        Format gaps come from ``primary_type`` (default ``'blog'``), followed
        by one style gap derived from ``tone`` (default ``'professional'``).
        """
        gaps = []
        primary_type = content_type.get('primary_type', 'blog')
        if primary_type == 'blog':
            gaps.extend(['Video tutorials', 'Case studies', 'Infographics'])
        elif primary_type == 'video':
            gaps.extend(['Blog posts', 'Whitepapers', 'Webinars'])

        # Add style-based gaps
        tone = writing_style.get('tone', 'professional')
        if tone == 'professional':
            gaps.append('Personal stories')
        elif tone == 'casual':
            gaps.append('Expert interviews')
        return gaps

    def _generate_target_keywords(self, target_audience: Dict[str, Any]) -> List[str]:
        """Generate target keywords based on the audience's industry focus."""
        industry = target_audience.get('industry_focus', 'general')
        if industry == 'technology':
            return ['AI tools', 'Digital transformation', 'Tech trends']
        elif industry == 'marketing':
            return ['Content marketing', 'SEO strategies', 'Social media']
        else:
            return ['Industry insights', 'Best practices', 'Expert tips']

    def _identify_opportunities(self, content_type: Dict[str, Any]) -> List[str]:
        """Identify content opportunities based on the content's purpose.

        ``purpose`` defaults to ``'informational'``; any purpose other than
        ``'informational'`` or ``'promotional'`` yields an empty list.
        """
        opportunities = []
        purpose = content_type.get('purpose', 'informational')
        if purpose == 'informational':
            opportunities.extend(['How-to guides', 'Tutorials', 'Educational content'])
        elif purpose == 'promotional':
            opportunities.extend(['Case studies', 'Testimonials', 'Success stories'])
        return opportunities

    def _generate_high_value_keywords(self, target_audience: Dict[str, Any]) -> List[str]:
        """Generate high-value keywords based on the audience's industry focus."""
        industry = target_audience.get('industry_focus', 'general')
        if industry == 'technology':
            return ['AI marketing', 'Content automation', 'Digital strategy']
        elif industry == 'marketing':
            return ['Content marketing', 'SEO optimization', 'Social media strategy']
        else:
            return ['Industry trends', 'Best practices', 'Expert insights']

    def _generate_content_topics(self, content_type: Dict[str, Any]) -> List[str]:
        """Generate content topics based on content type analysis.

        ``primary_type`` defaults to ``'blog'``; any type other than
        ``'blog'`` or ``'video'`` yields an empty list.
        """
        topics = []
        primary_type = content_type.get('primary_type', 'blog')
        if primary_type == 'blog':
            topics.extend(['Industry trends', 'How-to guides', 'Expert insights'])
        elif primary_type == 'video':
            topics.extend(['Tutorials', 'Product demos', 'Expert interviews'])
        return topics

    def _analyze_search_intent(self, target_audience: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze search intent based on the audience's expertise level.

        ``expertise_level`` defaults to ``'intermediate'``; any value other
        than ``'beginner'`` or ``'intermediate'`` is treated as advanced.
        """
        expertise = target_audience.get('expertise_level', 'intermediate')
        if expertise == 'beginner':
            return {'intent': 'educational', 'focus': 'basic concepts'}
        elif expertise == 'intermediate':
            return {'intent': 'practical', 'focus': 'implementation'}
        else:
            return {'intent': 'advanced', 'focus': 'strategic insights'}

    def _get_default_ai_inputs(self) -> Dict[str, Any]:
        """Get default AI inputs when no onboarding data is available.

        A fresh dictionary is built on every call, so callers may mutate the
        result freely.
        """
        return {
            "website_analysis": {
                "content_types": ["blog", "video", "social"],
                "writing_style": "professional",
                "target_audience": ["professionals"],
                "industry_focus": "general",
                "expertise_level": "intermediate"
            },
            "competitor_analysis": {
                "top_performers": ["competitor1.com", "competitor2.com"],
                "industry": "general",
                "target_demographics": ["professionals"]
            },
            "gap_analysis": {
                "content_gaps": ["AI content", "Video tutorials", "Case studies"],
                "target_keywords": ["Industry insights", "Best practices"],
                "content_opportunities": ["How-to guides", "Tutorials"]
            },
            "keyword_analysis": {
                "high_value_keywords": ["AI marketing", "Content automation", "Digital strategy"],
                "content_topics": ["Industry trends", "Expert insights"],
                "search_intent": {"intent": "practical", "focus": "implementation"}
            }
        }