Base code
This commit is contained in:
291
backend/services/onboarding/data_service.py
Normal file
291
backend/services/onboarding/data_service.py
Normal file
@@ -0,0 +1,291 @@
|
||||
"""
|
||||
Onboarding Data Service
|
||||
Extracts real user data from onboarding to personalize AI inputs
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
from loguru import logger
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
from services.database import get_db_session
|
||||
from models.onboarding import OnboardingSession, WebsiteAnalysis, ResearchPreferences
|
||||
|
||||
class OnboardingDataService:
|
||||
"""Service to extract and use real onboarding data for AI personalization."""
|
||||
|
||||
def __init__(self, db: Optional[Session] = None):
|
||||
"""Initialize the onboarding data service."""
|
||||
self.db = db
|
||||
logger.info("OnboardingDataService initialized")
|
||||
|
||||
def get_user_website_analysis(self, user_id: int) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Get website analysis data for a specific user.
|
||||
|
||||
Args:
|
||||
user_id: User ID to get data for
|
||||
|
||||
Returns:
|
||||
Website analysis data or None if not found
|
||||
"""
|
||||
try:
|
||||
session = self.db or get_db_session()
|
||||
|
||||
# Find onboarding session for user
|
||||
onboarding_session = session.query(OnboardingSession).filter(
|
||||
OnboardingSession.user_id == user_id
|
||||
).first()
|
||||
|
||||
if not onboarding_session:
|
||||
logger.warning(f"No onboarding session found for user {user_id}")
|
||||
return None
|
||||
|
||||
# Get website analysis for this session
|
||||
website_analysis = session.query(WebsiteAnalysis).filter(
|
||||
WebsiteAnalysis.session_id == onboarding_session.id
|
||||
).first()
|
||||
|
||||
if not website_analysis:
|
||||
logger.warning(f"No website analysis found for user {user_id}")
|
||||
return None
|
||||
|
||||
return website_analysis.to_dict()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting website analysis for user {user_id}: {str(e)}")
|
||||
return None
|
||||
|
||||
def get_user_research_preferences(self, user_id: int) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Get research preferences for a specific user.
|
||||
|
||||
Args:
|
||||
user_id: User ID to get data for
|
||||
|
||||
Returns:
|
||||
Research preferences data or None if not found
|
||||
"""
|
||||
try:
|
||||
session = self.db or get_db_session()
|
||||
|
||||
# Find onboarding session for user
|
||||
onboarding_session = session.query(OnboardingSession).filter(
|
||||
OnboardingSession.user_id == user_id
|
||||
).first()
|
||||
|
||||
if not onboarding_session:
|
||||
logger.warning(f"No onboarding session found for user {user_id}")
|
||||
return None
|
||||
|
||||
# Get research preferences for this session
|
||||
research_prefs = session.query(ResearchPreferences).filter(
|
||||
ResearchPreferences.session_id == onboarding_session.id
|
||||
).first()
|
||||
|
||||
if not research_prefs:
|
||||
logger.warning(f"No research preferences found for user {user_id}")
|
||||
return None
|
||||
|
||||
return research_prefs.to_dict()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting research preferences for user {user_id}: {str(e)}")
|
||||
return None
|
||||
|
||||
def get_personalized_ai_inputs(self, user_id: int) -> Dict[str, Any]:
|
||||
"""
|
||||
Get personalized AI inputs based on user's onboarding data.
|
||||
|
||||
Args:
|
||||
user_id: User ID to get personalized data for
|
||||
|
||||
Returns:
|
||||
Personalized data for AI analysis
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Getting personalized AI inputs for user {user_id}")
|
||||
|
||||
# Get website analysis
|
||||
website_analysis = self.get_user_website_analysis(user_id)
|
||||
research_prefs = self.get_user_research_preferences(user_id)
|
||||
|
||||
if not website_analysis:
|
||||
logger.warning(f"No onboarding data found for user {user_id}, using defaults")
|
||||
return self._get_default_ai_inputs()
|
||||
|
||||
# Extract real data from website analysis
|
||||
writing_style = website_analysis.get('writing_style', {})
|
||||
target_audience = website_analysis.get('target_audience', {})
|
||||
content_type = website_analysis.get('content_type', {})
|
||||
recommended_settings = website_analysis.get('recommended_settings', {})
|
||||
|
||||
# Build personalized AI inputs
|
||||
personalized_inputs = {
|
||||
"website_analysis": {
|
||||
"website_url": website_analysis.get('website_url', ''),
|
||||
"content_types": self._extract_content_types(content_type),
|
||||
"writing_style": writing_style.get('tone', 'professional'),
|
||||
"target_audience": target_audience.get('demographics', ['professionals']),
|
||||
"industry_focus": target_audience.get('industry_focus', 'general'),
|
||||
"expertise_level": target_audience.get('expertise_level', 'intermediate')
|
||||
},
|
||||
"competitor_analysis": {
|
||||
"top_performers": self._generate_competitor_suggestions(target_audience),
|
||||
"industry": target_audience.get('industry_focus', 'general'),
|
||||
"target_demographics": target_audience.get('demographics', [])
|
||||
},
|
||||
"gap_analysis": {
|
||||
"content_gaps": self._identify_content_gaps(content_type, writing_style),
|
||||
"target_keywords": self._generate_target_keywords(target_audience),
|
||||
"content_opportunities": self._identify_opportunities(content_type)
|
||||
},
|
||||
"keyword_analysis": {
|
||||
"high_value_keywords": self._generate_high_value_keywords(target_audience),
|
||||
"content_topics": self._generate_content_topics(content_type),
|
||||
"search_intent": self._analyze_search_intent(target_audience)
|
||||
}
|
||||
}
|
||||
|
||||
# Add research preferences if available
|
||||
if research_prefs:
|
||||
personalized_inputs["research_preferences"] = {
|
||||
"research_depth": research_prefs.get('research_depth', 'Standard'),
|
||||
"content_types": research_prefs.get('content_types', []),
|
||||
"auto_research": research_prefs.get('auto_research', True),
|
||||
"factual_content": research_prefs.get('factual_content', True)
|
||||
}
|
||||
|
||||
logger.info(f"✅ Generated personalized AI inputs for user {user_id}")
|
||||
return personalized_inputs
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating personalized AI inputs for user {user_id}: {str(e)}")
|
||||
return self._get_default_ai_inputs()
|
||||
|
||||
def _extract_content_types(self, content_type: Dict[str, Any]) -> List[str]:
|
||||
"""Extract content types from content type analysis."""
|
||||
types = []
|
||||
if content_type.get('primary_type'):
|
||||
types.append(content_type['primary_type'])
|
||||
if content_type.get('secondary_types'):
|
||||
types.extend(content_type['secondary_types'])
|
||||
return types if types else ['blog', 'article']
|
||||
|
||||
def _generate_competitor_suggestions(self, target_audience: Dict[str, Any]) -> List[str]:
|
||||
"""Generate competitor suggestions based on target audience."""
|
||||
industry = target_audience.get('industry_focus', 'general')
|
||||
demographics = target_audience.get('demographics', ['professionals'])
|
||||
|
||||
# Generate industry-specific competitors
|
||||
if industry == 'technology':
|
||||
return ['techcrunch.com', 'wired.com', 'theverge.com']
|
||||
elif industry == 'marketing':
|
||||
return ['hubspot.com', 'marketingland.com', 'moz.com']
|
||||
else:
|
||||
return ['competitor1.com', 'competitor2.com', 'competitor3.com']
|
||||
|
||||
def _identify_content_gaps(self, content_type: Dict[str, Any], writing_style: Dict[str, Any]) -> List[str]:
|
||||
"""Identify content gaps based on current content type and style."""
|
||||
gaps = []
|
||||
primary_type = content_type.get('primary_type', 'blog')
|
||||
|
||||
if primary_type == 'blog':
|
||||
gaps.extend(['Video tutorials', 'Case studies', 'Infographics'])
|
||||
elif primary_type == 'video':
|
||||
gaps.extend(['Blog posts', 'Whitepapers', 'Webinars'])
|
||||
|
||||
# Add style-based gaps
|
||||
tone = writing_style.get('tone', 'professional')
|
||||
if tone == 'professional':
|
||||
gaps.append('Personal stories')
|
||||
elif tone == 'casual':
|
||||
gaps.append('Expert interviews')
|
||||
|
||||
return gaps
|
||||
|
||||
def _generate_target_keywords(self, target_audience: Dict[str, Any]) -> List[str]:
|
||||
"""Generate target keywords based on audience analysis."""
|
||||
industry = target_audience.get('industry_focus', 'general')
|
||||
expertise = target_audience.get('expertise_level', 'intermediate')
|
||||
|
||||
if industry == 'technology':
|
||||
return ['AI tools', 'Digital transformation', 'Tech trends']
|
||||
elif industry == 'marketing':
|
||||
return ['Content marketing', 'SEO strategies', 'Social media']
|
||||
else:
|
||||
return ['Industry insights', 'Best practices', 'Expert tips']
|
||||
|
||||
def _identify_opportunities(self, content_type: Dict[str, Any]) -> List[str]:
|
||||
"""Identify content opportunities based on current content type."""
|
||||
opportunities = []
|
||||
purpose = content_type.get('purpose', 'informational')
|
||||
|
||||
if purpose == 'informational':
|
||||
opportunities.extend(['How-to guides', 'Tutorials', 'Educational content'])
|
||||
elif purpose == 'promotional':
|
||||
opportunities.extend(['Case studies', 'Testimonials', 'Success stories'])
|
||||
|
||||
return opportunities
|
||||
|
||||
def _generate_high_value_keywords(self, target_audience: Dict[str, Any]) -> List[str]:
|
||||
"""Generate high-value keywords based on audience analysis."""
|
||||
industry = target_audience.get('industry_focus', 'general')
|
||||
|
||||
if industry == 'technology':
|
||||
return ['AI marketing', 'Content automation', 'Digital strategy']
|
||||
elif industry == 'marketing':
|
||||
return ['Content marketing', 'SEO optimization', 'Social media strategy']
|
||||
else:
|
||||
return ['Industry trends', 'Best practices', 'Expert insights']
|
||||
|
||||
def _generate_content_topics(self, content_type: Dict[str, Any]) -> List[str]:
|
||||
"""Generate content topics based on content type analysis."""
|
||||
topics = []
|
||||
primary_type = content_type.get('primary_type', 'blog')
|
||||
|
||||
if primary_type == 'blog':
|
||||
topics.extend(['Industry trends', 'How-to guides', 'Expert insights'])
|
||||
elif primary_type == 'video':
|
||||
topics.extend(['Tutorials', 'Product demos', 'Expert interviews'])
|
||||
|
||||
return topics
|
||||
|
||||
def _analyze_search_intent(self, target_audience: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Analyze search intent based on target audience."""
|
||||
expertise = target_audience.get('expertise_level', 'intermediate')
|
||||
|
||||
if expertise == 'beginner':
|
||||
return {'intent': 'educational', 'focus': 'basic concepts'}
|
||||
elif expertise == 'intermediate':
|
||||
return {'intent': 'practical', 'focus': 'implementation'}
|
||||
else:
|
||||
return {'intent': 'advanced', 'focus': 'strategic insights'}
|
||||
|
||||
def _get_default_ai_inputs(self) -> Dict[str, Any]:
|
||||
"""Get default AI inputs when no onboarding data is available."""
|
||||
return {
|
||||
"website_analysis": {
|
||||
"content_types": ["blog", "video", "social"],
|
||||
"writing_style": "professional",
|
||||
"target_audience": ["professionals"],
|
||||
"industry_focus": "general",
|
||||
"expertise_level": "intermediate"
|
||||
},
|
||||
"competitor_analysis": {
|
||||
"top_performers": ["competitor1.com", "competitor2.com"],
|
||||
"industry": "general",
|
||||
"target_demographics": ["professionals"]
|
||||
},
|
||||
"gap_analysis": {
|
||||
"content_gaps": ["AI content", "Video tutorials", "Case studies"],
|
||||
"target_keywords": ["Industry insights", "Best practices"],
|
||||
"content_opportunities": ["How-to guides", "Tutorials"]
|
||||
},
|
||||
"keyword_analysis": {
|
||||
"high_value_keywords": ["AI marketing", "Content automation", "Digital strategy"],
|
||||
"content_topics": ["Industry trends", "Expert insights"],
|
||||
"search_intent": {"intent": "practical", "focus": "implementation"}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user