Added documentation for the auto-population feature and the analytics integration.
This commit is contained in:
@@ -17,8 +17,11 @@ from models.onboarding import (
|
||||
OnboardingSession,
|
||||
WebsiteAnalysis,
|
||||
ResearchPreferences,
|
||||
APIKey
|
||||
APIKey,
|
||||
PersonaData,
|
||||
CompetitorAnalysis
|
||||
)
|
||||
import os
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -29,8 +32,13 @@ class OnboardingDataIntegrationService:
|
||||
self.data_freshness_threshold = timedelta(hours=24)
|
||||
self.max_analysis_age = timedelta(days=7)
|
||||
|
||||
async def process_onboarding_data(self, user_id: int, db: Session) -> Dict[str, Any]:
|
||||
"""Process and integrate all onboarding data for a user."""
|
||||
async def process_onboarding_data(self, user_id: str, db: Session) -> Dict[str, Any]:
|
||||
"""Process and integrate all onboarding data for a user.
|
||||
|
||||
Args:
|
||||
user_id: Clerk user ID (string format, e.g., 'user_xxx')
|
||||
db: Database session
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Processing onboarding data for user: {user_id}")
|
||||
|
||||
@@ -39,6 +47,10 @@ class OnboardingDataIntegrationService:
|
||||
research_preferences = self._get_research_preferences(user_id, db)
|
||||
api_keys_data = self._get_api_keys_data(user_id, db)
|
||||
onboarding_session = self._get_onboarding_session(user_id, db)
|
||||
persona_data = self._get_persona_data(user_id, db)
|
||||
competitor_analysis = self._get_competitor_analysis(user_id, db)
|
||||
gsc_analytics = await self._get_gsc_analytics(user_id)
|
||||
bing_analytics = await self._get_bing_analytics(user_id)
|
||||
|
||||
# Log data source status
|
||||
logger.info(f"Data source status for user {user_id}:")
|
||||
@@ -46,6 +58,10 @@ class OnboardingDataIntegrationService:
|
||||
logger.info(f" - Research preferences: {'✅ Found' if research_preferences else '❌ Missing'}")
|
||||
logger.info(f" - API keys data: {'✅ Found' if api_keys_data else '❌ Missing'}")
|
||||
logger.info(f" - Onboarding session: {'✅ Found' if onboarding_session else '❌ Missing'}")
|
||||
logger.info(f" - Persona data: {'✅ Found' if persona_data else '❌ Missing'}")
|
||||
logger.info(f" - Competitor analysis: {'✅ Found' if competitor_analysis else '❌ Missing'}")
|
||||
logger.info(f" - GSC Analytics: {'✅ Found' if gsc_analytics else '❌ Missing'}")
|
||||
logger.info(f" - Bing Analytics: {'✅ Found' if bing_analytics else '❌ Missing'}")
|
||||
|
||||
# Process and integrate data
|
||||
integrated_data = {
|
||||
@@ -53,7 +69,11 @@ class OnboardingDataIntegrationService:
|
||||
'research_preferences': research_preferences,
|
||||
'api_keys_data': api_keys_data,
|
||||
'onboarding_session': onboarding_session,
|
||||
'data_quality': self._assess_data_quality(website_analysis, research_preferences, api_keys_data),
|
||||
'persona_data': persona_data,
|
||||
'competitor_analysis': competitor_analysis,
|
||||
'gsc_analytics': gsc_analytics,
|
||||
'bing_analytics': bing_analytics,
|
||||
'data_quality': self._assess_data_quality(website_analysis, research_preferences, api_keys_data, persona_data, competitor_analysis, gsc_analytics, bing_analytics),
|
||||
'processing_timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
@@ -76,7 +96,7 @@ class OnboardingDataIntegrationService:
|
||||
logger.error("Traceback:\n%s", traceback.format_exc())
|
||||
return self._get_fallback_data()
|
||||
|
||||
def _get_website_analysis(self, user_id: int, db: Session) -> Dict[str, Any]:
|
||||
def _get_website_analysis(self, user_id: str, db: Session) -> Dict[str, Any]:
|
||||
"""Get website analysis data for the user."""
|
||||
try:
|
||||
# Get the latest onboarding session for the user
|
||||
@@ -109,7 +129,7 @@ class OnboardingDataIntegrationService:
|
||||
logger.error(f"Error getting website analysis for user {user_id}: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _get_research_preferences(self, user_id: int, db: Session) -> Dict[str, Any]:
|
||||
def _get_research_preferences(self, user_id: str, db: Session) -> Dict[str, Any]:
|
||||
"""Get research preferences data for the user."""
|
||||
try:
|
||||
# Get the latest onboarding session for the user
|
||||
@@ -142,7 +162,7 @@ class OnboardingDataIntegrationService:
|
||||
logger.error(f"Error getting research preferences for user {user_id}: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _get_api_keys_data(self, user_id: int, db: Session) -> Dict[str, Any]:
|
||||
def _get_api_keys_data(self, user_id: str, db: Session) -> Dict[str, Any]:
|
||||
"""Get API keys data for the user."""
|
||||
try:
|
||||
# Get the latest onboarding session for the user
|
||||
@@ -179,7 +199,7 @@ class OnboardingDataIntegrationService:
|
||||
logger.error(f"Error getting API keys data for user {user_id}: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _get_onboarding_session(self, user_id: int, db: Session) -> Dict[str, Any]:
|
||||
def _get_onboarding_session(self, user_id: str, db: Session) -> Dict[str, Any]:
|
||||
"""Get onboarding session data for the user."""
|
||||
try:
|
||||
# Get the latest onboarding session for the user
|
||||
@@ -210,7 +230,7 @@ class OnboardingDataIntegrationService:
|
||||
logger.error(f"Error getting onboarding session for user {user_id}: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _assess_data_quality(self, website_analysis: Dict, research_preferences: Dict, api_keys_data: Dict) -> Dict[str, Any]:
|
||||
def _assess_data_quality(self, website_analysis: Dict, research_preferences: Dict, api_keys_data: Dict, persona_data: Dict = None, competitor_analysis: List = None, gsc_analytics: Dict = None, bing_analytics: Dict = None) -> Dict[str, Any]:
|
||||
"""Assess the quality and completeness of onboarding data."""
|
||||
try:
|
||||
quality_metrics = {
|
||||
@@ -244,6 +264,26 @@ class OnboardingDataIntegrationService:
|
||||
if api_keys_data:
|
||||
filled_fields += 1
|
||||
|
||||
# Persona data completeness
|
||||
total_fields += 1
|
||||
if persona_data and persona_data.get('core_persona'):
|
||||
filled_fields += 1
|
||||
|
||||
# Competitor analysis completeness
|
||||
total_fields += 1
|
||||
if competitor_analysis and len(competitor_analysis) > 0:
|
||||
filled_fields += 1
|
||||
|
||||
# GSC analytics completeness
|
||||
total_fields += 1
|
||||
if gsc_analytics and (gsc_analytics.get('data') or gsc_analytics.get('metrics')):
|
||||
filled_fields += 1
|
||||
|
||||
# Bing analytics completeness
|
||||
total_fields += 1
|
||||
if bing_analytics and (bing_analytics.get('data') or bing_analytics.get('summary')):
|
||||
filled_fields += 1
|
||||
|
||||
quality_metrics['completeness'] = filled_fields / total_fields if total_fields > 0 else 0.0
|
||||
|
||||
# Calculate freshness
|
||||
@@ -251,17 +291,36 @@ class OnboardingDataIntegrationService:
|
||||
for data_source in [website_analysis, research_preferences]:
|
||||
if data_source.get('data_freshness'):
|
||||
freshness_scores.append(data_source['data_freshness'])
|
||||
if persona_data and persona_data.get('data_freshness'):
|
||||
freshness_scores.append(persona_data['data_freshness'])
|
||||
if competitor_analysis:
|
||||
for competitor in competitor_analysis:
|
||||
if competitor.get('data_freshness'):
|
||||
freshness_scores.append(competitor['data_freshness'])
|
||||
break # Just use first competitor's freshness
|
||||
if gsc_analytics and gsc_analytics.get('data_freshness'):
|
||||
freshness_scores.append(gsc_analytics['data_freshness'])
|
||||
if bing_analytics and bing_analytics.get('data_freshness'):
|
||||
freshness_scores.append(bing_analytics['data_freshness'])
|
||||
|
||||
quality_metrics['freshness'] = sum(freshness_scores) / len(freshness_scores) if freshness_scores else 0.0
|
||||
|
||||
# Calculate relevance (based on data presence and quality)
|
||||
relevance_score = 0.0
|
||||
if website_analysis.get('domain'):
|
||||
relevance_score += 0.4
|
||||
relevance_score += 0.20
|
||||
if research_preferences.get('research_topics'):
|
||||
relevance_score += 0.3
|
||||
relevance_score += 0.15
|
||||
if api_keys_data:
|
||||
relevance_score += 0.3
|
||||
relevance_score += 0.10
|
||||
if persona_data and persona_data.get('core_persona'):
|
||||
relevance_score += 0.15
|
||||
if competitor_analysis and len(competitor_analysis) > 0:
|
||||
relevance_score += 0.15
|
||||
if gsc_analytics and (gsc_analytics.get('data') or gsc_analytics.get('metrics')):
|
||||
relevance_score += 0.15 # Real analytics data is highly relevant
|
||||
if bing_analytics and (bing_analytics.get('data') or bing_analytics.get('summary')):
|
||||
relevance_score += 0.10 # Real analytics data is highly relevant
|
||||
|
||||
quality_metrics['relevance'] = relevance_score
|
||||
|
||||
@@ -313,7 +372,7 @@ class OnboardingDataIntegrationService:
|
||||
logger.error(f"Error checking API data availability: {str(e)}")
|
||||
return False
|
||||
|
||||
async def _store_integrated_data(self, user_id: int, integrated_data: Dict[str, Any], db: Session) -> None:
|
||||
async def _store_integrated_data(self, user_id: str, integrated_data: Dict[str, Any], db: Session) -> None:
|
||||
"""Store integrated onboarding data."""
|
||||
try:
|
||||
# Create or update integrated data record
|
||||
@@ -355,6 +414,200 @@ class OnboardingDataIntegrationService:
|
||||
# Soft-fail storage: do not break the refresh path
|
||||
return
|
||||
|
||||
def _get_persona_data(self, user_id: str, db: Session) -> Dict[str, Any]:
|
||||
"""Get persona data for the user."""
|
||||
try:
|
||||
# Get the latest onboarding session for the user
|
||||
session = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.user_id == user_id
|
||||
).order_by(OnboardingSession.updated_at.desc()).first()
|
||||
|
||||
if not session:
|
||||
logger.warning(f"No onboarding session found for user {user_id}")
|
||||
return {}
|
||||
|
||||
# Get persona data for this session
|
||||
persona = db.query(PersonaData).filter(
|
||||
PersonaData.session_id == session.id
|
||||
).first()
|
||||
|
||||
if not persona:
|
||||
logger.warning(f"No persona data found for user {user_id}")
|
||||
return {}
|
||||
|
||||
# Convert to dictionary and add metadata
|
||||
persona_dict = persona.to_dict()
|
||||
persona_dict['data_freshness'] = self._calculate_freshness(persona.updated_at)
|
||||
persona_dict['confidence_level'] = 0.9
|
||||
|
||||
logger.info(f"Retrieved persona data for user {user_id}")
|
||||
return persona_dict
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting persona data for user {user_id}: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _get_competitor_analysis(self, user_id: str, db: Session) -> List[Dict[str, Any]]:
|
||||
"""Get competitor analysis data for the user."""
|
||||
try:
|
||||
# Get the latest onboarding session for the user
|
||||
session = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.user_id == user_id
|
||||
).order_by(OnboardingSession.updated_at.desc()).first()
|
||||
|
||||
if not session:
|
||||
logger.warning(f"🔍 COMPETITOR VALIDATION: No onboarding session found for user {user_id}")
|
||||
return []
|
||||
|
||||
logger.warning(f"🔍 COMPETITOR VALIDATION: Found session {session.id} for user {user_id}")
|
||||
|
||||
# Get all competitor analyses for this session
|
||||
competitor_records = db.query(CompetitorAnalysis).filter(
|
||||
CompetitorAnalysis.session_id == session.id
|
||||
).order_by(CompetitorAnalysis.updated_at.desc()).all()
|
||||
|
||||
if not competitor_records:
|
||||
logger.warning(f"🔍 COMPETITOR VALIDATION: No competitor analysis records found for user {user_id}, session {session.id}")
|
||||
logger.warning(f" Checking all sessions for user {user_id}...")
|
||||
# Check all sessions for this user
|
||||
all_sessions = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.user_id == user_id
|
||||
).all()
|
||||
logger.warning(f" Total sessions for user: {len(all_sessions)}")
|
||||
for sess in all_sessions:
|
||||
comp_count = db.query(CompetitorAnalysis).filter(
|
||||
CompetitorAnalysis.session_id == sess.id
|
||||
).count()
|
||||
session_timestamp = getattr(sess, 'started_at', None) or getattr(sess, 'updated_at', None)
|
||||
logger.warning(f" Session {sess.id} (timestamp: {session_timestamp}): {comp_count} competitors")
|
||||
return []
|
||||
|
||||
logger.warning(f"🔍 COMPETITOR VALIDATION: Found {len(competitor_records)} competitor records for user {user_id}")
|
||||
|
||||
# Convert to list of dictionaries
|
||||
# Use to_dict() which includes competitor_url, competitor_domain, analysis_data
|
||||
competitors = []
|
||||
for record in competitor_records:
|
||||
competitor_dict = record.to_dict()
|
||||
# Ensure analysis_data is included (to_dict() should include it)
|
||||
if 'analysis_data' not in competitor_dict and record.analysis_data:
|
||||
competitor_dict['analysis_data'] = record.analysis_data
|
||||
competitor_dict['data_freshness'] = self._calculate_freshness(record.updated_at)
|
||||
competitor_dict['confidence_level'] = 0.9 if record.status == 'completed' else 0.5
|
||||
competitors.append(competitor_dict)
|
||||
|
||||
logger.info(f"Retrieved {len(competitors)} competitor analyses for user {user_id}")
|
||||
if competitors:
|
||||
logger.warning(f"🔍 Sample competitor keys: {list(competitors[0].keys())}")
|
||||
logger.warning(f"🔍 Sample competitor has analysis_data: {'analysis_data' in competitors[0]}")
|
||||
if 'analysis_data' in competitors[0]:
|
||||
logger.warning(f"🔍 Sample analysis_data keys: {list(competitors[0]['analysis_data'].keys()) if isinstance(competitors[0]['analysis_data'], dict) else 'Not a dict'}")
|
||||
return competitors
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting competitor analysis for user {user_id}: {str(e)}")
|
||||
return []
|
||||
|
||||
async def _get_gsc_analytics(self, user_id: str) -> Dict[str, Any]:
|
||||
"""Get Google Search Console analytics data for the user."""
|
||||
try:
|
||||
from services.seo.dashboard_service import SEODashboardService
|
||||
from services.database import get_db_session
|
||||
|
||||
db = get_db_session()
|
||||
try:
|
||||
dashboard_service = SEODashboardService(db)
|
||||
gsc_data = await dashboard_service.get_gsc_data(user_id)
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
if gsc_data and gsc_data.get('status') != 'disconnected' and not gsc_data.get('error'):
|
||||
logger.info(f"Retrieved GSC analytics for user {user_id}")
|
||||
return {
|
||||
'data': gsc_data.get('data', {}),
|
||||
'metrics': gsc_data.get('metrics', {}),
|
||||
'date_range': gsc_data.get('date_range', {}),
|
||||
'data_freshness': 1.0, # GSC data is typically fresh
|
||||
'confidence_level': 0.9
|
||||
}
|
||||
else:
|
||||
logger.warning(f"No GSC analytics found or not connected for user {user_id}")
|
||||
return {}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting GSC analytics for user {user_id}: {str(e)}")
|
||||
return {}
|
||||
|
||||
async def _get_bing_analytics(self, user_id: str) -> Dict[str, Any]:
|
||||
"""Get Bing Webmaster Tools analytics data for the user."""
|
||||
try:
|
||||
from services.seo.dashboard_service import SEODashboardService
|
||||
from services.bing_analytics_storage_service import BingAnalyticsStorageService
|
||||
from services.database import get_db_session
|
||||
|
||||
db = get_db_session()
|
||||
try:
|
||||
dashboard_service = SEODashboardService(db)
|
||||
bing_data = await dashboard_service.get_bing_data(user_id)
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# Also try to get from storage service for more detailed metrics
|
||||
bing_storage = BingAnalyticsStorageService(os.getenv('DATABASE_URL', 'sqlite:///alwrity.db'))
|
||||
|
||||
# Get site URL from onboarding session if available
|
||||
site_url = None
|
||||
try:
|
||||
from services.database import get_db_session
|
||||
with get_db_session() as db:
|
||||
session = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.user_id == user_id
|
||||
).order_by(OnboardingSession.updated_at.desc()).first()
|
||||
if session:
|
||||
website_analysis = db.query(WebsiteAnalysis).filter(
|
||||
WebsiteAnalysis.session_id == session.id
|
||||
).order_by(WebsiteAnalysis.updated_at.desc()).first()
|
||||
if website_analysis:
|
||||
site_url = website_analysis.website_url
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not get site URL for Bing analytics: {e}")
|
||||
|
||||
analytics_summary = {}
|
||||
if site_url:
|
||||
try:
|
||||
analytics_summary = bing_storage.get_analytics_summary(user_id, site_url, days=30)
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not get Bing analytics summary: {e}")
|
||||
|
||||
if bing_data and bing_data.get('status') != 'disconnected' and not bing_data.get('error'):
|
||||
logger.info(f"Retrieved Bing analytics for user {user_id}")
|
||||
return {
|
||||
'data': bing_data.get('data', {}),
|
||||
'metrics': bing_data.get('metrics', {}),
|
||||
'summary': analytics_summary,
|
||||
'date_range': bing_data.get('date_range', {}),
|
||||
'data_freshness': 1.0, # Bing data is typically fresh
|
||||
'confidence_level': 0.9
|
||||
}
|
||||
elif analytics_summary and not analytics_summary.get('error'):
|
||||
# Use stored analytics if available even if API is disconnected
|
||||
logger.info(f"Retrieved Bing analytics from storage for user {user_id}")
|
||||
return {
|
||||
'data': {},
|
||||
'metrics': {},
|
||||
'summary': analytics_summary,
|
||||
'date_range': {},
|
||||
'data_freshness': 0.8, # Stored data might be slightly older
|
||||
'confidence_level': 0.85
|
||||
}
|
||||
else:
|
||||
logger.warning(f"No Bing analytics found or not connected for user {user_id}")
|
||||
return {}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting Bing analytics for user {user_id}: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _get_fallback_data(self) -> Dict[str, Any]:
|
||||
"""Get fallback data when processing fails."""
|
||||
return {
|
||||
@@ -362,6 +615,10 @@ class OnboardingDataIntegrationService:
|
||||
'research_preferences': {},
|
||||
'api_keys_data': {},
|
||||
'onboarding_session': {},
|
||||
'persona_data': {},
|
||||
'competitor_analysis': [],
|
||||
'gsc_analytics': {},
|
||||
'bing_analytics': {},
|
||||
'data_quality': {
|
||||
'overall_score': 0.0,
|
||||
'completeness': 0.0,
|
||||
|
||||
Reference in New Issue
Block a user