AI platform insights monitoring and website analysis monitoring services added

This commit is contained in:
ajaysi
2025-11-11 15:57:45 +05:30
parent d99c7c83a7
commit 7191c7e7f0
81 changed files with 10860 additions and 1567 deletions

View File

@@ -81,6 +81,30 @@ class OnboardingCompletionService:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to create OAuth token monitoring tasks for user {user_id}: {e}")
# Create website analysis tasks for user's website and competitors
try:
from services.database import SessionLocal
from services.website_analysis_monitoring_service import create_website_analysis_tasks
db = SessionLocal()
try:
result = create_website_analysis_tasks(user_id=user_id, db=db)
if result.get('success'):
tasks_count = result.get('tasks_created', 0)
logger.info(
f"Created {tasks_count} website analysis tasks for user {user_id} "
f"on onboarding completion"
)
else:
error = result.get('error', 'Unknown error')
logger.warning(
f"Failed to create website analysis tasks for user {user_id}: {error}"
)
finally:
db.close()
except Exception as e:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to create website analysis tasks for user {user_id}: {e}")
return {
"message": "Onboarding completed successfully",
"completed_at": datetime.now().isoformat(),

View File

@@ -432,13 +432,13 @@ class Step3ResearchService:
logger.error(f"Error storing research data: {str(e)}")
return False
async def get_research_data(self, session_id: str) -> Dict[str, Any]:
async def get_research_data(self, session_id: str) -> Dict[str, Any]:
"""
Retrieve research data for a session.
Args:
session_id: Onboarding session ID
Returns:
Dictionary containing research data
"""
@@ -447,25 +447,76 @@ class Step3ResearchService:
session = db.query(OnboardingSession).filter(
OnboardingSession.id == session_id
).first()
if not session:
return {
"success": False,
"error": "Session not found"
}
research_data = session.step_data.get("step3_research_data") if session.step_data else None
# Check if step_data attribute exists (it may not be in the model)
# If it doesn't exist, try to get data from CompetitorAnalysis table
research_data = None
if hasattr(session, 'step_data') and session.step_data:
research_data = session.step_data.get("step3_research_data") if isinstance(session.step_data, dict) else None
# If not found in step_data, try CompetitorAnalysis table
if not research_data:
try:
from models.onboarding import CompetitorAnalysis
competitor_records = db.query(CompetitorAnalysis).filter(
CompetitorAnalysis.session_id == session.id
).all()
if competitor_records:
competitors = []
for record in competitor_records:
analysis_data = record.analysis_data or {}
competitor_info = {
"url": record.competitor_url,
"domain": record.competitor_domain or record.competitor_url,
"title": analysis_data.get("title", record.competitor_domain or ""),
"summary": analysis_data.get("summary", ""),
"relevance_score": analysis_data.get("relevance_score", 0.5),
"highlights": analysis_data.get("highlights", []),
"favicon": analysis_data.get("favicon"),
"image": analysis_data.get("image"),
"published_date": analysis_data.get("published_date"),
"author": analysis_data.get("author"),
"competitive_insights": analysis_data.get("competitive_analysis", {}),
"content_insights": analysis_data.get("content_insights", {})
}
competitors.append(competitor_info)
if competitors:
# Map competitor fields to match frontend expectations
mapped_competitors = []
for comp in competitors:
mapped_comp = {
**comp, # Keep all original fields
"name": comp.get("title") or comp.get("name") or comp.get("domain", ""),
"description": comp.get("summary") or comp.get("description", ""),
"similarity_score": comp.get("relevance_score") or comp.get("similarity_score", 0.5)
}
mapped_competitors.append(mapped_comp)
research_data = {
"competitors": mapped_competitors,
"completed_at": competitor_records[0].created_at.isoformat() if competitor_records[0].created_at else None
}
except Exception as e:
logger.warning(f"Could not retrieve competitors from CompetitorAnalysis table: {e}")
if not research_data:
return {
"success": False,
"error": "No research data found for this session"
"error": "No research data found for this session"
}
return {
"success": True,
"research_data": research_data,
"session_id": session_id
"step3_research_data": research_data,
"research_data": research_data # Keep for backward compatibility
}
except Exception as e:

View File

@@ -4,12 +4,12 @@ Provides provider availability and persona-aware defaults for research.
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from typing import Dict, Any, Optional
from typing import Dict, Any, Optional, List
from loguru import logger
from pydantic import BaseModel
from middleware.auth_middleware import get_current_user
from services.user_api_key_context import get_exa_key, get_gemini_key
from services.user_api_key_context import get_exa_key, get_gemini_key, get_tavily_key
from services.onboarding.database_service import OnboardingDatabaseService
from services.onboarding.progress_service import get_onboarding_progress_service
from services.database import get_db
@@ -26,8 +26,10 @@ class ProviderAvailability(BaseModel):
"""Provider availability status."""
google_available: bool
exa_available: bool
tavily_available: bool
gemini_key_status: str # 'configured' | 'missing'
exa_key_status: str # 'configured' | 'missing'
tavily_key_status: str # 'configured' | 'missing'
class PersonaDefaults(BaseModel):
@@ -47,6 +49,17 @@ class ResearchConfigResponse(BaseModel):
persona_scheduled: bool = False
class CompetitorAnalysisResponse(BaseModel):
"""Response model for competitor analysis data.

Returned by the ``/competitor-analysis`` endpoint. Only ``success`` is
required; every other field defaults to ``None``.
"""
# Whether competitor data was found and is included in this response.
success: bool
# Competitor records; the endpoint adds ``name``, ``description`` and
# ``similarity_score`` keys to every entry before returning them.
competitors: Optional[List[Dict[str, Any]]] = None
# Social media accounts taken from the stored research data's analysis
# metadata; the endpoint sends an empty dict when it has none.
social_media_accounts: Optional[Dict[str, str]] = None
# Citations that accompany the social media accounts (empty list when none).
social_media_citations: Optional[List[Dict[str, Any]]] = None
# Summary of the research; the endpoint either forwards the stored summary
# or builds one with ``total_competitors`` and ``market_insights``.
research_summary: Optional[Dict[str, Any]] = None
# The stored research data's ``completed_at`` value, when available.
analysis_timestamp: Optional[str] = None
# Human-readable reason, set by the endpoint when ``success`` is False.
error: Optional[str] = None
@router.get("/provider-availability", response_model=ProviderAvailability)
async def get_provider_availability(
current_user: Dict = Depends(get_current_user)
@@ -57,6 +70,7 @@ async def get_provider_availability(
Returns:
- google_available: True if Gemini key is configured
- exa_available: True if Exa key is configured
- tavily_available: True if Tavily key is configured
- Key status for each provider
"""
try:
@@ -65,15 +79,19 @@ async def get_provider_availability(
# Check API key availability
gemini_key = get_gemini_key(user_id)
exa_key = get_exa_key(user_id)
tavily_key = get_tavily_key(user_id)
google_available = bool(gemini_key and gemini_key.strip())
exa_available = bool(exa_key and exa_key.strip())
tavily_available = bool(tavily_key and tavily_key.strip())
return ProviderAvailability(
google_available=google_available,
exa_available=exa_available,
tavily_available=tavily_available,
gemini_key_status='configured' if google_available else 'missing',
exa_key_status='configured' if exa_available else 'missing'
exa_key_status='configured' if exa_available else 'missing',
tavily_key_status='configured' if tavily_available else 'missing'
)
except Exception as e:
logger.error(f"[ResearchConfig] Error checking provider availability for user {user_id if 'user_id' in locals() else 'unknown'}: {e}", exc_info=True)
@@ -211,15 +229,19 @@ async def get_research_config(
logger.debug(f"[ResearchConfig] Getting provider availability for user {user_id}")
gemini_key = get_gemini_key(user_id)
exa_key = get_exa_key(user_id)
tavily_key = get_tavily_key(user_id)
google_available = bool(gemini_key and gemini_key.strip())
exa_available = bool(exa_key and exa_key.strip())
tavily_available = bool(tavily_key and tavily_key.strip())
provider_availability = ProviderAvailability(
google_available=google_available,
exa_available=exa_available,
tavily_available=tavily_available,
gemini_key_status='configured' if google_available else 'missing',
exa_key_status='configured' if exa_available else 'missing'
exa_key_status='configured' if exa_available else 'missing',
tavily_key_status='configured' if tavily_available else 'missing'
)
# Get persona defaults
@@ -355,11 +377,190 @@ async def get_research_config(
import traceback
logger.error(f"[ResearchConfig] Full traceback:\n{traceback.format_exc()}")
raise HTTPException(
status_code=500,
status_code=500,
detail=f"Failed to get research config: {str(e)}"
)
def _map_competitor_for_frontend(comp: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of a competitor record with the keys the frontend reads.

    Every original key is kept. ``name``, ``description`` and
    ``similarity_score`` are added, each falling back through the alternative
    source keys and finally to ``""`` / ``0.5``. Falsy source values (for
    example a ``relevance_score`` of ``0``) fall through to the next candidate.
    """
    return {
        **comp,  # Keep all original fields
        "name": comp.get("title") or comp.get("name") or comp.get("domain", ""),
        "description": comp.get("summary") or comp.get("description", ""),
        "similarity_score": comp.get("relevance_score") or comp.get("similarity_score", 0.5),
    }


@router.get("/competitor-analysis", response_model=CompetitorAnalysisResponse)
async def get_competitor_analysis(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get competitor analysis data from onboarding for the current user.

    Two sources are tried in order:

    1. ``OnboardingDatabaseService.get_competitor_analysis`` (CompetitorAnalysis table).
    2. ``Step3ResearchService.get_research_data`` for the user's onboarding session.

    Returns:
        CompetitorAnalysisResponse with ``success=True`` and the mapped
        competitors when either source has data; otherwise ``success=False``
        with an ``error`` message explaining what is missing (no session,
        step 3 not completed, or no stored data).

    Raises:
        HTTPException: 500 when the database session is unavailable or an
            unexpected error occurs.
    """
    user_id = None
    try:
        user_id = str(current_user.get('id'))
        logger.info(f"[ResearchConfig] Getting competitor analysis for user {user_id}")

        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id}")
            raise HTTPException(status_code=500, detail="Database session not available")

        db_service = OnboardingDatabaseService(db=db)

        # Get onboarding session - using same pattern as onboarding completion check
        session = db_service.get_session_by_user(user_id, db)
        if not session:
            logger.warning(f"[ResearchConfig] No onboarding session found for user {user_id}")
            return CompetitorAnalysisResponse(
                success=False,
                error="No onboarding session found. Please complete onboarding first."
            )
        logger.debug(
            f"[ResearchConfig] Found onboarding session: id={session.id}, "
            f"user_id={session.user_id}, current_step={session.current_step}"
        )

        # Step 3 counts as completed when current_step >= 3 or research preferences exist
        research_preferences = db_service.get_research_preferences(user_id, db)
        if not research_preferences and session.current_step < 3:
            logger.info(f"[ResearchConfig] Step 3 not completed for user {user_id} (current_step={session.current_step})")
            return CompetitorAnalysisResponse(
                success=False,
                error="Onboarding step 3 (Competitor Analysis) is not completed. Please complete onboarding step 3 first."
            )

        # Method 1: CompetitorAnalysis table via OnboardingDatabaseService.
        # A failure here is non-fatal: log it and fall through to Method 2.
        try:
            competitors = db_service.get_competitor_analysis(user_id, db)
            if competitors:
                logger.info(f"[ResearchConfig] Found {len(competitors)} competitors from CompetitorAnalysis table for user {user_id}")
                mapped_competitors = [_map_competitor_for_frontend(comp) for comp in competitors]
                return CompetitorAnalysisResponse(
                    success=True,
                    competitors=mapped_competitors,
                    social_media_accounts={},
                    social_media_citations=[],
                    research_summary={
                        "total_competitors": len(mapped_competitors),
                        "market_insights": f"Found {len(mapped_competitors)} competitors analyzed during onboarding"
                    },
                    analysis_timestamp=None
                )
            logger.debug(f"[ResearchConfig] No competitor records found in CompetitorAnalysis table for user {user_id}")
        except Exception as e:
            # loguru attaches the traceback through opt(exception=True); passing
            # exc_info=True would instead be treated as a str.format() argument.
            logger.opt(exception=True).warning(
                f"[ResearchConfig] Could not retrieve competitor data from CompetitorAnalysis table: {e}"
            )

        # Method 2: research data stored for the onboarding session.
        try:
            from api.onboarding_utils.step3_research_service import Step3ResearchService

            # get_research_data() is keyed by the onboarding session id, not the user id
            step3_service = Step3ResearchService()
            research_data_result = await step3_service.get_research_data(str(session.id))

            if research_data_result.get('success'):
                # Handle both 'research_data' and 'step3_research_data' keys
                research_data = research_data_result.get('step3_research_data') or research_data_result.get('research_data', {})
                competitors_list = research_data.get('competitors') if isinstance(research_data, dict) else None
                if competitors_list:
                    # Tolerate metadata stored as an explicit None
                    analysis_metadata = research_data.get('analysis_metadata') or {}
                    social_media_data = analysis_metadata.get('social_media_data') or {}
                    mapped_competitors = [_map_competitor_for_frontend(comp) for comp in competitors_list]
                    logger.info(f"[ResearchConfig] Found {len(mapped_competitors)} competitors from step_data via Step3ResearchService for user {user_id}")
                    return CompetitorAnalysisResponse(
                        success=True,
                        competitors=mapped_competitors,
                        social_media_accounts=social_media_data.get('social_media_accounts', {}),
                        social_media_citations=social_media_data.get('citations', []),
                        research_summary=research_data.get('research_summary'),
                        analysis_timestamp=research_data.get('completed_at')
                    )
                logger.debug(f"[ResearchConfig] Step3ResearchService returned success=True but no competitors for user {user_id}")
            else:
                error_msg = research_data_result.get('error', 'Unknown error')
                logger.debug(f"[ResearchConfig] Step3ResearchService returned success=False for user {user_id}, error: {error_msg}")
        except Exception as e:
            logger.opt(exception=True).warning(
                f"[ResearchConfig] Could not retrieve competitor data from Step3ResearchService: {e}"
            )

        # Fallback: Return empty response with helpful message
        logger.info(f"[ResearchConfig] No competitor analysis data found for user {user_id} (step 3 completed but no data found)")
        return CompetitorAnalysisResponse(
            success=False,
            error="Competitor analysis data was not found in the database. Please re-run competitor discovery in Step 3 of onboarding to generate and save competitor data."
        )
    except HTTPException:
        # Already a well-formed HTTP error; let FastAPI handle it unchanged
        raise
    except Exception as e:
        logger.opt(exception=True).error(
            f"[ResearchConfig] Error getting competitor analysis for user {user_id if user_id else 'unknown'}: {e}"
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get competitor analysis: {str(e)}"
        ) from e
# Helper functions from RESEARCH_AI_HYPERPERSONALIZATION.md
def _get_domain_suggestions(industry: str) -> list[str]:

View File

@@ -18,11 +18,68 @@ from middleware.auth_middleware import get_current_user
from models.monitoring_models import TaskExecutionLog, MonitoringTask
from models.scheduler_models import SchedulerEventLog
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
from sqlalchemy import func
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
from models.website_analysis_monitoring_models import WebsiteAnalysisTask, WebsiteAnalysisExecutionLog
router = APIRouter(prefix="/api/scheduler", tags=["scheduler-dashboard"])
def _rebuild_cumulative_stats_from_events(db: Session) -> Dict[str, int]:
    """
    Recompute cumulative scheduler stats from the ``check_cycle`` event logs.

    Serves as the fallback source when the persistent cumulative stats table
    is missing or does not agree with the event logs.

    Args:
        db: Database session

    Returns:
        Dictionary with ``total_check_cycles``, ``cumulative_tasks_found``,
        ``cumulative_tasks_executed``, ``cumulative_tasks_failed`` and
        ``cumulative_tasks_skipped``. Every value is 0 when there is no
        matching row or the query fails.
    """
    # Keys listed in the same order as the aggregate columns selected below
    aggregate_keys = (
        'total_check_cycles',
        'cumulative_tasks_found',
        'cumulative_tasks_executed',
        'cumulative_tasks_failed',
    )
    empty_stats = dict.fromkeys(aggregate_keys, 0)
    empty_stats['cumulative_tasks_skipped'] = 0

    try:
        row = (
            db.query(
                func.count(SchedulerEventLog.id),
                func.sum(SchedulerEventLog.tasks_found),
                func.sum(SchedulerEventLog.tasks_executed),
                func.sum(SchedulerEventLog.tasks_failed),
            )
            .filter(SchedulerEventLog.event_type == 'check_cycle')
            .first()
        )
        if not row:
            return empty_stats

        # SUM yields NULL (None) when no rows match, so coerce those to 0
        rebuilt = {
            key: int(0 if value is None else value)
            for key, value in zip(aggregate_keys, row)
        }
        rebuilt['cumulative_tasks_skipped'] = 0  # Not tracked in event logs currently
        return rebuilt
    except Exception as e:
        logger.error(f"[Dashboard] Error rebuilding cumulative stats from events: {e}", exc_info=True)
        return empty_stats
@router.get("/dashboard")
async def get_scheduler_dashboard(
current_user: Dict[str, Any] = Depends(get_current_user),
@@ -139,98 +196,172 @@ async def get_scheduler_dashboard(
except Exception as e:
logger.error(f"Error loading OAuth token monitoring tasks: {e}", exc_info=True)
# Load website analysis tasks
try:
website_analysis_tasks = db.query(WebsiteAnalysisTask).filter(
WebsiteAnalysisTask.status == 'active'
).all()
# Filter by user if user_id_str is provided
if user_id_str:
website_analysis_tasks = [t for t in website_analysis_tasks if t.user_id == user_id_str]
for task in website_analysis_tasks:
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
user_job_store = 'default'
logger.debug(f"Could not get job store for user {task.user_id}: {e}")
# Format as recurring job
job_info = {
'id': f"website_analysis_{task.task_type}_{task.user_id}_{task.id}",
'trigger_type': 'CronTrigger', # Recurring based on frequency_days
'next_run_time': task.next_check.isoformat() if task.next_check else None,
'user_id': task.user_id,
'job_store': 'default',
'user_job_store': user_job_store,
'function_name': 'website_analysis_executor.execute_task',
'task_type': task.task_type, # 'user_website' or 'competitor'
'website_url': task.website_url,
'competitor_id': task.competitor_id,
'task_id': task.id,
'is_database_task': True,
'frequency': f'Every {task.frequency_days} days',
'task_category': 'website_analysis'
}
formatted_jobs.append(job_info)
except Exception as e:
logger.error(f"Error loading website analysis tasks: {e}", exc_info=True)
# Load platform insights tasks (GSC and Bing)
try:
insights_tasks = db.query(PlatformInsightsTask).filter(
PlatformInsightsTask.status == 'active'
).all()
# Filter by user if user_id_str is provided
if user_id_str:
insights_tasks = [t for t in insights_tasks if t.user_id == user_id_str]
for task in insights_tasks:
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
user_job_store = 'default'
logger.debug(f"Could not get job store for user {task.user_id}: {e}")
# Format as recurring weekly job
job_info = {
'id': f"platform_insights_{task.platform}_{task.user_id}",
'trigger_type': 'CronTrigger', # Weekly recurring
'next_run_time': task.next_check.isoformat() if task.next_check else None,
'user_id': task.user_id,
'job_store': 'default',
'user_job_store': user_job_store,
'function_name': f'{task.platform}_insights_executor.execute_task',
'platform': task.platform,
'task_id': task.id,
'is_database_task': True,
'frequency': 'Weekly',
'task_category': 'platform_insights'
}
formatted_jobs.append(job_info)
except Exception as e:
logger.error(f"Error loading platform insights tasks: {e}", exc_info=True)
# Get active strategies count
active_strategies = stats.get('active_strategies_count', 0)
# Get last_update from stats (added by scheduler for frontend polling)
last_update = stats.get('last_update')
# Calculate cumulative/historical values from scheduler_event_logs
# Calculate cumulative/historical values from persistent cumulative stats table
# Fallback to event logs aggregation if cumulative stats table doesn't exist or is invalid
cumulative_stats = {}
try:
# First, check total events in database for debugging
total_events = db.query(func.count(SchedulerEventLog.id)).scalar() or 0
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
# Check for check_cycle events specifically
check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
SchedulerEventLog.event_type == 'check_cycle'
).scalar() or 0
# Try to get cumulative stats from dedicated table (persistent across restarts)
cumulative_stats_row = db.query(SchedulerCumulativeStats).filter(
SchedulerCumulativeStats.id == 1
).first()
# Also check for other event types that might have task counts
job_failed_count = db.query(func.count(SchedulerEventLog.id)).filter(
SchedulerEventLog.event_type == 'job_failed'
).scalar() or 0
job_completed_count = db.query(func.count(SchedulerEventLog.id)).filter(
SchedulerEventLog.event_type == 'job_completed'
).scalar() or 0
logger.warning(
f"[Dashboard] Database stats: {total_events} total events, "
f"{check_cycle_count} check_cycles, {job_failed_count} job_failed, "
f"{job_completed_count} job_completed"
)
if check_cycle_count > 0:
logger.warning(f"[Dashboard] Found {check_cycle_count} check cycle events in database")
# Aggregate check cycle events for cumulative totals
result = db.query(
func.count(SchedulerEventLog.id),
func.sum(SchedulerEventLog.tasks_found),
func.sum(SchedulerEventLog.tasks_executed),
func.sum(SchedulerEventLog.tasks_failed)
).filter(
SchedulerEventLog.event_type == 'check_cycle'
).first()
if cumulative_stats_row:
# Use persistent cumulative stats
cumulative_stats = {
'total_check_cycles': int(cumulative_stats_row.total_check_cycles or 0),
'cumulative_tasks_found': int(cumulative_stats_row.cumulative_tasks_found or 0),
'cumulative_tasks_executed': int(cumulative_stats_row.cumulative_tasks_executed or 0),
'cumulative_tasks_failed': int(cumulative_stats_row.cumulative_tasks_failed or 0),
'cumulative_tasks_skipped': int(cumulative_stats_row.cumulative_tasks_skipped or 0),
'cumulative_job_completed': int(cumulative_stats_row.cumulative_job_completed or 0),
'cumulative_job_failed': int(cumulative_stats_row.cumulative_job_failed or 0)
}
if result:
# SQLAlchemy returns tuple for multi-column queries
# SUM returns NULL when no rows, handle that
total_cycles = result[0] if result[0] is not None else 0
total_found = result[1] if result[1] is not None else 0
total_executed = result[2] if result[2] is not None else 0
total_failed = result[3] if result[3] is not None else 0
cumulative_stats = {
'total_check_cycles': int(total_cycles),
'cumulative_tasks_found': int(total_found),
'cumulative_tasks_executed': int(total_executed),
'cumulative_tasks_failed': int(total_failed)
}
logger.warning(f"[Dashboard] Cumulative stats from check_cycles: {cumulative_stats}")
else:
# No results (shouldn't happen with COUNT, but handle it)
cumulative_stats = {
'total_check_cycles': 0,
'cumulative_tasks_found': 0,
'cumulative_tasks_executed': 0,
'cumulative_tasks_failed': 0
}
logger.warning("[Dashboard] Query returned None (no check cycle events)")
logger.debug(
f"[Dashboard] Using persistent cumulative stats: "
f"cycles={cumulative_stats['total_check_cycles']}, "
f"found={cumulative_stats['cumulative_tasks_found']}, "
f"executed={cumulative_stats['cumulative_tasks_executed']}, "
f"failed={cumulative_stats['cumulative_tasks_failed']}"
)
# Validate cumulative stats by comparing with event logs (for verification)
check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
SchedulerEventLog.event_type == 'check_cycle'
).scalar() or 0
if cumulative_stats['total_check_cycles'] != check_cycle_count:
logger.warning(
f"[Dashboard] ⚠️ Cumulative stats validation mismatch: "
f"cumulative_stats.total_check_cycles={cumulative_stats['total_check_cycles']} "
f"vs event_logs.count={check_cycle_count}. "
f"Rebuilding cumulative stats from event logs..."
)
# Rebuild cumulative stats from event logs
cumulative_stats = _rebuild_cumulative_stats_from_events(db)
# Update the persistent table
if cumulative_stats_row:
cumulative_stats_row.total_check_cycles = cumulative_stats['total_check_cycles']
cumulative_stats_row.cumulative_tasks_found = cumulative_stats['cumulative_tasks_found']
cumulative_stats_row.cumulative_tasks_executed = cumulative_stats['cumulative_tasks_executed']
cumulative_stats_row.cumulative_tasks_failed = cumulative_stats['cumulative_tasks_failed']
cumulative_stats_row.cumulative_tasks_skipped = cumulative_stats.get('cumulative_tasks_skipped', 0)
db.commit()
logger.warning(f"[Dashboard] ✅ Rebuilt cumulative stats: {cumulative_stats}")
else:
# No check cycles yet, but we can still show job counts
# Log detailed info about why cumulative stats are 0
if stats.get('total_checks', 0) > 0:
logger.warning(
f"[Dashboard] ⚠️ Scheduler shows {stats.get('total_checks', 0)} checks in memory, "
f"but NO check_cycle events found in database. "
f"This suggests check_cycle events are not being saved properly."
)
else:
logger.warning(
f"[Dashboard] No check_cycle events yet. "
f"Scheduler interval: {stats.get('check_interval_minutes', 60)}min. "
f"First check cycle will run after interval expires. "
f"One-time jobs: {job_completed_count} completed, {job_failed_count} failed"
)
# Cumulative stats table doesn't exist or is empty, rebuild from event logs
logger.warning(
"[Dashboard] Cumulative stats table not found or empty. "
"Rebuilding from event logs..."
)
cumulative_stats = _rebuild_cumulative_stats_from_events(db)
# Create/update the persistent table
cumulative_stats_row = SchedulerCumulativeStats.get_or_create(db)
cumulative_stats_row.total_check_cycles = cumulative_stats['total_check_cycles']
cumulative_stats_row.cumulative_tasks_found = cumulative_stats['cumulative_tasks_found']
cumulative_stats_row.cumulative_tasks_executed = cumulative_stats['cumulative_tasks_executed']
cumulative_stats_row.cumulative_tasks_failed = cumulative_stats['cumulative_tasks_failed']
cumulative_stats_row.cumulative_tasks_skipped = cumulative_stats.get('cumulative_tasks_skipped', 0)
db.commit()
logger.warning(f"[Dashboard] ✅ Created/updated cumulative stats: {cumulative_stats}")
except ImportError:
# Cumulative stats model doesn't exist yet (migration not run)
logger.warning(
"[Dashboard] Cumulative stats model not found. "
"Falling back to event logs aggregation. "
"Run migration: create_scheduler_cumulative_stats.sql"
)
cumulative_stats = _rebuild_cumulative_stats_from_events(db)
except Exception as e:
logger.error(f"Error calculating cumulative stats: {e}", exc_info=True)
cumulative_stats = {
'total_check_cycles': 0,
'cumulative_tasks_found': 0,
'cumulative_tasks_executed': 0,
'cumulative_tasks_failed': 0
}
logger.error(f"[Dashboard] Error getting cumulative stats: {e}", exc_info=True)
# Fallback to event logs aggregation
cumulative_stats = _rebuild_cumulative_stats_from_events(db)
return {
'stats': {
@@ -259,8 +390,9 @@ async def get_scheduler_dashboard(
},
'jobs': formatted_jobs,
'job_count': len(formatted_jobs),
'recurring_jobs': 1 + len([j for j in formatted_jobs if j.get('is_database_task')]), # check_due_tasks + OAuth tasks
'recurring_jobs': 1 + len([j for j in formatted_jobs if j.get('is_database_task')]), # check_due_tasks + all DB tasks
'one_time_jobs': len([j for j in formatted_jobs if not j.get('is_database_task') and j.get('trigger_type') == 'DateTrigger']),
'registered_task_types': stats.get('registered_types', []), # Include registered task types
'user_isolation': {
'enabled': True,
'current_user_id': user_id_str
@@ -704,3 +836,381 @@ async def get_recent_scheduler_logs(
logger.error(f"Error getting recent scheduler logs: {e}")
raise HTTPException(status_code=500, detail=f"Failed to get recent scheduler logs: {str(e)}")
@router.get("/platform-insights/status/{user_id}")
async def get_platform_insights_status(
    user_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Report the platform insights tasks of a user, grouped by platform.

    When the user has a connected 'gsc' or 'bing' platform without a matching
    insights task, the missing task is created (without a site URL) before the
    status is assembled.

    Args:
        user_id: ID of the user whose tasks are requested; must match the
            authenticated user

    Returns:
        Dictionary with 'success', 'user_id', 'gsc_tasks', 'bing_tasks' and
        'total_tasks'

    Raises:
        HTTPException: 403 when another user's data is requested, 500 on any
            unexpected error
    """
    try:
        # Users may only read their own task status
        requester_id = str(current_user.get('id'))
        if requester_id != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        logger.debug(f"[Platform Insights Status] Getting status for user: {user_id}")

        def load_tasks():
            # Every insights task of the user, ordered by platform, then creation time
            return db.query(PlatformInsightsTask).filter(
                PlatformInsightsTask.user_id == user_id
            ).order_by(PlatformInsightsTask.platform, PlatformInsightsTask.created_at).all()

        tasks = load_tasks()

        from services.oauth_token_monitoring_service import get_connected_platforms
        from services.platform_insights_monitoring_service import create_platform_insights_task

        # Connected insights platforms that do not have a task yet
        connected = get_connected_platforms(user_id)
        covered = {item.platform for item in tasks}
        missing_platforms = [
            name for name in connected
            if name in ('gsc', 'bing') and name not in covered
        ]

        if missing_platforms:
            logger.info(
                f"[Platform Insights Status] User {user_id} has connected platforms {missing_platforms} "
                f"but missing insights tasks. Creating tasks..."
            )
            for platform in missing_platforms:
                try:
                    # site_url stays None on purpose: looking it up needs API calls,
                    # which are left to the executor when the task actually runs
                    result = create_platform_insights_task(
                        user_id=user_id,
                        platform=platform,
                        site_url=None,
                        db=db
                    )
                    if not result.get('success'):
                        logger.warning(f"[Platform Insights Status] Failed to create {platform} task: {result.get('error')}")
                    else:
                        logger.info(f"[Platform Insights Status] Created {platform.upper()} insights task for user {user_id}")
                except Exception as e:
                    # A failed creation must not break the status response
                    logger.warning(f"[Platform Insights Status] Error creating {platform} task: {e}", exc_info=True)

            # Pick up whatever was just created
            tasks = load_tasks()

        # Split the tasks per platform in a single pass
        gsc_tasks, bing_tasks = [], []
        for item in tasks:
            if item.platform == 'gsc':
                gsc_tasks.append(item)
            elif item.platform == 'bing':
                bing_tasks.append(item)

        logger.debug(
            f"[Platform Insights Status] Found {len(tasks)} total tasks: "
            f"{len(gsc_tasks)} GSC, {len(bing_tasks)} Bing"
        )

        def iso(moment):
            # ISO-8601 text for a set timestamp, None otherwise
            return moment.isoformat() if moment else None

        def format_task(task: PlatformInsightsTask) -> Dict[str, Any]:
            # JSON-friendly view of one task row
            return {
                'id': task.id,
                'platform': task.platform,
                'site_url': task.site_url,
                'status': task.status,
                'last_check': iso(task.last_check),
                'last_success': iso(task.last_success),
                'last_failure': iso(task.last_failure),
                'failure_reason': task.failure_reason,
                'next_check': iso(task.next_check),
                'created_at': iso(task.created_at),
                'updated_at': iso(task.updated_at)
            }

        return {
            'success': True,
            'user_id': user_id,
            'gsc_tasks': list(map(format_task, gsc_tasks)),
            'bing_tasks': list(map(format_task, bing_tasks)),
            'total_tasks': len(tasks)
        }

    except HTTPException:
        # Deliberate HTTP errors (e.g. 403) pass through untouched
        raise
    except Exception as e:
        logger.error(f"Error getting platform insights status for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get platform insights status: {str(e)}")
@router.get("/website-analysis/status/{user_id}")
async def get_website_analysis_status(
    user_id: str,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Report the website analysis tasks of a user.

    Args:
        user_id: ID of the user whose tasks are requested; must match the
            authenticated user

    Returns:
        Dictionary with 'success' and a 'data' payload holding the user's own
        website tasks, the competitor tasks, and total/active/failed counters

    Raises:
        HTTPException: 403 when another user's data is requested, 500 on any
            unexpected error
    """
    try:
        # Users may only read their own task status
        requester_id = str(current_user.get('id'))
        if requester_id != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        logger.debug(f"[Website Analysis Status] Getting status for user: {user_id}")

        # Every website analysis task of the user, ordered by type, then creation time
        tasks = (
            db.query(WebsiteAnalysisTask)
            .filter(WebsiteAnalysisTask.user_id == user_id)
            .order_by(WebsiteAnalysisTask.task_type, WebsiteAnalysisTask.created_at)
            .all()
        )

        # Split into the user's own site and competitor sites in a single pass
        user_website_tasks, competitor_tasks = [], []
        for item in tasks:
            if item.task_type == 'user_website':
                user_website_tasks.append(item)
            elif item.task_type == 'competitor':
                competitor_tasks.append(item)

        logger.debug(
            f"[Website Analysis Status] Found {len(tasks)} tasks for user {user_id}: "
            f"{len(user_website_tasks)} user website, {len(competitor_tasks)} competitors"
        )

        def iso(moment):
            # ISO-8601 text for a set timestamp, None otherwise
            return moment.isoformat() if moment else None

        def format_task(task: WebsiteAnalysisTask) -> Dict[str, Any]:
            # JSON-friendly view of one task row
            return {
                'id': task.id,
                'website_url': task.website_url,
                'task_type': task.task_type,
                'competitor_id': task.competitor_id,
                'status': task.status,
                'last_check': iso(task.last_check),
                'last_success': iso(task.last_success),
                'last_failure': iso(task.last_failure),
                'failure_reason': task.failure_reason,
                'next_check': iso(task.next_check),
                'frequency_days': task.frequency_days,
                'created_at': iso(task.created_at),
                'updated_at': iso(task.updated_at)
            }

        # Counters over all tasks, regardless of task type
        active_tasks = sum(1 for item in tasks if item.status == 'active')
        failed_tasks = sum(1 for item in tasks if item.status == 'failed')

        payload = {
            'user_id': user_id,
            'user_website_tasks': list(map(format_task, user_website_tasks)),
            'competitor_tasks': list(map(format_task, competitor_tasks)),
            'total_tasks': len(tasks),
            'active_tasks': active_tasks,
            'failed_tasks': failed_tasks
        }
        return {'success': True, 'data': payload}

    except HTTPException:
        # Deliberate HTTP errors (e.g. 403) pass through untouched
        raise
    except Exception as e:
        logger.error(f"Error getting website analysis status for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get website analysis status: {str(e)}")
@router.get("/website-analysis/logs/{user_id}")
async def get_website_analysis_logs(
    user_id: str,
    task_id: Optional[int] = Query(None),
    limit: int = Query(10, ge=1, le=100),
    offset: int = Query(0, ge=0),
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Get a page of execution logs for a user's website analysis tasks.

    Args:
        user_id: User ID; must match the authenticated user
        task_id: Optional task ID to filter logs
        limit: Maximum number of logs to return
        offset: Pagination offset

    Returns:
        Dictionary with the formatted 'logs' (newest execution first),
        'total_count' of all matching logs, the echoed 'limit' and 'offset',
        and 'has_more' telling whether further pages exist

    Raises:
        HTTPException: 403 when another user's data is requested, 500 on any
            unexpected error
    """
    try:
        # Verify user can only access their own data
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Joining on the task table restricts the logs to tasks owned by the user
        query = db.query(WebsiteAnalysisExecutionLog).join(
            WebsiteAnalysisTask,
            WebsiteAnalysisExecutionLog.task_id == WebsiteAnalysisTask.id
        ).filter(
            WebsiteAnalysisTask.user_id == user_id
        )

        # Compare against None so that an explicitly supplied ID is never
        # silently dropped just because it is falsy
        if task_id is not None:
            query = query.filter(WebsiteAnalysisExecutionLog.task_id == task_id)

        # Total is taken before pagination is applied
        total_count = query.count()

        logs = query.order_by(
            desc(WebsiteAnalysisExecutionLog.execution_date)
        ).offset(offset).limit(limit).all()

        # Load the tasks referenced by this page with one query instead of
        # issuing a separate lookup per log row
        page_task_ids = list({log.task_id for log in logs})
        tasks_by_id: Dict[Any, Any] = {}
        if page_task_ids:
            tasks_by_id = {
                task.id: task
                for task in db.query(WebsiteAnalysisTask).filter(
                    WebsiteAnalysisTask.id.in_(page_task_ids)
                ).all()
            }

        # Format logs
        formatted_logs = []
        for log in logs:
            # Task details; None if the task is no longer present
            task = tasks_by_id.get(log.task_id)
            formatted_logs.append({
                'id': log.id,
                'task_id': log.task_id,
                'website_url': task.website_url if task else None,
                'task_type': task.task_type if task else None,
                'execution_date': log.execution_date.isoformat() if log.execution_date else None,
                'status': log.status,
                'result_data': log.result_data,
                'error_message': log.error_message,
                'execution_time_ms': log.execution_time_ms,
                'created_at': log.created_at.isoformat() if log.created_at else None
            })

        return {
            'logs': formatted_logs,
            'total_count': total_count,
            'limit': limit,
            'offset': offset,
            'has_more': (offset + limit) < total_count
        }

    except HTTPException:
        # Deliberate HTTP errors (e.g. 403) pass through untouched
        raise
    except Exception as e:
        logger.error(f"Error getting website analysis logs for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get website analysis logs: {str(e)}")
@router.post("/website-analysis/retry/{task_id}")
async def retry_website_analysis(
    task_id: int,
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Put a website analysis task back into the 'active' state and make it due now.

    Args:
        task_id: ID of the task to retry

    Returns:
        Dictionary with 'success', a confirmation 'message' and the updated
        'task' fields (id, website_url, status, next_check)

    Raises:
        HTTPException: 404 when the task does not exist, 403 when it belongs
            to another user, 500 on any unexpected error
    """
    try:
        lookup = db.query(WebsiteAnalysisTask).filter(WebsiteAnalysisTask.id == task_id)
        task = lookup.first()

        if not task:
            raise HTTPException(status_code=404, detail="Task not found")

        # Only the owner of the task may retry it
        requester_id = str(current_user.get('id'))
        if requester_id != task.user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Clear the failure state; a next_check of "now" makes the task due immediately
        task.status = 'active'
        task.failure_reason = None
        task.next_check = datetime.utcnow()
        task.updated_at = datetime.utcnow()
        db.commit()

        logger.info(f"Manually retried website analysis task {task_id} for user {task.user_id}")

        task_summary = {
            'id': task.id,
            'website_url': task.website_url,
            'status': task.status,
            'next_check': task.next_check.isoformat() if task.next_check else None
        }
        return {
            'success': True,
            'message': f'Website analysis task {task_id} scheduled for immediate execution',
            'task': task_summary
        }

    except HTTPException:
        # Deliberate HTTP errors (404/403) pass through untouched
        raise
    except Exception as e:
        logger.error(f"Error retrying website analysis task {task_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to retry website analysis: {str(e)}")
@router.get("/platform-insights/logs/{user_id}")
async def get_platform_insights_logs(
    user_id: str,
    task_id: Optional[int] = Query(None),
    limit: int = Query(10, ge=1, le=100),
    db: Session = Depends(get_db),
    current_user: Dict[str, Any] = Depends(get_current_user)
):
    """
    Get the most recent execution logs for a user's platform insights tasks.

    Args:
        user_id: User ID; must match the authenticated user
        task_id: Optional task ID to filter logs
        limit: Maximum number of logs to return

    Returns:
        Dictionary with 'success', the formatted 'logs' (newest execution
        first) and 'total_count', which is the number of logs in this
        response rather than the number stored

    Raises:
        HTTPException: 403 when another user's data is requested, 500 on any
            unexpected error
    """
    try:
        # Users may only read their own logs
        requester_id = str(current_user.get('id'))
        if requester_id != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        log_model = PlatformInsightsExecutionLog
        task_model = PlatformInsightsTask

        # Joining on the task table restricts the logs to tasks owned by the user
        query = (
            db.query(log_model)
            .join(task_model, log_model.task_id == task_model.id)
            .filter(task_model.user_id == user_id)
        )
        if task_id:
            query = query.filter(log_model.task_id == task_id)

        newest_first = query.order_by(desc(log_model.execution_date))
        logs = newest_first.limit(limit).all()

        def iso(moment):
            # ISO-8601 text for a set timestamp, None otherwise
            return moment.isoformat() if moment else None

        formatted = []
        for entry in logs:
            formatted.append({
                'id': entry.id,
                'task_id': entry.task_id,
                'execution_date': iso(entry.execution_date),
                'status': entry.status,
                'result_data': entry.result_data,
                'error_message': entry.error_message,
                'execution_time_ms': entry.execution_time_ms,
                'data_source': entry.data_source,
                'created_at': iso(entry.created_at)
            })

        return {
            'success': True,
            'logs': formatted,
            'total_count': len(logs)
        }

    except HTTPException:
        # Deliberate HTTP errors (e.g. 403) pass through untouched
        raise
    except Exception as e:
        logger.error(f"Error getting platform insights logs for user {user_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get platform insights logs: {str(e)}")

View File

@@ -5,18 +5,24 @@ Handles Wix authentication, connection status, and blog publishing.
"""
from fastapi import APIRouter, HTTPException, Depends, Request
from fastapi.responses import HTMLResponse
from typing import Dict, Any, Optional
from loguru import logger
from pydantic import BaseModel
from services.wix_service import WixService
from services.integrations.wix_oauth import WixOAuthService
from middleware.auth_middleware import get_current_user
import os
# Router for the Wix integration endpoints; every route is served under /api/wix
router = APIRouter(prefix="/api/wix", tags=["Wix Integration"])
# Initialize Wix service (created once at import time and shared by the handlers in this module)
wix_service = WixService()
# Initialize Wix OAuth service for token storage
# NOTE(review): "alwrity.db" is a relative path, so os.path.abspath resolves it
# against the working directory of the process at import time — confirm this is
# the intended database location for every way the app is started
wix_oauth_service = WixOAuthService(db_path=os.path.abspath("alwrity.db"))
class WixAuthRequest(BaseModel):
"""Request model for Wix authentication"""
@@ -88,17 +94,41 @@ async def handle_oauth_callback(request: WixAuthRequest, current_user: dict = De
Token information and connection status
"""
try:
user_id = current_user.get('id')
if not user_id:
raise HTTPException(status_code=400, detail="User ID not found")
# Exchange code for tokens
tokens = wix_service.exchange_code_for_tokens(request.code)
# Get site information
# Get site information to extract site_id and member_id
site_info = wix_service.get_site_info(tokens['access_token'])
site_id = site_info.get('siteId') or site_info.get('site_id')
# Extract member_id from token if possible
member_id = None
try:
member_id = wix_service.extract_member_id_from_access_token(tokens['access_token'])
except Exception:
pass
# Check permissions
permissions = wix_service.check_blog_permissions(tokens['access_token'])
# TODO: Store tokens securely in database associated with current_user
# For now, we'll return them (in production, store in encrypted database)
# Store tokens securely in database
stored = wix_oauth_service.store_tokens(
user_id=user_id,
access_token=tokens['access_token'],
refresh_token=tokens.get('refresh_token'),
expires_in=tokens.get('expires_in'),
token_type=tokens.get('token_type', 'Bearer'),
scope=tokens.get('scope'),
site_id=site_id,
member_id=member_id
)
if not stored:
logger.warning(f"Failed to store Wix tokens for user {user_id}, but OAuth succeeded")
return {
"success": True,
@@ -125,6 +155,29 @@ async def handle_oauth_callback_get(code: str, state: Optional[str] = None, requ
tokens = wix_service.exchange_code_for_tokens(code)
site_info = wix_service.get_site_info(tokens['access_token'])
permissions = wix_service.check_blog_permissions(tokens['access_token'])
# Store tokens in database if we have user_id
user_id = current_user.get('id') if current_user else None
if user_id:
site_id = site_info.get('siteId') or site_info.get('site_id')
member_id = None
try:
member_id = wix_service.extract_member_id_from_access_token(tokens['access_token'])
except Exception:
pass
stored = wix_oauth_service.store_tokens(
user_id=user_id,
access_token=tokens['access_token'],
refresh_token=tokens.get('refresh_token'),
expires_in=tokens.get('expires_in'),
token_type=tokens.get('token_type', 'Bearer'),
scope=tokens.get('scope'),
site_id=site_id,
member_id=member_id
)
if not stored:
logger.warning(f"Failed to store Wix tokens for user {user_id} in GET callback")
# Build success payload for postMessage
payload = {