Scheduled research persona generation

This commit is contained in:
ajaysi
2025-11-05 08:51:00 +05:30
parent 55087c4f37
commit d99c7c83a7
98 changed files with 14518 additions and 828 deletions

View File

@@ -0,0 +1,310 @@
"""
OAuth Token Monitoring API Routes
Provides endpoints for managing OAuth token monitoring tasks and manual triggers.
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from typing import List, Dict, Any, Optional
from datetime import datetime
from loguru import logger
from services.database import get_db_session
from middleware.auth_middleware import get_current_user
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask, OAuthTokenExecutionLog
from services.scheduler import get_scheduler
from services.oauth_token_monitoring_service import create_oauth_monitoring_tasks, get_connected_platforms
router = APIRouter(prefix="/api/oauth-tokens", tags=["oauth-tokens"])
@router.get("/status/{user_id}")
async def get_oauth_token_status(
    user_id: str,
    db: Session = Depends(get_db_session),
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Get OAuth token monitoring status for all platforms for a user.

    Args:
        user_id: User ID from the URL path; must equal the authenticated user's ID.
        db: Database session injected by FastAPI.
        current_user: Authenticated user payload injected by FastAPI.

    Returns:
        A dict with ``success`` and ``data``. ``data`` contains the ``user_id``,
        the ``connected_platforms`` value and a ``platform_status`` mapping with,
        per platform:
        - ``connected``: whether the platform is among the connected platforms
        - ``monitoring_task``: task id, status, last check / success / failure,
          failure reason and next check, or ``None`` when no task exists

    Raises:
        HTTPException: 403 if the path ``user_id`` is not the caller's own ID,
            500 on any unexpected error.
    """
    def _iso(moment):
        """Return ``moment.isoformat()``, or None when the timestamp is unset."""
        return moment.isoformat() if moment else None

    try:
        # Verify user can only access their own data
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Get all monitoring tasks for user
        tasks = db.query(OAuthTokenMonitoringTask).filter(
            OAuthTokenMonitoringTask.user_id == user_id
        ).all()

        # Index tasks by platform once instead of scanning the list per platform.
        # setdefault keeps the FIRST task seen for a platform, matching a
        # first-match linear search.
        task_by_platform = {}
        for candidate in tasks:
            task_by_platform.setdefault(candidate.platform, candidate)

        # Get connected platforms
        logger.info(f"[OAuth Status API] Getting token status for user: {user_id}")
        connected_platforms = get_connected_platforms(user_id)
        logger.info(f"[OAuth Status API] Found {len(connected_platforms)} connected platforms: {connected_platforms}")

        # Build status response
        platform_status = {}
        for platform in ['gsc', 'bing', 'wordpress', 'wix']:
            task = task_by_platform.get(platform)
            is_connected = platform in connected_platforms

            monitoring_task = None
            if task:
                monitoring_task = {
                    'id': task.id,
                    'status': task.status,
                    'last_check': _iso(task.last_check),
                    'last_success': _iso(task.last_success),
                    'last_failure': _iso(task.last_failure),
                    'failure_reason': task.failure_reason,
                    'next_check': _iso(task.next_check),
                }

            platform_status[platform] = {
                'connected': is_connected,
                'monitoring_task': monitoring_task
            }
            logger.info(
                f"[OAuth Status API] Platform {platform}: "
                f"connected={is_connected}, "
                f"task_exists={task is not None}, "
                f"task_status={task.status if task else 'N/A'}"
            )

        response_data = {
            "success": True,
            "data": {
                "user_id": user_id,
                "platform_status": platform_status,
                "connected_platforms": connected_platforms
            }
        }
        logger.info(f"[OAuth Status API] Returning status for user {user_id}: {len(connected_platforms)} platforms connected")
        return response_data
    except HTTPException:
        raise
    except Exception as e:
        # loguru has no `exc_info` keyword: passing it records no traceback and
        # makes loguru str.format() the message, which can itself raise when the
        # exception text contains braces. logger.exception() attaches the
        # traceback without formatting the message.
        logger.exception(f"Error getting OAuth token status for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get token status: {str(e)}") from e
@router.post("/refresh/{user_id}/{platform}")
async def manual_refresh_token(
    user_id: str,
    platform: str,
    db: Session = Depends(get_db_session),
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Manually trigger token refresh for a specific platform.

    This will:
    1. Find or create the monitoring task
    2. Execute the token check/refresh immediately via the scheduler's
       'oauth_token_monitoring' executor
    3. Re-read the task from the database and report its state

    Args:
        user_id: User ID from the URL path; must equal the authenticated user's ID.
        platform: Platform identifier ('gsc', 'bing', 'wordpress', 'wix')
        db: Database session injected by FastAPI.
        current_user: Authenticated user payload injected by FastAPI.

    Returns:
        A dict with ``success`` (taken from the execution result), a ``message``
        and ``data`` holding the task's status/timestamps plus the
        ``execution_result`` details.

    Raises:
        HTTPException: 403 if the path ``user_id`` is not the caller's own ID,
            400 for an unknown platform, 500 if the executor is not registered
            or any unexpected error occurs.
    """
    def _iso(moment):
        """Return ``moment.isoformat()``, or None when the timestamp is unset."""
        return moment.isoformat() if moment else None

    try:
        # Verify user can only access their own data
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Validate platform
        valid_platforms = ['gsc', 'bing', 'wordpress', 'wix']
        if platform not in valid_platforms:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid platform. Must be one of: {', '.join(valid_platforms)}"
            )

        # Get or create monitoring task
        task = db.query(OAuthTokenMonitoringTask).filter(
            OAuthTokenMonitoringTask.user_id == user_id,
            OAuthTokenMonitoringTask.platform == platform
        ).first()

        if not task:
            # Create task if it doesn't exist. A single timestamp keeps
            # next_check / created_at / updated_at identical on the new row.
            now = datetime.utcnow()
            task = OAuthTokenMonitoringTask(
                user_id=user_id,
                platform=platform,
                status='active',
                next_check=now,  # Set to now to trigger immediately
                created_at=now,
                updated_at=now
            )
            db.add(task)
            db.commit()
            db.refresh(task)
            logger.info(f"Created monitoring task for manual refresh: user={user_id}, platform={platform}")

        # Get scheduler and executor
        scheduler = get_scheduler()
        try:
            executor = scheduler.registry.get_executor('oauth_token_monitoring')
        except ValueError:
            raise HTTPException(status_code=500, detail="OAuth token monitoring executor not available")

        # Execute task immediately
        logger.info(f"Manually triggering token refresh: user={user_id}, platform={platform}")
        result = await executor.execute_task(task, db)

        # Get updated task
        db.refresh(task)

        return {
            "success": result.success,
            "message": "Token refresh completed" if result.success else "Token refresh failed",
            "data": {
                "platform": platform,
                "status": task.status,
                "last_check": _iso(task.last_check),
                "last_success": _iso(task.last_success),
                "last_failure": _iso(task.last_failure),
                "failure_reason": task.failure_reason,
                "next_check": _iso(task.next_check),
                "execution_result": {
                    "success": result.success,
                    "error_message": result.error_message,
                    "execution_time_ms": result.execution_time_ms,
                    "result_data": result.result_data
                }
            }
        }
    except HTTPException:
        raise
    except Exception as e:
        # loguru has no `exc_info` keyword: passing it records no traceback and
        # makes loguru str.format() the message, which can itself raise when the
        # exception text contains braces. logger.exception() attaches the
        # traceback without formatting the message.
        logger.exception(f"Error manually refreshing token for user {user_id}, platform {platform}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to refresh token: {str(e)}") from e
@router.get("/execution-logs/{user_id}")
async def get_execution_logs(
    user_id: str,
    platform: Optional[str] = Query(None, description="Filter by platform"),
    limit: int = Query(50, ge=1, le=100, description="Maximum number of logs"),
    offset: int = Query(0, ge=0, description="Offset for pagination"),
    db: Session = Depends(get_db_session),
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Get execution logs for OAuth token monitoring tasks.

    Args:
        user_id: User ID from the URL path; must equal the authenticated user's ID.
        platform: Optional platform filter
        limit: Maximum number of logs to return (1-100)
        offset: Pagination offset
        db: Database session injected by FastAPI.
        current_user: Authenticated user payload injected by FastAPI.

    Returns:
        A dict with ``success`` and ``data``; ``data`` holds the page of ``logs``
        (newest execution first), the ``total_count`` of matching logs before
        pagination, and the ``limit`` / ``offset`` that were applied.

    Raises:
        HTTPException: 403 if the path ``user_id`` is not the caller's own ID,
            500 on any unexpected error.
    """
    def _iso(moment):
        """Return ``moment.isoformat()``, or None when the timestamp is unset."""
        return moment.isoformat() if moment else None

    try:
        # Verify user can only access their own data
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Build query: logs joined to their task so they can be scoped by owner
        query = db.query(OAuthTokenExecutionLog).join(
            OAuthTokenMonitoringTask,
            OAuthTokenExecutionLog.task_id == OAuthTokenMonitoringTask.id
        ).filter(
            OAuthTokenMonitoringTask.user_id == user_id
        )

        # Apply platform filter if provided
        if platform:
            query = query.filter(OAuthTokenMonitoringTask.platform == platform)

        # Get total count (before pagination is applied)
        total_count = query.count()

        # Get paginated logs
        logs = query.order_by(
            OAuthTokenExecutionLog.execution_date.desc()
        ).offset(offset).limit(limit).all()

        # Format logs. Timestamps go through _iso so a row with an unset
        # date serializes as null instead of failing the whole request.
        logs_data = [
            {
                "id": log.id,
                "task_id": log.task_id,
                "platform": log.task.platform,  # Get platform from relationship
                "execution_date": _iso(log.execution_date),
                "status": log.status,
                "result_data": log.result_data,
                "error_message": log.error_message,
                "execution_time_ms": log.execution_time_ms,
                "created_at": _iso(log.created_at)
            }
            for log in logs
        ]

        return {
            "success": True,
            "data": {
                "logs": logs_data,
                "total_count": total_count,
                "limit": limit,
                "offset": offset
            }
        }
    except HTTPException:
        raise
    except Exception as e:
        # loguru has no `exc_info` keyword: passing it records no traceback and
        # makes loguru str.format() the message, which can itself raise when the
        # exception text contains braces. logger.exception() attaches the
        # traceback without formatting the message.
        logger.exception(f"Error getting execution logs for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get execution logs: {str(e)}") from e
@router.post("/create-tasks/{user_id}")
async def create_monitoring_tasks(
    user_id: str,
    platforms: Optional[List[str]] = None,
    db: Session = Depends(get_db_session),
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Manually create OAuth token monitoring tasks for a user.

    Delegates to ``create_oauth_monitoring_tasks(user_id, db, platforms)``.
    If platforms are not provided, automatically detects connected platforms.

    Args:
        user_id: User ID from the URL path; must equal the authenticated user's ID.
        platforms: Optional list of platforms to create tasks for
        db: Database session injected by FastAPI.
        current_user: Authenticated user payload injected by FastAPI.

    Returns:
        A dict with ``success``, a ``message`` and ``data`` holding
        ``tasks_created`` plus the id, platform, status and next check of each
        returned task.

    Raises:
        HTTPException: 403 if the path ``user_id`` is not the caller's own ID,
            500 on any unexpected error.
    """
    try:
        # Verify user can only access their own data
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Create tasks
        tasks = create_oauth_monitoring_tasks(user_id, db, platforms)

        task_summaries = [
            {
                "id": task.id,
                "platform": task.platform,
                "status": task.status,
                "next_check": task.next_check.isoformat() if task.next_check else None
            }
            for task in tasks
        ]

        return {
            "success": True,
            "message": f"Created {len(tasks)} monitoring task(s)",
            "data": {
                "tasks_created": len(tasks),
                "tasks": task_summaries
            }
        }
    except HTTPException:
        raise
    except Exception as e:
        # loguru has no `exc_info` keyword: passing it records no traceback and
        # makes loguru str.format() the message, which can itself raise when the
        # exception text contains braces. logger.exception() attaches the
        # traceback without formatting the message.
        logger.exception(f"Error creating monitoring tasks for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to create monitoring tasks: {str(e)}") from e

View File

@@ -12,6 +12,9 @@ from services.onboarding.progress_service import get_onboarding_progress_service
from services.onboarding.database_service import OnboardingDatabaseService
from services.database import get_db
from services.persona_analysis_service import PersonaAnalysisService
from services.research.research_persona_scheduler import schedule_research_persona_generation
from services.persona.facebook.facebook_persona_scheduler import schedule_facebook_persona_generation
from services.oauth_token_monitoring_service import create_oauth_monitoring_tasks
class OnboardingCompletionService:
"""Service for handling onboarding completion logic."""
@@ -46,6 +49,38 @@ class OnboardingCompletionService:
if not success:
raise HTTPException(status_code=500, detail="Failed to mark onboarding as complete")
# Schedule research persona generation 20 minutes after onboarding completion
try:
schedule_research_persona_generation(user_id, delay_minutes=20)
logger.info(f"Scheduled research persona generation for user {user_id} (20 minutes after onboarding)")
except Exception as e:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to schedule research persona generation for user {user_id}: {e}")
# Schedule Facebook persona generation 20 minutes after onboarding completion
try:
schedule_facebook_persona_generation(user_id, delay_minutes=20)
logger.info(f"Scheduled Facebook persona generation for user {user_id} (20 minutes after onboarding)")
except Exception as e:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to schedule Facebook persona generation for user {user_id}: {e}")
# Create OAuth token monitoring tasks for connected platforms
try:
from services.database import SessionLocal
db = SessionLocal()
try:
monitoring_tasks = create_oauth_monitoring_tasks(user_id, db)
logger.info(
f"Created {len(monitoring_tasks)} OAuth token monitoring tasks for user {user_id} "
f"on onboarding completion"
)
finally:
db.close()
except Exception as e:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to create OAuth token monitoring tasks for user {user_id}: {e}")
return {
"message": "Onboarding completed successfully",
"completed_at": datetime.now().isoformat(),

View File

@@ -380,6 +380,41 @@ async def generate_platform_persona(user_id: str, platform: str, db_session):
logger.error(f"Error generating {platform} persona: {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to generate {platform} persona: {str(e)}")
async def check_facebook_persona(user_id: str, db: Session):
    """Report whether the user has a stored Facebook persona.

    Loads the user's persona data through ``PersonaDataService`` and returns a
    dict with ``has_persona``, ``has_core_persona`` and ``onboarding_completed``
    flags. When persona data exists the Facebook persona itself is returned
    under ``persona``; when none exists a ``message`` is returned instead.

    Raises:
        HTTPException: 500 carrying the error text if anything fails.
    """
    try:
        # Imported lazily inside the function, as in the rest of this handler's usage
        from services.persona_data_service import PersonaDataService

        service = PersonaDataService(db_session=db)
        stored = service.get_user_persona_data(user_id)

        if stored:
            per_platform = stored.get('platform_personas', {})
            # An empty/None platform mapping means there is no Facebook persona
            fb_persona = per_platform.get('facebook') if per_platform else None
            # Check if core persona exists
            core_present = bool(stored.get('core_persona'))
            return {
                "has_persona": bool(fb_persona),
                "has_core_persona": core_present,
                "persona": fb_persona,
                # Assume onboarding is completed if persona data exists
                "onboarding_completed": True
            }

        # No persona data at all for this user
        return {
            "has_persona": False,
            "has_core_persona": False,
            "message": "No persona data found",
            "onboarding_completed": False
        }
    except Exception as e:
        logger.error(f"Error checking Facebook persona for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
async def validate_persona_generation_readiness(user_id: int):
"""Check if user has sufficient onboarding data for persona generation."""
try:

View File

@@ -36,7 +36,7 @@ from api.persona import (
)
from services.persona_replication_engine import PersonaReplicationEngine
from api.persona import update_platform_persona, generate_platform_persona
from api.persona import update_platform_persona, generate_platform_persona, check_facebook_persona
# Create router
router = APIRouter(prefix="/api/personas", tags=["personas"])
@@ -248,4 +248,12 @@ async def update_platform_persona_endpoint(
Allows editing persona fields in the UI and saving them to the database.
"""
# Beta testing: Force user_id=1 for all requests
return await update_platform_persona(1, platform, update_data)
return await update_platform_persona(1, platform, update_data)
@router.get("/facebook-persona/check/{user_id}")
async def check_facebook_persona_endpoint(
    user_id: str,
    db: Session = Depends(get_db)
):
    """Route wrapper: check if a Facebook persona exists for ``user_id``.

    Delegates to ``check_facebook_persona`` and returns its result unchanged.
    """
    # NOTE(review): no authentication dependency is declared on this route, so
    # the path user_id is used as-is — confirm that is intended.
    persona_check = await check_facebook_persona(user_id, db)
    return persona_check

View File

@@ -0,0 +1,398 @@
"""
Research Configuration API
Provides provider availability and persona-aware defaults for research.
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from typing import Dict, Any, Optional
from loguru import logger
from pydantic import BaseModel
from middleware.auth_middleware import get_current_user
from services.user_api_key_context import get_exa_key, get_gemini_key
from services.onboarding.database_service import OnboardingDatabaseService
from services.onboarding.progress_service import get_onboarding_progress_service
from services.database import get_db
from sqlalchemy.orm import Session
from services.research.research_persona_service import ResearchPersonaService
from services.research.research_persona_scheduler import schedule_research_persona_generation
from models.research_persona_models import ResearchPersona
router = APIRouter()
class ProviderAvailability(BaseModel):
    """Response model: which research providers are usable and the state of each API key."""
    # Availability flags per provider
    google_available: bool
    exa_available: bool
    # Key status strings: 'configured' | 'missing'
    gemini_key_status: str
    exa_key_status: str
class PersonaDefaults(BaseModel):
    """Response model: research defaults derived from the user's persona data.

    Every field is optional/empty by default, so ``PersonaDefaults()`` is a
    valid "nothing known" value.
    """
    industry: Optional[str] = None
    target_audience: Optional[str] = None
    # Domain names suggested for the industry
    suggested_domains: list[str] = []
    # Exa search category suggested for the industry, if any
    suggested_exa_category: Optional[str] = None
class ResearchConfigResponse(BaseModel):
    """Response model bundling everything the research configuration endpoint returns."""
    provider_availability: ProviderAvailability
    persona_defaults: PersonaDefaults
    # Research persona, when one is available; otherwise None
    research_persona: Optional[ResearchPersona] = None
    # Whether onboarding is reported as completed
    onboarding_completed: bool = False
    # Whether persona generation was scheduled while building this response
    persona_scheduled: bool = False
@router.get("/provider-availability", response_model=ProviderAvailability)
async def get_provider_availability(
    current_user: Dict = Depends(get_current_user)
):
    """
    Check which research providers are available for the current user.

    Args:
        current_user: Authenticated user payload injected by FastAPI.

    Returns:
        ProviderAvailability with:
        - google_available: True if Gemini key is configured
        - exa_available: True if Exa key is configured
        - Key status ('configured' | 'missing') for each provider

    Raises:
        HTTPException: 500 on any unexpected error.
    """
    # Bound before the try block so the error handler can always reference it
    user_id = None
    try:
        user_id = str(current_user.get('id'))

        # Check API key availability; a blank/whitespace-only key counts as missing
        gemini_key = get_gemini_key(user_id)
        exa_key = get_exa_key(user_id)
        google_available = bool(gemini_key and gemini_key.strip())
        exa_available = bool(exa_key and exa_key.strip())

        return ProviderAvailability(
            google_available=google_available,
            exa_available=exa_available,
            gemini_key_status='configured' if google_available else 'missing',
            exa_key_status='configured' if exa_available else 'missing'
        )
    except Exception as e:
        # loguru has no `exc_info` keyword: passing it records no traceback and
        # makes loguru str.format() the message, which can itself raise when the
        # exception text contains braces. logger.exception() attaches the
        # traceback without formatting the message.
        logger.exception(f"[ResearchConfig] Error checking provider availability for user {user_id or 'unknown'}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to check provider availability: {str(e)}") from e
@router.get("/persona-defaults", response_model=PersonaDefaults)
async def get_persona_defaults(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get persona-aware research defaults for the current user.

    Industry and target audience are taken from the core persona when present;
    if the industry is still 'General', the website analysis ``target_audience``
    field is consulted as a fallback. Domain and Exa category suggestions are
    then derived from the resolved industry.

    Args:
        current_user: Authenticated user payload injected by FastAPI.
        db: Database session injected by FastAPI.

    Returns:
        PersonaDefaults. This endpoint is best-effort: when the database session
        is missing or any error occurs, an empty ``PersonaDefaults()`` is
        returned instead of an error response.
    """
    # Bound before the try block so the error handler can always reference it
    user_id = None
    try:
        user_id = str(current_user.get('id'))

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_persona_defaults")
            # Return defaults rather than error
            return PersonaDefaults()

        db_service = OnboardingDatabaseService(db=db)

        # Try to get persona data first (most reliable source for industry/target_audience)
        persona_data = db_service.get_persona_data(user_id, db)
        industry = 'General'
        target_audience = 'General'

        if persona_data:
            # Both camelCase and snake_case keys are accepted
            core_persona = persona_data.get('corePersona') or persona_data.get('core_persona')
            if core_persona:
                if core_persona.get('industry'):
                    industry = core_persona['industry']
                if core_persona.get('target_audience'):
                    target_audience = core_persona['target_audience']

        # Fallback to website analysis if persona data doesn't have industry info
        if industry == 'General':
            website_analysis = db_service.get_website_analysis(user_id, db)
            if website_analysis:
                target_audience_data = website_analysis.get('target_audience', {})
                if isinstance(target_audience_data, dict):
                    # Extract from target_audience JSON field
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus:
                        industry = industry_focus
                    demographics = target_audience_data.get('demographics')
                    if demographics:
                        target_audience = demographics if isinstance(demographics, str) else str(demographics)

        # Suggest domains based on industry
        suggested_domains = _get_domain_suggestions(industry)
        # Suggest Exa category based on industry
        suggested_exa_category = _get_exa_category_suggestion(industry)

        return PersonaDefaults(
            industry=industry,
            target_audience=target_audience,
            suggested_domains=suggested_domains,
            suggested_exa_category=suggested_exa_category
        )
    except Exception as e:
        # loguru has no `exc_info` keyword: passing it records no traceback and
        # makes loguru str.format() the message, which can itself raise when the
        # exception text contains braces. logger.exception() attaches the
        # traceback without formatting the message.
        logger.exception(f"[ResearchConfig] Error getting persona defaults for user {user_id or 'unknown'}: {e}")
        # Return defaults rather than error
        return PersonaDefaults()
@router.get("/research-persona")
async def get_research_persona(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db),
    force_refresh: bool = Query(False, description="Force regenerate persona even if cache is valid")
):
    """
    Get or generate research persona for the current user.

    Query params:
    - force_refresh: If true, regenerate persona even if cache is valid (default: false)

    Args:
        current_user: Authenticated user payload injected by FastAPI.
        db: Database session injected by FastAPI.
        force_refresh: Passed through to ``ResearchPersonaService.get_or_generate``.

    Returns:
        The research persona as a dict (``research_persona.dict()``).

    Raises:
        HTTPException: 401 when the user payload carries no ID, 404 when no
            persona is available, 500 when the database session is missing or
            an unexpected error occurs. HTTPExceptions raised by the persona
            service are passed through unchanged.
    """
    # Bound before the try block so the error handler can always reference it
    user_id = None
    try:
        # Check the raw ID BEFORE stringifying: str(None) is the truthy string
        # "None", which would make the authentication check below unreachable.
        raw_user_id = (current_user or {}).get('id')
        if raw_user_id is None or raw_user_id == '':
            raise HTTPException(status_code=401, detail="User not authenticated")
        user_id = str(raw_user_id)

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_research_persona")
            raise HTTPException(status_code=500, detail="Database not available")

        persona_service = ResearchPersonaService(db_session=db)
        research_persona = persona_service.get_or_generate(user_id, force_refresh=force_refresh)

        if not research_persona:
            raise HTTPException(
                status_code=404,
                detail="Research persona not available. Complete onboarding to generate one."
            )

        return research_persona.dict()
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve status code and details
        raise
    except Exception as e:
        # loguru has no `exc_info` keyword: passing it records no traceback and
        # makes loguru str.format() the message, which can itself raise when the
        # exception text contains braces. logger.exception() attaches the
        # traceback without formatting the message.
        logger.exception(f"[ResearchConfig] Error getting research persona for user {user_id or 'unknown'}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get research persona: {str(e)}") from e
@router.get("/config", response_model=ResearchConfigResponse)
async def get_research_config(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get complete research configuration including provider availability and persona defaults.

    Steps:
    1. Derive provider availability from the user's Gemini / Exa keys.
    2. Resolve industry / target audience from persona data, falling back to
       website analysis while the industry is still 'General'.
    3. Read the onboarding completion status (failure is logged and treated as
       not completed).
    4. Load the research persona from cache only; if onboarding is completed,
       no persona is cached and persona source data exists, schedule
       generation 20 minutes out and report ``persona_scheduled=True``.

    Args:
        current_user: Authenticated user payload injected by FastAPI.
        db: Database session injected by FastAPI.

    Returns:
        ResearchConfigResponse. If building the response with the research
        persona fails, the response is rebuilt without it.

    Raises:
        HTTPException: 500 when the database session is missing or an
            unexpected error occurs; other HTTPExceptions pass through.
    """
    user_id = None
    try:
        user_id = str(current_user.get('id'))
        logger.info(f"[ResearchConfig] Starting get_research_config for user {user_id}")

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_research_config")
            raise HTTPException(status_code=500, detail="Database session not available")

        # Get provider availability; a blank/whitespace-only key counts as missing
        logger.debug(f"[ResearchConfig] Getting provider availability for user {user_id}")
        gemini_key = get_gemini_key(user_id)
        exa_key = get_exa_key(user_id)
        google_available = bool(gemini_key and gemini_key.strip())
        exa_available = bool(exa_key and exa_key.strip())
        provider_availability = ProviderAvailability(
            google_available=google_available,
            exa_available=exa_available,
            gemini_key_status='configured' if google_available else 'missing',
            exa_key_status='configured' if exa_available else 'missing'
        )

        # Get persona defaults
        logger.debug(f"[ResearchConfig] Getting persona defaults for user {user_id}")
        db_service = OnboardingDatabaseService(db=db)

        # Try to get persona data first (most reliable source for industry/target_audience)
        try:
            persona_data = db_service.get_persona_data(user_id, db)
        except Exception as e:
            # logger.exception attaches the traceback; loguru ignores `exc_info`
            logger.exception(f"[ResearchConfig] Error getting persona data for user {user_id}: {e}")
            persona_data = None

        industry = 'General'
        target_audience = 'General'

        if persona_data:
            # Both camelCase and snake_case keys are accepted
            core_persona = persona_data.get('corePersona') or persona_data.get('core_persona')
            if core_persona:
                if core_persona.get('industry'):
                    industry = core_persona['industry']
                if core_persona.get('target_audience'):
                    target_audience = core_persona['target_audience']

        # Fallback to website analysis if persona data doesn't have industry info
        if industry == 'General':
            website_analysis = db_service.get_website_analysis(user_id, db)
            if website_analysis:
                target_audience_data = website_analysis.get('target_audience', {})
                if isinstance(target_audience_data, dict):
                    # Extract from target_audience JSON field
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus:
                        industry = industry_focus
                    demographics = target_audience_data.get('demographics')
                    if demographics:
                        target_audience = demographics if isinstance(demographics, str) else str(demographics)

        persona_defaults = PersonaDefaults(
            industry=industry,
            target_audience=target_audience,
            suggested_domains=_get_domain_suggestions(industry),
            suggested_exa_category=_get_exa_category_suggestion(industry)
        )

        # Check onboarding completion status
        onboarding_completed = False
        try:
            logger.debug(f"[ResearchConfig] Checking onboarding status for user {user_id}")
            progress_service = get_onboarding_progress_service()
            onboarding_status = progress_service.get_onboarding_status(user_id)
            onboarding_completed = onboarding_status.get('is_completed', False)
            logger.info(
                f"[ResearchConfig] Onboarding status check for user {user_id}: "
                f"is_completed={onboarding_completed}, "
                f"current_step={onboarding_status.get('current_step')}, "
                f"progress={onboarding_status.get('completion_percentage')}"
            )
        except Exception as e:
            logger.exception(f"[ResearchConfig] Could not check onboarding status for user {user_id}: {e}")
            # Continue with onboarding_completed=False

        # Get research persona (optional, may not exist for all users)
        # CRITICAL: Use get_cached_only() to avoid triggering rate limit checks
        # Only return persona if it's already cached - don't generate on config load
        research_persona = None
        persona_scheduled = False
        try:
            logger.debug(f"[ResearchConfig] Getting cached research persona for user {user_id}")
            persona_service = ResearchPersonaService(db_session=db)
            research_persona = persona_service.get_cached_only(user_id)

            logger.info(
                f"[ResearchConfig] Research persona check for user {user_id}: "
                f"persona_exists={research_persona is not None}, "
                f"onboarding_completed={onboarding_completed}"
            )

            # If onboarding is completed but persona doesn't exist, schedule generation
            if onboarding_completed and not research_persona:
                try:
                    # Check if persona data exists (to ensure we have data to generate from).
                    # The persona data loaded above is reused rather than
                    # creating a second service and repeating the same query.
                    has_source_data = bool(persona_data and (
                        persona_data.get('corePersona') or persona_data.get('platformPersonas') or
                        persona_data.get('core_persona') or persona_data.get('platform_personas')
                    ))
                    if has_source_data:
                        # Schedule persona generation (20 minutes from now)
                        # NOTE(review): this runs on a GET while no persona is cached —
                        # presumably the scheduler de-duplicates repeat requests; confirm.
                        schedule_research_persona_generation(user_id, delay_minutes=20)
                        logger.info(f"Scheduled research persona generation for user {user_id} (onboarding already completed)")
                        persona_scheduled = True
                    else:
                        logger.info(f"Onboarding completed but no persona data found for user {user_id} - cannot schedule persona generation")
                except Exception as e:
                    # Non-critical: keep serving the config; opt(exception=True)
                    # attaches the traceback at WARNING level
                    logger.opt(exception=True).warning(f"Failed to schedule research persona generation: {e}")
        except Exception as e:
            # get_cached_only() never raises HTTPException, but catch any unexpected errors
            logger.opt(exception=True).warning(f"[ResearchConfig] Could not load cached research persona for user {user_id}: {e}")

        # FastAPI will automatically serialize the ResearchPersona Pydantic model
        # If there's a serialization issue, we catch it and log it
        try:
            response = ResearchConfigResponse(
                provider_availability=provider_availability,
                persona_defaults=persona_defaults,
                research_persona=research_persona,
                onboarding_completed=onboarding_completed,
                persona_scheduled=persona_scheduled
            )
        except Exception as serialization_error:
            logger.exception(f"[ResearchConfig] Failed to create ResearchConfigResponse for user {user_id}: {serialization_error}")
            # Try without research_persona as fallback
            response = ResearchConfigResponse(
                provider_availability=provider_availability,
                persona_defaults=persona_defaults,
                research_persona=None,
                onboarding_completed=onboarding_completed,
                persona_scheduled=persona_scheduled
            )

        logger.info(
            f"[ResearchConfig] Response for user {user_id}: "
            f"onboarding_completed={onboarding_completed}, "
            f"persona_exists={research_persona is not None}, "
            f"persona_scheduled={persona_scheduled}"
        )
        return response
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429, 401, etc.) to preserve status codes
        raise
    except Exception as e:
        # loguru has no `exc_info` keyword: passing it records no traceback and
        # makes loguru str.format() the message, which can itself raise when the
        # exception text contains braces. logger.exception() logs the full
        # traceback, so no separate traceback.format_exc() dump is needed.
        logger.exception(f"[ResearchConfig] CRITICAL ERROR getting research config for user {user_id if user_id else 'unknown'}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get research config: {str(e)}"
        ) from e
# Helper functions from RESEARCH_AI_HYPERPERSONALIZATION.md
def _get_domain_suggestions(industry: str) -> list[str]:
"""Get domain suggestions based on industry."""
domain_map = {
'Healthcare': ['pubmed.gov', 'nejm.org', 'thelancet.com', 'nih.gov'],
'Technology': ['techcrunch.com', 'wired.com', 'arstechnica.com', 'theverge.com'],
'Finance': ['wsj.com', 'bloomberg.com', 'ft.com', 'reuters.com'],
'Science': ['nature.com', 'sciencemag.org', 'cell.com', 'pnas.org'],
'Business': ['hbr.org', 'forbes.com', 'businessinsider.com', 'mckinsey.com'],
'Marketing': ['marketingland.com', 'adweek.com', 'hubspot.com', 'moz.com'],
'Education': ['edutopia.org', 'chronicle.com', 'insidehighered.com'],
'Real Estate': ['realtor.com', 'zillow.com', 'forbes.com'],
'Entertainment': ['variety.com', 'hollywoodreporter.com', 'deadline.com'],
'Travel': ['lonelyplanet.com', 'nationalgeographic.com', 'travelandleisure.com'],
'Fashion': ['vogue.com', 'elle.com', 'wwd.com'],
'Sports': ['espn.com', 'si.com', 'bleacherreport.com'],
'Law': ['law.com', 'abajournal.com', 'scotusblog.com'],
}
return domain_map.get(industry, [])
def _get_exa_category_suggestion(industry: str) -> Optional[str]:
"""Get Exa category suggestion based on industry."""
category_map = {
'Healthcare': 'research paper',
'Science': 'research paper',
'Finance': 'financial report',
'Technology': 'company',
'Business': 'company',
'Marketing': 'company',
'Education': 'research paper',
'Law': 'pdf',
}
return category_map.get(industry)

View File

@@ -0,0 +1,706 @@
"""
Scheduler Dashboard API
Provides endpoints for scheduler dashboard UI.
"""
from fastapi import APIRouter, HTTPException, Depends, Query
from typing import Dict, Any, Optional, List
from sqlalchemy.orm import Session, joinedload
from sqlalchemy import desc, func
from datetime import datetime
from loguru import logger
from services.scheduler import get_scheduler
from services.scheduler.utils.user_job_store import get_user_job_store_name
from services.monitoring_data_service import MonitoringDataService
from services.database import get_db
from middleware.auth_middleware import get_current_user
from models.monitoring_models import TaskExecutionLog, MonitoringTask
from models.scheduler_models import SchedulerEventLog
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
from sqlalchemy import func
router = APIRouter(prefix="/api/scheduler", tags=["scheduler-dashboard"])
def _dashboard_job_user_id(job) -> Optional[str]:
    """Return the user ID a scheduler job belongs to, or None if unknown.

    The job's ``user_id`` keyword argument wins. Persona jobs without it fall
    back to parsing the job ID.
    """
    job_kwargs = getattr(job, 'kwargs', None)
    if job_kwargs and job_kwargs.get('user_id'):
        return job_kwargs.get('user_id')
    if job.id and ('research_persona_' in job.id or 'facebook_persona_' in job.id):
        # Split at most twice so a user ID that itself contains underscores is
        # kept whole instead of being cut at its first underscore.
        # NOTE(review): assumes persona job IDs look like
        # "<kind>_persona_<user_id>" - confirm against the scheduling code.
        parts = job.id.split('_', 2)
        if len(parts) >= 3:
            return parts[2]
    return None


def _dashboard_job_store_resolver(db: Session):
    """Build a per-request lookup of user ID -> job store name.

    Successful lookups are remembered so that several jobs/tasks of the same
    user trigger a single ``get_user_job_store_name`` call. Failures are not
    cached; they are logged at debug level and reported as ``'default'``.
    """
    resolved: Dict[Any, Any] = {}

    def resolve(user_id):
        try:
            if user_id not in resolved:
                resolved[user_id] = get_user_job_store_name(user_id, db)
            return resolved[user_id]
        except Exception as e:
            logger.debug(f"Could not get job store for user {user_id}: {e}")
            return 'default'

    return resolve


def _dashboard_oauth_jobs(db: Session, resolve_job_store) -> List[Dict[str, Any]]:
    """Format active OAuth token monitoring tasks as dashboard job entries.

    These tasks are read from the database and are flagged with
    ``is_database_task``. Any error is logged and whatever was collected up
    to that point is returned, so the dashboard keeps working.
    """
    jobs: List[Dict[str, Any]] = []
    try:
        oauth_tasks = db.query(OAuthTokenMonitoringTask).filter(
            OAuthTokenMonitoringTask.status == 'active'
        ).all()
        oauth_tasks_count = len(oauth_tasks)

        if oauth_tasks_count > 0:
            # Log platform breakdown for debugging
            platforms = {}
            for task in oauth_tasks:
                platforms[task.platform] = platforms.get(task.platform, 0) + 1
            platform_summary = ", ".join([f"{platform}: {count}" for platform, count in platforms.items()])
            logger.warning(
                f"[Dashboard] OAuth Monitoring: Found {oauth_tasks_count} active OAuth token monitoring tasks "
                f"({platform_summary})"
            )
        else:
            # Nothing active: report what the existing tasks' statuses are.
            all_oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
            if all_oauth_tasks:
                inactive_by_status = {}
                for task in all_oauth_tasks:
                    status = task.status
                    inactive_by_status[status] = inactive_by_status.get(status, 0) + 1
                logger.warning(
                    f"[Dashboard] OAuth Monitoring: Found {len(all_oauth_tasks)} total OAuth tasks, "
                    f"but {oauth_tasks_count} are active. Status breakdown: {inactive_by_status}"
                )

        for task in oauth_tasks:
            jobs.append({
                'id': f"oauth_token_monitoring_{task.platform}_{task.user_id}",
                'trigger_type': 'CronTrigger',  # Weekly recurring
                'next_run_time': task.next_check.isoformat() if task.next_check else None,
                'user_id': task.user_id,
                'job_store': 'default',
                'user_job_store': resolve_job_store(task.user_id),
                'function_name': 'oauth_token_monitoring_executor.execute_task',
                'platform': task.platform,
                'task_id': task.id,
                'is_database_task': True,  # Flag to indicate this is a DB task, not APScheduler job
                'frequency': 'Weekly'
            })
    except Exception as e:
        # logger.exception attaches the traceback. Passing exc_info=True to
        # loguru does not, and makes loguru str.format() the rendered message,
        # which can itself raise when the error text contains braces.
        logger.exception(f"Error loading OAuth token monitoring tasks: {e}")
    return jobs


def _dashboard_cumulative_stats(db: Session, stats: Dict[str, Any]) -> Dict[str, int]:
    """Aggregate historical check-cycle totals from ``SchedulerEventLog``.

    Args:
        db: Database session used for the aggregate queries.
        stats: In-memory scheduler stats, used only for diagnostic logging.

    Returns:
        Dict with ``total_check_cycles``, ``cumulative_tasks_found``,
        ``cumulative_tasks_executed`` and ``cumulative_tasks_failed``. All
        values are zero when there are no check-cycle events or the queries fail.
    """
    empty_totals = {
        'total_check_cycles': 0,
        'cumulative_tasks_found': 0,
        'cumulative_tasks_executed': 0,
        'cumulative_tasks_failed': 0
    }
    try:
        # Event counts, logged to help diagnose missing history.
        total_events = db.query(func.count(SchedulerEventLog.id)).scalar() or 0
        check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
            SchedulerEventLog.event_type == 'check_cycle'
        ).scalar() or 0
        job_failed_count = db.query(func.count(SchedulerEventLog.id)).filter(
            SchedulerEventLog.event_type == 'job_failed'
        ).scalar() or 0
        job_completed_count = db.query(func.count(SchedulerEventLog.id)).filter(
            SchedulerEventLog.event_type == 'job_completed'
        ).scalar() or 0
        logger.warning(
            f"[Dashboard] Database stats: {total_events} total events, "
            f"{check_cycle_count} check_cycles, {job_failed_count} job_failed, "
            f"{job_completed_count} job_completed"
        )

        if check_cycle_count > 0:
            logger.warning(f"[Dashboard] Found {check_cycle_count} check cycle events in database")
            result = db.query(
                func.count(SchedulerEventLog.id),
                func.sum(SchedulerEventLog.tasks_found),
                func.sum(SchedulerEventLog.tasks_executed),
                func.sum(SchedulerEventLog.tasks_failed)
            ).filter(
                SchedulerEventLog.event_type == 'check_cycle'
            ).first()
            if result:
                # SUM yields NULL (None) when nothing was summed; treat as 0.
                total_cycles, total_found, total_executed, total_failed = (
                    value if value is not None else 0 for value in result
                )
                cumulative_stats = {
                    'total_check_cycles': int(total_cycles),
                    'cumulative_tasks_found': int(total_found),
                    'cumulative_tasks_executed': int(total_executed),
                    'cumulative_tasks_failed': int(total_failed)
                }
                logger.warning(f"[Dashboard] Cumulative stats from check_cycles: {cumulative_stats}")
                return cumulative_stats
            # No row at all (not expected for a COUNT query, but handled).
            logger.warning("[Dashboard] Query returned None (no check cycle events)")
            return empty_totals

        # No check cycles recorded: explain why the cumulative values are 0.
        if stats.get('total_checks', 0) > 0:
            logger.warning(
                f"[Dashboard] ⚠️ Scheduler shows {stats.get('total_checks', 0)} checks in memory, "
                f"but NO check_cycle events found in database. "
                f"This suggests check_cycle events are not being saved properly."
            )
        else:
            logger.warning(
                f"[Dashboard] No check_cycle events yet. "
                f"Scheduler interval: {stats.get('check_interval_minutes', 60)}min. "
                f"First check cycle will run after interval expires. "
                f"One-time jobs: {job_completed_count} completed, {job_failed_count} failed"
            )
        return empty_totals
    except Exception as e:
        # See _dashboard_oauth_jobs: exc_info=True is not a loguru feature.
        logger.exception(f"Error calculating cumulative stats: {e}")
        return empty_totals


@router.get("/dashboard")
async def get_scheduler_dashboard(
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get scheduler dashboard statistics and current state.

    Returns:
        A dict with:
        - stats: current-session scheduler stats (from scheduler memory) plus
          cumulative totals aggregated from the scheduler event log
        - jobs / job_count: APScheduler jobs and active OAuth token
          monitoring tasks, each with user context
        - recurring_jobs / one_time_jobs: job counts by kind
        - user_isolation: the requesting user's ID
        - last_updated: response timestamp

    Raises:
        HTTPException: 500 if the dashboard cannot be assembled.
    """
    try:
        scheduler = get_scheduler()

        # Get user_id from current_user (Clerk format)
        user_id_str = str(current_user.get('id', '')) if current_user else None

        # user_id=None: the dashboard shows stats for all users.
        stats = scheduler.get_stats(user_id=None)

        resolve_job_store = _dashboard_job_store_resolver(db)

        # Format APScheduler jobs with user context.
        formatted_jobs = []
        for job in scheduler.scheduler.get_jobs():
            job_info = {
                'id': job.id,
                'trigger_type': type(job.trigger).__name__,
                'next_run_time': job.next_run_time.isoformat() if job.next_run_time else None,
                'user_id': None,
                'job_store': 'default',
                'user_job_store': 'default'
            }
            job_user_id = _dashboard_job_user_id(job)
            if job_user_id:
                job_info['user_id'] = job_user_id
                job_info['user_job_store'] = resolve_job_store(job_user_id)
            formatted_jobs.append(job_info)

        # Add OAuth token monitoring tasks from database (recurring weekly tasks).
        formatted_jobs.extend(_dashboard_oauth_jobs(db, resolve_job_store))

        active_strategies = stats.get('active_strategies_count', 0)
        # last_update comes from the scheduler stats and is used for frontend polling.
        last_update = stats.get('last_update')

        cumulative_stats = _dashboard_cumulative_stats(db, stats)

        return {
            'stats': {
                # Current session stats (from scheduler memory)
                'total_checks': stats.get('total_checks', 0),
                'tasks_found': stats.get('tasks_found', 0),
                'tasks_executed': stats.get('tasks_executed', 0),
                'tasks_failed': stats.get('tasks_failed', 0),
                'tasks_skipped': stats.get('tasks_skipped', 0),
                'last_check': stats.get('last_check'),
                'last_update': last_update,  # Include for frontend polling
                'active_executions': stats.get('active_executions', 0),
                'running': stats.get('running', False),
                'check_interval_minutes': stats.get('check_interval_minutes', 60),
                'min_check_interval_minutes': stats.get('min_check_interval_minutes', 15),
                'max_check_interval_minutes': stats.get('max_check_interval_minutes', 60),
                'intelligent_scheduling': stats.get('intelligent_scheduling', True),
                'active_strategies_count': active_strategies,
                'last_interval_adjustment': stats.get('last_interval_adjustment'),
                'registered_types': stats.get('registered_types', []),
                # Cumulative/historical stats (from database)
                'cumulative_total_check_cycles': cumulative_stats.get('total_check_cycles', 0),
                'cumulative_tasks_found': cumulative_stats.get('cumulative_tasks_found', 0),
                'cumulative_tasks_executed': cumulative_stats.get('cumulative_tasks_executed', 0),
                'cumulative_tasks_failed': cumulative_stats.get('cumulative_tasks_failed', 0)
            },
            'jobs': formatted_jobs,
            'job_count': len(formatted_jobs),
            'recurring_jobs': 1 + len([j for j in formatted_jobs if j.get('is_database_task')]),  # check_due_tasks + OAuth tasks
            'one_time_jobs': len([j for j in formatted_jobs if not j.get('is_database_task') and j.get('trigger_type') == 'DateTrigger']),
            'user_isolation': {
                'enabled': True,
                'current_user_id': user_id_str
            },
            'last_updated': datetime.utcnow().isoformat()  # Keep for backward compatibility
        }
    except Exception as e:
        logger.error(f"Error getting scheduler dashboard: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scheduler dashboard: {str(e)}") from e
def _serialize_execution_log(
    *,
    log_id,
    task_id,
    user_id,
    execution_date,
    status,
    error_message,
    execution_time_ms,
    result_data,
    created_at,
    task,
) -> Dict[str, Any]:
    """Build the API representation of one task execution log.

    Datetimes are rendered as ISO-8601 strings (None stays None). A ``task``
    sub-dict is added only when a task object is given.
    """
    entry = {
        'id': log_id,
        'task_id': task_id,
        'user_id': user_id,
        'execution_date': execution_date.isoformat() if execution_date else None,
        'status': status,
        'error_message': error_message,
        'execution_time_ms': execution_time_ms,
        'result_data': result_data,
        'created_at': created_at.isoformat() if created_at else None
    }
    if task:
        entry['task'] = {
            'id': task.id,
            'task_title': task.task_title,
            'component_name': task.component_name,
            'metric': task.metric,
            'frequency': task.frequency
        }
    return entry


@router.get("/execution-logs")
async def get_execution_logs(
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    status: Optional[str] = Query(None, regex="^(success|failed|running|skipped)$"),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get task execution logs from database.

    Query Params:
        - limit: Number of logs to return (1-500, default: 50)
        - offset: Pagination offset (default: 0)
        - status: Filter by status (success, failed, running, skipped)

    Returns:
        - List of execution logs with task details (most recent first)
        - Total count and has_more flag for pagination

    Raises:
        HTTPException: 500 if the logs cannot be loaded.
    """
    try:
        # The task_execution_logs table may not have a user_id column; check
        # the live schema so we never select a non-existent column.
        from sqlalchemy import inspect
        inspector = inspect(db.bind)
        columns = [col['name'] for col in inspector.get_columns('task_execution_logs')]
        has_user_id_column = 'user_id' in columns

        # NOTE(review): results are not filtered by the requesting user in
        # either path. TaskExecutionLog.user_id is an Integer while the
        # authenticated ID is a Clerk string, so every caller currently sees
        # all logs until a user_id mapping exists.

        if not has_user_id_column:
            # Select columns explicitly so SQLAlchemy never touches user_id.
            count_query = db.query(func.count(TaskExecutionLog.id)).join(
                MonitoringTask,
                TaskExecutionLog.task_id == MonitoringTask.id
            )
            if status:
                count_query = count_query.filter(TaskExecutionLog.status == status)
            total_count = count_query.scalar() or 0

            query = db.query(
                TaskExecutionLog.id,
                TaskExecutionLog.task_id,
                TaskExecutionLog.execution_date,
                TaskExecutionLog.status,
                TaskExecutionLog.result_data,
                TaskExecutionLog.error_message,
                TaskExecutionLog.execution_time_ms,
                TaskExecutionLog.created_at,
                MonitoringTask
            ).join(
                MonitoringTask,
                TaskExecutionLog.task_id == MonitoringTask.id
            )
            if status:
                query = query.filter(TaskExecutionLog.status == status)
            rows = query.order_by(TaskExecutionLog.execution_date.desc()).offset(offset).limit(limit).all()

            formatted_logs = []
            for (log_id, task_id, execution_date, log_status, result_data,
                 error_message, execution_time_ms, created_at, task) in rows:
                formatted_logs.append(_serialize_execution_log(
                    log_id=log_id,
                    task_id=task_id,
                    user_id=None,  # No user_id column in database
                    execution_date=execution_date,
                    status=log_status,
                    error_message=error_message,
                    execution_time_ms=execution_time_ms,
                    result_data=result_data,
                    created_at=created_at,
                    task=task,
                ))
        else:
            # Normal path: load ORM objects with the task relationship eager-loaded.
            query = db.query(TaskExecutionLog).join(
                MonitoringTask,
                TaskExecutionLog.task_id == MonitoringTask.id
            ).options(
                joinedload(TaskExecutionLog.task)
            )
            if status:
                query = query.filter(TaskExecutionLog.status == status)
            total_count = query.count()
            logs = query.order_by(desc(TaskExecutionLog.execution_date)).offset(offset).limit(limit).all()

            formatted_logs = [
                _serialize_execution_log(
                    log_id=log.id,
                    task_id=log.task_id,
                    user_id=log.user_id,
                    execution_date=log.execution_date,
                    status=log.status,
                    error_message=log.error_message,
                    execution_time_ms=log.execution_time_ms,
                    result_data=log.result_data,
                    created_at=log.created_at,
                    task=log.task,
                )
                for log in logs
            ]

        return {
            'logs': formatted_logs,
            'total_count': total_count,
            'limit': limit,
            'offset': offset,
            'has_more': (offset + limit) < total_count,
            'is_scheduler_logs': False  # Explicitly mark as execution logs, not scheduler logs
        }
    except Exception as e:
        logger.error(f"Error getting execution logs: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get execution logs: {str(e)}") from e
@router.get("/jobs")
async def get_scheduler_jobs(
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get detailed information about all scheduled jobs.

    Returns:
        - jobs: one entry per scheduler job with ID, trigger type, next run
          time, function name, user ID (from job kwargs or job ID) and the
          user's job store name
        - total_jobs: number of jobs
        - one_time_jobs: jobs whose trigger is a DateTrigger
        - recurring_jobs: all remaining jobs

    Raises:
        HTTPException: 500 if the jobs cannot be listed.
    """
    try:
        scheduler = get_scheduler()
        all_jobs = scheduler.scheduler.get_jobs()

        formatted_jobs = []
        for job in all_jobs:
            job_info = {
                'id': job.id,
                'trigger_type': type(job.trigger).__name__,
                'next_run_time': job.next_run_time.isoformat() if job.next_run_time else None,
                'jobstore': getattr(job, 'jobstore', 'default'),
                'user_id': None,
                'user_job_store': 'default',
                'function_name': None
            }

            # Extract user_id from job: kwargs first, then the job ID.
            user_id_from_job = None
            if hasattr(job, 'kwargs') and job.kwargs and job.kwargs.get('user_id'):
                user_id_from_job = job.kwargs.get('user_id')
            elif job.id and ('research_persona_' in job.id or 'facebook_persona_' in job.id):
                # Split at most twice so a user ID that itself contains
                # underscores is kept whole instead of being truncated.
                # NOTE(review): assumes persona job IDs look like
                # "<kind>_persona_<user_id>" - confirm against the scheduling code.
                parts = job.id.split('_', 2)
                if len(parts) >= 3:
                    user_id_from_job = parts[2]

            if user_id_from_job:
                job_info['user_id'] = user_id_from_job
                try:
                    user_job_store = get_user_job_store_name(user_id_from_job, db)
                    job_info['user_job_store'] = user_job_store
                except Exception as e:
                    # Best effort: keep the 'default' store name on failure.
                    logger.debug(f"Could not get job store for user {user_id_from_job}: {e}")

            # Get function name if available
            if hasattr(job, 'func') and hasattr(job.func, '__name__'):
                job_info['function_name'] = job.func.__name__
            elif hasattr(job, 'func_ref'):
                job_info['function_name'] = str(job.func_ref)

            formatted_jobs.append(job_info)

        # Count by trigger type rather than assuming exactly one recurring
        # job exists; the old "len - 1" reported -1 one-time jobs when the
        # scheduler had no jobs at all.
        one_time_jobs = sum(
            1 for job_info in formatted_jobs if job_info['trigger_type'] == 'DateTrigger'
        )
        return {
            'jobs': formatted_jobs,
            'total_jobs': len(formatted_jobs),
            'recurring_jobs': len(formatted_jobs) - one_time_jobs,
            'one_time_jobs': one_time_jobs
        }
    except Exception as e:
        logger.error(f"Error getting scheduler jobs: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scheduler jobs: {str(e)}") from e
@router.get("/event-history")
async def get_scheduler_event_history(
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0),
    event_type: Optional[str] = Query(None, regex="^(check_cycle|interval_adjustment|start|stop|job_scheduled|job_cancelled|job_completed|job_failed)$"),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Return a page of historical scheduler events, most recent first.

    Query Params:
        - limit: Number of events to return (1-1000, default: 100)
        - offset: Pagination offset (default: 0)
        - event_type: Optional filter; one of check_cycle,
          interval_adjustment, start, stop, job_scheduled, job_cancelled,
          job_completed, job_failed

    Returns:
        - events: list of event dicts (datetimes as ISO-8601 strings)
        - total_count, limit, offset, has_more: pagination info

    Raises:
        HTTPException: 500 if the events cannot be loaded.
    """
    # Attributes copied onto the response unchanged, in output order
    # (between event_date and created_at).
    passthrough_fields = (
        'check_cycle_number',
        'check_interval_minutes',
        'previous_interval_minutes',
        'new_interval_minutes',
        'tasks_found',
        'tasks_executed',
        'tasks_failed',
        'tasks_by_type',
        'check_duration_seconds',
        'active_strategies_count',
        'active_executions',
        'job_id',
        'job_type',
        'user_id',
        'event_data',
        'error_message',
    )
    try:
        base_query = db.query(SchedulerEventLog)
        if event_type:
            base_query = base_query.filter(SchedulerEventLog.event_type == event_type)

        # Count before pagination is applied.
        total_count = base_query.count()

        page = (
            base_query
            .order_by(desc(SchedulerEventLog.event_date))
            .offset(offset)
            .limit(limit)
            .all()
        )

        formatted_events = []
        for record in page:
            entry = {
                'id': record.id,
                'event_type': record.event_type,
                'event_date': record.event_date.isoformat() if record.event_date else None,
            }
            for field_name in passthrough_fields:
                entry[field_name] = getattr(record, field_name)
            entry['created_at'] = record.created_at.isoformat() if record.created_at else None
            formatted_events.append(entry)

        has_more = (offset + limit) < total_count
        return {
            'events': formatted_events,
            'total_count': total_count,
            'limit': limit,
            'offset': offset,
            'has_more': has_more
        }
    except Exception as e:
        logger.error(f"Error getting scheduler event history: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scheduler event history: {str(e)}")
@router.get("/recent-scheduler-logs")
async def get_recent_scheduler_logs(
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Return the 5 most recent job-related scheduler events, shaped like execution logs.

    Only job_scheduled, job_completed and job_failed events are considered.
    This is a fixed window of the latest 5 events, not a paginated list.

    Returns:
        - logs: up to 5 entries in execution-log format, each flagged with
          is_scheduler_log
        - total_count, limit, offset, has_more: fixed pagination fields
        - is_scheduler_logs: True

    Raises:
        HTTPException: 500 if the events cannot be loaded.
    """
    job_event_types = ['job_scheduled', 'job_completed', 'job_failed']
    # Execution-log status per event type; anything else counts as running.
    status_by_event_type = {'job_completed': 'success', 'job_failed': 'failed'}
    try:
        events = (
            db.query(SchedulerEventLog)
            .filter(SchedulerEventLog.event_type.in_(job_event_types))
            .order_by(desc(SchedulerEventLog.event_date))
            .limit(5)
            .all()
        )

        # Diagnostic logging of what the query returned.
        logger.warning(
            f"[Dashboard] Recent scheduler logs query: found {len(events)} events"
        )
        if not events:
            # Nothing returned: report how many such events exist at all.
            total_count = db.query(func.count(SchedulerEventLog.id)).filter(
                SchedulerEventLog.event_type.in_(job_event_types)
            ).scalar() or 0
            logger.warning(
                f"[Dashboard] No recent scheduler logs found (query returned 0). "
                f"Total events of these types in DB: {total_count}"
            )
        else:
            for e in events:
                logger.warning(
                    f"[Dashboard] - Event: {e.event_type} | "
                    f"Job ID: {e.job_id} | User: {e.user_id} | "
                    f"Date: {e.event_date} | Error: {bool(e.error_message)}"
                )

        formatted_logs = []
        for event in events:
            details = event.event_data or {}

            job_function = details.get('job_function') or details.get('function_name') or 'unknown'

            # Convert the recorded duration to milliseconds when present.
            elapsed_seconds = details.get('execution_time_seconds')
            execution_time_ms = int(elapsed_seconds * 1000) if elapsed_seconds else None

            task_summary = {
                'id': None,
                'task_title': f"{event.event_type.replace('_', ' ').title()}: {event.job_id or 'N/A'}",
                'component_name': 'Scheduler',
                'metric': job_function,
                'frequency': 'one-time'
            }
            formatted_logs.append({
                'id': f"scheduler_event_{event.id}",
                'task_id': None,
                'user_id': event.user_id,
                'execution_date': event.event_date.isoformat() if event.event_date else None,
                'status': status_by_event_type.get(event.event_type, 'running'),
                'error_message': event.error_message,
                'execution_time_ms': execution_time_ms,
                'result_data': None,
                'created_at': event.created_at.isoformat() if event.created_at else None,
                'task': task_summary,
                'is_scheduler_log': True,  # Marks a scheduler log, not an execution log
                'event_type': event.event_type,
                'job_id': event.job_id
            })

        # Diagnostic logging of the formatted response.
        logger.warning(
            f"[Dashboard] Formatted {len(formatted_logs)} scheduler logs for response. "
            f"Sample log entry keys: {list(formatted_logs[0].keys()) if formatted_logs else 'none'}"
        )

        return {
            'logs': formatted_logs,
            'total_count': len(formatted_logs),
            'limit': 5,
            'offset': 0,
            'has_more': False,
            'is_scheduler_logs': True  # Indicate these are scheduler logs, not execution logs
        }
    except Exception as e:
        logger.error(f"Error getting recent scheduler logs: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get recent scheduler logs: {str(e)}")