Scheduled research persona generation
This commit is contained in:
310
backend/api/oauth_token_monitoring_routes.py
Normal file
310
backend/api/oauth_token_monitoring_routes.py
Normal file
@@ -0,0 +1,310 @@
|
||||
"""
|
||||
OAuth Token Monitoring API Routes
|
||||
Provides endpoints for managing OAuth token monitoring tasks and manual triggers.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask, OAuthTokenExecutionLog
|
||||
from services.scheduler import get_scheduler
|
||||
from services.oauth_token_monitoring_service import create_oauth_monitoring_tasks, get_connected_platforms
|
||||
|
||||
# Router for the OAuth token monitoring endpoints defined in this module;
# every route below is registered under the /api/oauth-tokens prefix.
router = APIRouter(
    prefix="/api/oauth-tokens",
    tags=["oauth-tokens"],
)
|
||||
|
||||
|
||||
@router.get("/status/{user_id}")
async def get_oauth_token_status(
    user_id: str,
    db: Session = Depends(get_db_session),
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Get OAuth token monitoring status for all platforms for a user.

    Args:
        user_id: User ID from the URL path; must match the authenticated user.
        db: Database session injected by FastAPI.
        current_user: Authenticated user payload injected by FastAPI.

    Returns:
        A dict with ``success`` and ``data``. ``data`` holds the ``user_id``,
        a ``platform_status`` mapping (one entry per supported platform with a
        ``connected`` flag and a ``monitoring_task`` summary, or ``None`` when
        no task exists) and the ``connected_platforms`` value returned by the
        monitoring service.

    Raises:
        HTTPException: 403 when the caller is not ``user_id``; 500 on any
            unexpected failure.
    """
    def _iso(moment):
        # Timestamps may be unset on a task that has never run.
        return moment.isoformat() if moment else None

    try:
        # Verify user can only access their own data. A missing id must be
        # rejected explicitly: str(None) == "None" would otherwise match a
        # request for the literal path value "None".
        authenticated_id = current_user.get('id')
        if authenticated_id is None or str(authenticated_id) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Get all monitoring tasks for user
        tasks = db.query(OAuthTokenMonitoringTask).filter(
            OAuthTokenMonitoringTask.user_id == user_id
        ).all()

        # Index once by platform; setdefault keeps the first task seen for a
        # platform, matching the previous first-match lookup.
        tasks_by_platform = {}
        for task in tasks:
            tasks_by_platform.setdefault(task.platform, task)

        # Get connected platforms
        logger.info(f"[OAuth Status API] Getting token status for user: {user_id}")
        connected_platforms = get_connected_platforms(user_id)
        logger.info(f"[OAuth Status API] Found {len(connected_platforms)} connected platforms: {connected_platforms}")

        # Build status response
        platform_status = {}
        for platform in ['gsc', 'bing', 'wordpress', 'wix']:
            task = tasks_by_platform.get(platform)
            is_connected = platform in connected_platforms

            monitoring_task = None
            if task is not None:
                monitoring_task = {
                    'id': task.id,
                    'status': task.status,
                    'last_check': _iso(task.last_check),
                    'last_success': _iso(task.last_success),
                    'last_failure': _iso(task.last_failure),
                    'failure_reason': task.failure_reason,
                    'next_check': _iso(task.next_check),
                }

            platform_status[platform] = {
                'connected': is_connected,
                'monitoring_task': monitoring_task
            }

            logger.info(
                f"[OAuth Status API] Platform {platform}: "
                f"connected={is_connected}, "
                f"task_exists={task is not None}, "
                f"task_status={task.status if task is not None else 'N/A'}"
            )

        response_data = {
            "success": True,
            "data": {
                "user_id": user_id,
                "platform_status": platform_status,
                "connected_platforms": connected_platforms
            }
        }

        logger.info(f"[OAuth Status API] Returning status for user {user_id}: {len(connected_platforms)} platforms connected")
        return response_data

    except HTTPException:
        raise
    except Exception as e:
        # logger.exception attaches the traceback. Passing exc_info=True to
        # loguru does not, and the extra kwarg makes loguru str.format() the
        # message, which can itself raise when the text contains braces.
        logger.exception(f"Error getting OAuth token status for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get token status: {str(e)}") from e
|
||||
|
||||
|
||||
@router.post("/refresh/{user_id}/{platform}")
async def manual_refresh_token(
    user_id: str,
    platform: str,
    db: Session = Depends(get_db_session),
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Manually trigger token refresh for a specific platform.

    This will:
    1. Find or create the monitoring task
    2. Execute the token check/refresh immediately
    3. Return the task state as re-read from the database after execution

    Args:
        user_id: User ID from the URL path; must match the authenticated user.
        platform: Platform identifier ('gsc', 'bing', 'wordpress', 'wix')
        db: Database session injected by FastAPI.
        current_user: Authenticated user payload injected by FastAPI.

    Returns:
        A dict with ``success`` (the executor's result flag), a ``message``
        and ``data`` describing the refreshed task plus the raw
        ``execution_result``.

    Raises:
        HTTPException: 403 when the caller is not ``user_id``; 400 for an
            unknown platform; 500 when the executor is not registered or on
            any unexpected failure.
    """
    def _iso(moment):
        # Timestamps may be unset on a task that has never run.
        return moment.isoformat() if moment else None

    try:
        # Verify user can only access their own data. Reject a missing id
        # explicitly: str(None) == "None" would otherwise match that path value.
        authenticated_id = current_user.get('id')
        if authenticated_id is None or str(authenticated_id) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Validate platform
        valid_platforms = ['gsc', 'bing', 'wordpress', 'wix']
        if platform not in valid_platforms:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid platform. Must be one of: {', '.join(valid_platforms)}"
            )

        # Get or create monitoring task
        task = db.query(OAuthTokenMonitoringTask).filter(
            OAuthTokenMonitoringTask.user_id == user_id,
            OAuthTokenMonitoringTask.platform == platform
        ).first()

        if not task:
            # One timestamp for all three columns so a new row is internally
            # consistent (previously three separate utcnow() calls).
            now = datetime.utcnow()
            task = OAuthTokenMonitoringTask(
                user_id=user_id,
                platform=platform,
                status='active',
                next_check=now,  # Set to now to trigger immediately
                created_at=now,
                updated_at=now
            )
            db.add(task)
            db.commit()
            db.refresh(task)
            logger.info(f"Created monitoring task for manual refresh: user={user_id}, platform={platform}")

        # Get scheduler and executor
        scheduler = get_scheduler()
        try:
            executor = scheduler.registry.get_executor('oauth_token_monitoring')
        except ValueError:
            raise HTTPException(status_code=500, detail="OAuth token monitoring executor not available")

        # Execute task immediately
        logger.info(f"Manually triggering token refresh: user={user_id}, platform={platform}")
        result = await executor.execute_task(task, db)

        # Re-read the task so the response reflects what the executor stored
        db.refresh(task)

        return {
            "success": result.success,
            "message": "Token refresh completed" if result.success else "Token refresh failed",
            "data": {
                "platform": platform,
                "status": task.status,
                "last_check": _iso(task.last_check),
                "last_success": _iso(task.last_success),
                "last_failure": _iso(task.last_failure),
                "failure_reason": task.failure_reason,
                "next_check": _iso(task.next_check),
                "execution_result": {
                    "success": result.success,
                    "error_message": result.error_message,
                    "execution_time_ms": result.execution_time_ms,
                    "result_data": result.result_data
                }
            }
        }

    except HTTPException:
        raise
    except Exception as e:
        # logger.exception attaches the traceback. Passing exc_info=True to
        # loguru does not, and the extra kwarg makes loguru str.format() the
        # message, which can itself raise when the text contains braces.
        logger.exception(f"Error manually refreshing token for user {user_id}, platform {platform}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to refresh token: {str(e)}") from e
|
||||
|
||||
|
||||
@router.get("/execution-logs/{user_id}")
async def get_execution_logs(
    user_id: str,
    platform: Optional[str] = Query(None, description="Filter by platform"),
    limit: int = Query(50, ge=1, le=100, description="Maximum number of logs"),
    offset: int = Query(0, ge=0, description="Offset for pagination"),
    db: Session = Depends(get_db_session),
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Get execution logs for OAuth token monitoring tasks.

    Args:
        user_id: User ID from the URL path; must match the authenticated user.
        platform: Optional platform filter
        limit: Maximum number of logs to return
        offset: Pagination offset
        db: Database session injected by FastAPI.
        current_user: Authenticated user payload injected by FastAPI.

    Returns:
        A dict with ``success`` and ``data``; ``data`` holds the page of
        ``logs`` (newest first), the unpaginated ``total_count`` and the
        ``limit`` / ``offset`` that were applied.

    Raises:
        HTTPException: 403 when the caller is not ``user_id``; 500 on any
            unexpected failure.
    """
    def _iso(moment):
        # Guard against NULL timestamps instead of failing the whole page.
        return moment.isoformat() if moment else None

    try:
        # Verify user can only access their own data. Reject a missing id
        # explicitly: str(None) == "None" would otherwise match that path value.
        authenticated_id = current_user.get('id')
        if authenticated_id is None or str(authenticated_id) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Build query: logs joined to their task so they can be scoped by owner
        query = db.query(OAuthTokenExecutionLog).join(
            OAuthTokenMonitoringTask,
            OAuthTokenExecutionLog.task_id == OAuthTokenMonitoringTask.id
        ).filter(
            OAuthTokenMonitoringTask.user_id == user_id
        )

        # Apply platform filter if provided
        if platform:
            query = query.filter(OAuthTokenMonitoringTask.platform == platform)

        # Get total count (before pagination)
        total_count = query.count()

        # Get paginated logs. The id tie-breaker keeps pages stable when
        # several logs share the same execution_date.
        logs = query.order_by(
            OAuthTokenExecutionLog.execution_date.desc(),
            OAuthTokenExecutionLog.id.desc()
        ).offset(offset).limit(limit).all()

        # Format logs
        logs_data = [
            {
                "id": log.id,
                "task_id": log.task_id,
                "platform": log.task.platform,  # Get platform from relationship
                "execution_date": _iso(log.execution_date),
                "status": log.status,
                "result_data": log.result_data,
                "error_message": log.error_message,
                "execution_time_ms": log.execution_time_ms,
                "created_at": _iso(log.created_at)
            }
            for log in logs
        ]

        return {
            "success": True,
            "data": {
                "logs": logs_data,
                "total_count": total_count,
                "limit": limit,
                "offset": offset
            }
        }

    except HTTPException:
        raise
    except Exception as e:
        # logger.exception attaches the traceback. Passing exc_info=True to
        # loguru does not, and the extra kwarg makes loguru str.format() the
        # message, which can itself raise when the text contains braces.
        logger.exception(f"Error getting execution logs for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get execution logs: {str(e)}") from e
|
||||
|
||||
|
||||
@router.post("/create-tasks/{user_id}")
async def create_monitoring_tasks(
    user_id: str,
    platforms: Optional[List[str]] = None,
    db: Session = Depends(get_db_session),
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Manually create OAuth token monitoring tasks for a user.

    The work is delegated to ``create_oauth_monitoring_tasks``; when
    ``platforms`` is omitted it is passed through as ``None``.
    NOTE(review): the service presumably auto-detects connected platforms in
    that case - confirm against the service implementation.

    Args:
        user_id: User ID from the URL path; must match the authenticated user.
        platforms: Optional list of platforms to create tasks for
        db: Database session injected by FastAPI.
        current_user: Authenticated user payload injected by FastAPI.

    Returns:
        A dict with ``success``, a ``message`` and ``data`` listing the tasks
        returned by the service (id, platform, status, next_check).

    Raises:
        HTTPException: 403 when the caller is not ``user_id``; 500 on any
            unexpected failure.
    """
    try:
        # Verify user can only access their own data. Reject a missing id
        # explicitly: str(None) == "None" would otherwise match that path value.
        authenticated_id = current_user.get('id')
        if authenticated_id is None or str(authenticated_id) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Create tasks
        tasks = create_oauth_monitoring_tasks(user_id, db, platforms)

        task_summaries = [
            {
                "id": task.id,
                "platform": task.platform,
                "status": task.status,
                "next_check": task.next_check.isoformat() if task.next_check else None
            }
            for task in tasks
        ]

        return {
            "success": True,
            "message": f"Created {len(tasks)} monitoring task(s)",
            "data": {
                "tasks_created": len(tasks),
                "tasks": task_summaries
            }
        }

    except HTTPException:
        raise
    except Exception as e:
        # logger.exception attaches the traceback. Passing exc_info=True to
        # loguru does not, and the extra kwarg makes loguru str.format() the
        # message, which can itself raise when the text contains braces.
        logger.exception(f"Error creating monitoring tasks for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to create monitoring tasks: {str(e)}") from e
|
||||
|
||||
@@ -12,6 +12,9 @@ from services.onboarding.progress_service import get_onboarding_progress_service
|
||||
from services.onboarding.database_service import OnboardingDatabaseService
|
||||
from services.database import get_db
|
||||
from services.persona_analysis_service import PersonaAnalysisService
|
||||
from services.research.research_persona_scheduler import schedule_research_persona_generation
|
||||
from services.persona.facebook.facebook_persona_scheduler import schedule_facebook_persona_generation
|
||||
from services.oauth_token_monitoring_service import create_oauth_monitoring_tasks
|
||||
|
||||
class OnboardingCompletionService:
|
||||
"""Service for handling onboarding completion logic."""
|
||||
@@ -46,6 +49,38 @@ class OnboardingCompletionService:
|
||||
if not success:
|
||||
raise HTTPException(status_code=500, detail="Failed to mark onboarding as complete")
|
||||
|
||||
# Schedule research persona generation 20 minutes after onboarding completion
|
||||
try:
|
||||
schedule_research_persona_generation(user_id, delay_minutes=20)
|
||||
logger.info(f"Scheduled research persona generation for user {user_id} (20 minutes after onboarding)")
|
||||
except Exception as e:
|
||||
# Non-critical: log but don't fail onboarding completion
|
||||
logger.warning(f"Failed to schedule research persona generation for user {user_id}: {e}")
|
||||
|
||||
# Schedule Facebook persona generation 20 minutes after onboarding completion
|
||||
try:
|
||||
schedule_facebook_persona_generation(user_id, delay_minutes=20)
|
||||
logger.info(f"Scheduled Facebook persona generation for user {user_id} (20 minutes after onboarding)")
|
||||
except Exception as e:
|
||||
# Non-critical: log but don't fail onboarding completion
|
||||
logger.warning(f"Failed to schedule Facebook persona generation for user {user_id}: {e}")
|
||||
|
||||
# Create OAuth token monitoring tasks for connected platforms
|
||||
try:
|
||||
from services.database import SessionLocal
|
||||
db = SessionLocal()
|
||||
try:
|
||||
monitoring_tasks = create_oauth_monitoring_tasks(user_id, db)
|
||||
logger.info(
|
||||
f"Created {len(monitoring_tasks)} OAuth token monitoring tasks for user {user_id} "
|
||||
f"on onboarding completion"
|
||||
)
|
||||
finally:
|
||||
db.close()
|
||||
except Exception as e:
|
||||
# Non-critical: log but don't fail onboarding completion
|
||||
logger.warning(f"Failed to create OAuth token monitoring tasks for user {user_id}: {e}")
|
||||
|
||||
return {
|
||||
"message": "Onboarding completed successfully",
|
||||
"completed_at": datetime.now().isoformat(),
|
||||
|
||||
@@ -380,6 +380,41 @@ async def generate_platform_persona(user_id: str, platform: str, db_session):
|
||||
logger.error(f"Error generating {platform} persona: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to generate {platform} persona: {str(e)}")
|
||||
|
||||
async def check_facebook_persona(user_id: str, db: Session):
    """Check if Facebook persona exists for user.

    Args:
        user_id: ID of the user whose persona data is looked up.
        db: Database session handed to ``PersonaDataService``.

    Returns:
        A dict with ``has_persona``, ``has_core_persona``, ``persona`` and
        ``onboarding_completed``. When no persona data exists at all the
        flags are ``False``, ``persona`` is ``None`` and a ``message`` is
        included.

    Raises:
        HTTPException: 500 wrapping any error raised during the lookup.
    """
    try:
        # Imported inside the function, as in the original code.
        # NOTE(review): presumably to avoid a circular import - confirm.
        from services.persona_data_service import PersonaDataService

        persona_data_service = PersonaDataService(db_session=db)
        persona_data = persona_data_service.get_user_persona_data(user_id)

        if not persona_data:
            return {
                "has_persona": False,
                "has_core_persona": False,
                # Always present so both response shapes expose the same key.
                "persona": None,
                "message": "No persona data found",
                "onboarding_completed": False
            }

        platform_personas = persona_data.get('platform_personas', {})
        facebook_persona = platform_personas.get('facebook') if platform_personas else None

        # Check if core persona exists
        has_core_persona = bool(persona_data.get('core_persona'))

        # Assume onboarding is completed if persona data exists
        onboarding_completed = True

        return {
            "has_persona": bool(facebook_persona),
            "has_core_persona": has_core_persona,
            "persona": facebook_persona,
            "onboarding_completed": onboarding_completed
        }
    except Exception as e:
        logger.error(f"Error checking Facebook persona for user {user_id}: {e}")
        # Chain the cause so the original traceback survives the conversion.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
async def validate_persona_generation_readiness(user_id: int):
|
||||
"""Check if user has sufficient onboarding data for persona generation."""
|
||||
try:
|
||||
|
||||
@@ -36,7 +36,7 @@ from api.persona import (
|
||||
)
|
||||
|
||||
from services.persona_replication_engine import PersonaReplicationEngine
|
||||
from api.persona import update_platform_persona, generate_platform_persona
|
||||
from api.persona import update_platform_persona, generate_platform_persona, check_facebook_persona
|
||||
|
||||
# Create router
|
||||
router = APIRouter(prefix="/api/personas", tags=["personas"])
|
||||
@@ -248,4 +248,12 @@ async def update_platform_persona_endpoint(
|
||||
Allows editing persona fields in the UI and saving them to the database.
|
||||
"""
|
||||
# Beta testing: Force user_id=1 for all requests
|
||||
return await update_platform_persona(1, platform, update_data)
|
||||
return await update_platform_persona(1, platform, update_data)
|
||||
|
||||
@router.get("/facebook-persona/check/{user_id}")
async def check_facebook_persona_endpoint(
    user_id: str,
    db: Session = Depends(get_db)
):
    """Report whether a Facebook persona exists for the given user.

    Thin HTTP wrapper: the lookup itself is done by ``check_facebook_persona``
    and its result is returned unchanged.

    NOTE(review): this route declares no authentication dependency, so the
    ``user_id`` path value is taken as-is - confirm that is intended.
    """
    persona_status = await check_facebook_persona(user_id, db)
    return persona_status
|
||||
398
backend/api/research_config.py
Normal file
398
backend/api/research_config.py
Normal file
@@ -0,0 +1,398 @@
|
||||
"""
|
||||
Research Configuration API
|
||||
Provides provider availability and persona-aware defaults for research.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from typing import Dict, Any, Optional
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from services.user_api_key_context import get_exa_key, get_gemini_key
|
||||
from services.onboarding.database_service import OnboardingDatabaseService
|
||||
from services.onboarding.progress_service import get_onboarding_progress_service
|
||||
from services.database import get_db
|
||||
from sqlalchemy.orm import Session
|
||||
from services.research.research_persona_service import ResearchPersonaService
|
||||
from services.research.research_persona_scheduler import schedule_research_persona_generation
|
||||
from models.research_persona_models import ResearchPersona
|
||||
|
||||
|
||||
# Router for the research configuration endpoints defined in this module.
# No prefix is set here.
# NOTE(review): presumably the prefix is applied where the router is included - confirm.
router = APIRouter()
|
||||
|
||||
|
||||
class ProviderAvailability(BaseModel):
    """Which research providers the current user can use.

    The ``*_available`` flags and the ``*_key_status`` strings carry the same
    information in two forms: a boolean and a display-friendly label.
    """
    # True when a non-blank Gemini key was found for the user.
    google_available: bool
    # True when a non-blank Exa key was found for the user.
    exa_available: bool
    # Either 'configured' or 'missing'.
    gemini_key_status: str
    # Either 'configured' or 'missing'.
    exa_key_status: str
|
||||
|
||||
|
||||
class PersonaDefaults(BaseModel):
    """Research defaults derived from the user's persona / onboarding data.

    Every field is optional so an empty instance can be returned when no
    data is available.
    """
    # Industry label used to pick the suggestions below.
    industry: Optional[str] = None
    # Free-text description of the intended audience.
    target_audience: Optional[str] = None
    # Domains suggested for the industry; empty when none are known.
    suggested_domains: list[str] = []
    # Exa category suggested for the industry, if any.
    suggested_exa_category: Optional[str] = None
|
||||
|
||||
|
||||
class ResearchConfigResponse(BaseModel):
    """Payload returned by the combined research configuration endpoint."""
    # Provider/key availability for the current user.
    provider_availability: ProviderAvailability
    # Persona-derived defaults for the research form.
    persona_defaults: PersonaDefaults
    # Cached research persona, when one exists.
    research_persona: Optional[ResearchPersona] = None
    # Whether onboarding was reported as completed.
    onboarding_completed: bool = False
    # Whether persona generation was scheduled while building this response.
    persona_scheduled: bool = False
|
||||
|
||||
|
||||
@router.get("/provider-availability", response_model=ProviderAvailability)
async def get_provider_availability(
    current_user: Dict = Depends(get_current_user)
):
    """
    Check which research providers are available for the current user.

    Returns:
    - google_available: True if Gemini key is configured
    - exa_available: True if Exa key is configured
    - Key status for each provider

    Raises:
        HTTPException: 500 when the key lookup fails.
    """
    # Bound before the try block so the error handler can always refer to it
    # (replaces the fragile "'user_id' in locals()" check).
    user_id = None
    try:
        user_id = str(current_user.get('id'))

        # Check API key availability; a blank / whitespace-only key counts as missing
        gemini_key = get_gemini_key(user_id)
        exa_key = get_exa_key(user_id)

        google_available = bool(gemini_key and gemini_key.strip())
        exa_available = bool(exa_key and exa_key.strip())

        return ProviderAvailability(
            google_available=google_available,
            exa_available=exa_available,
            gemini_key_status='configured' if google_available else 'missing',
            exa_key_status='configured' if exa_available else 'missing'
        )
    except Exception as e:
        # logger.exception attaches the traceback. Passing exc_info=True to
        # loguru does not, and the extra kwarg makes loguru str.format() the
        # message, which can itself raise when the text contains braces.
        logger.exception(f"[ResearchConfig] Error checking provider availability for user {user_id or 'unknown'}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to check provider availability: {str(e)}") from e
|
||||
|
||||
|
||||
@router.get("/persona-defaults", response_model=PersonaDefaults)
async def get_persona_defaults(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get persona-aware research defaults for the current user.

    Industry and target audience are read from the stored core persona. When
    the persona gives no industry, the website analysis ``target_audience``
    data is used as a fallback. Domain and Exa category suggestions are then
    derived from the resolved industry.

    Returns:
        ``PersonaDefaults``. This endpoint never raises: when the database
        session is missing or any lookup fails, an empty ``PersonaDefaults``
        is returned instead.
    """
    # Bound before the try block so the error handler can always refer to it
    # (replaces the fragile "'user_id' in locals()" check).
    user_id = None
    try:
        user_id = str(current_user.get('id'))

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_persona_defaults")
            # Return defaults rather than error
            return PersonaDefaults()

        db_service = OnboardingDatabaseService(db=db)

        # Try to get persona data first (most reliable source for industry/target_audience)
        persona_data = db_service.get_persona_data(user_id, db)
        industry = 'General'
        target_audience = 'General'

        if persona_data:
            # Both camelCase and snake_case keys are accepted
            core_persona = persona_data.get('corePersona') or persona_data.get('core_persona')
            if core_persona:
                if core_persona.get('industry'):
                    industry = core_persona['industry']
                if core_persona.get('target_audience'):
                    target_audience = core_persona['target_audience']

        # Fallback to website analysis if persona data doesn't have industry info
        if industry == 'General':
            website_analysis = db_service.get_website_analysis(user_id, db)
            if website_analysis:
                target_audience_data = website_analysis.get('target_audience', {})
                if isinstance(target_audience_data, dict):
                    # Extract from target_audience JSON field
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus:
                        industry = industry_focus
                    demographics = target_audience_data.get('demographics')
                    if demographics:
                        target_audience = demographics if isinstance(demographics, str) else str(demographics)

        return PersonaDefaults(
            industry=industry,
            target_audience=target_audience,
            # Suggest domains based on industry
            suggested_domains=_get_domain_suggestions(industry),
            # Suggest Exa category based on industry
            suggested_exa_category=_get_exa_category_suggestion(industry)
        )
    except Exception as e:
        # logger.exception attaches the traceback. Passing exc_info=True to
        # loguru does not, and the extra kwarg makes loguru str.format() the
        # message, which can itself raise when the text contains braces.
        logger.exception(f"[ResearchConfig] Error getting persona defaults for user {user_id or 'unknown'}: {e}")
        # Return defaults rather than error
        return PersonaDefaults()
|
||||
|
||||
|
||||
@router.get("/research-persona")
async def get_research_persona(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db),
    force_refresh: bool = Query(False, description="Force regenerate persona even if cache is valid")
):
    """
    Get or generate research persona for the current user.

    Query params:
    - force_refresh: If true, regenerate persona even if cache is valid (default: false)

    Returns:
        The research persona serialized as a dict.

    Raises:
        HTTPException: 401 when the authenticated payload carries no user id;
            404 when no persona is available; 500 when the database session
            is missing or on any unexpected failure. HTTPExceptions raised by
            the persona service are propagated unchanged.
    """
    # Bound before the try block so the error handler can always refer to it
    # (replaces the fragile "'user_id' in locals()" check).
    user_id = None
    try:
        # Check the raw id BEFORE converting: str(None) is the truthy string
        # "None", which made the old "if not user_id" check unreachable.
        raw_user_id = current_user.get('id')
        if raw_user_id is None or not str(raw_user_id):
            raise HTTPException(status_code=401, detail="User not authenticated")
        user_id = str(raw_user_id)

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_research_persona")
            raise HTTPException(status_code=500, detail="Database not available")

        persona_service = ResearchPersonaService(db_session=db)
        research_persona = persona_service.get_or_generate(user_id, force_refresh=force_refresh)

        if not research_persona:
            raise HTTPException(
                status_code=404,
                detail="Research persona not available. Complete onboarding to generate one."
            )

        return research_persona.dict()

    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve status code and details
        raise
    except Exception as e:
        # logger.exception attaches the traceback. Passing exc_info=True to
        # loguru does not, and the extra kwarg makes loguru str.format() the
        # message, which can itself raise when the text contains braces.
        logger.exception(f"[ResearchConfig] Error getting research persona for user {user_id or 'unknown'}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get research persona: {str(e)}") from e
|
||||
|
||||
|
||||
@router.get("/config", response_model=ResearchConfigResponse)
async def get_research_config(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get complete research configuration including provider availability and persona defaults.

    Steps:
    1. Provider availability from the user's Gemini / Exa keys.
    2. Persona defaults from the core persona, falling back to website analysis.
    3. Onboarding completion status (best effort; defaults to False).
    4. Cached research persona only - nothing is generated here. If onboarding
       is complete, no persona is cached and persona source data exists,
       generation is scheduled for 20 minutes later.

    Returns:
        ``ResearchConfigResponse``.

    Raises:
        HTTPException: 500 when the database session is missing or on any
            unexpected failure.
    """
    user_id = None
    try:
        user_id = str(current_user.get('id'))
        logger.info(f"[ResearchConfig] Starting get_research_config for user {user_id}")

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_research_config")
            raise HTTPException(status_code=500, detail="Database session not available")

        # --- Provider availability -------------------------------------
        logger.debug(f"[ResearchConfig] Getting provider availability for user {user_id}")
        gemini_key = get_gemini_key(user_id)
        exa_key = get_exa_key(user_id)

        # A blank / whitespace-only key counts as missing
        google_available = bool(gemini_key and gemini_key.strip())
        exa_available = bool(exa_key and exa_key.strip())

        provider_availability = ProviderAvailability(
            google_available=google_available,
            exa_available=exa_available,
            gemini_key_status='configured' if google_available else 'missing',
            exa_key_status='configured' if exa_available else 'missing'
        )

        # --- Persona defaults ------------------------------------------
        logger.debug(f"[ResearchConfig] Getting persona defaults for user {user_id}")
        db_service = OnboardingDatabaseService(db=db)

        # Try to get persona data first (most reliable source for industry/target_audience)
        try:
            persona_data = db_service.get_persona_data(user_id, db)
        except Exception as e:
            # logger.exception (not exc_info=True) is what attaches the
            # traceback with loguru.
            logger.exception(f"[ResearchConfig] Error getting persona data for user {user_id}: {e}")
            persona_data = None

        industry = 'General'
        target_audience = 'General'

        if persona_data:
            # Both camelCase and snake_case keys are accepted
            core_persona = persona_data.get('corePersona') or persona_data.get('core_persona')
            if core_persona:
                if core_persona.get('industry'):
                    industry = core_persona['industry']
                if core_persona.get('target_audience'):
                    target_audience = core_persona['target_audience']

        # Fallback to website analysis if persona data doesn't have industry info
        if industry == 'General':
            # Optional data: handled like the other best-effort lookups here,
            # so a failure degrades to the generic defaults instead of a 500.
            try:
                website_analysis = db_service.get_website_analysis(user_id, db)
            except Exception as e:
                logger.opt(exception=True).warning(
                    f"[ResearchConfig] Could not load website analysis for user {user_id}: {e}"
                )
                website_analysis = None

            if website_analysis:
                target_audience_data = website_analysis.get('target_audience', {})
                if isinstance(target_audience_data, dict):
                    # Extract from target_audience JSON field
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus:
                        industry = industry_focus
                    demographics = target_audience_data.get('demographics')
                    if demographics:
                        target_audience = demographics if isinstance(demographics, str) else str(demographics)

        persona_defaults = PersonaDefaults(
            industry=industry,
            target_audience=target_audience,
            suggested_domains=_get_domain_suggestions(industry),
            suggested_exa_category=_get_exa_category_suggestion(industry)
        )

        # --- Onboarding completion status ------------------------------
        onboarding_completed = False
        try:
            logger.debug(f"[ResearchConfig] Checking onboarding status for user {user_id}")
            progress_service = get_onboarding_progress_service()
            onboarding_status = progress_service.get_onboarding_status(user_id)
            onboarding_completed = onboarding_status.get('is_completed', False)
            logger.info(
                f"[ResearchConfig] Onboarding status check for user {user_id}: "
                f"is_completed={onboarding_completed}, "
                f"current_step={onboarding_status.get('current_step')}, "
                f"progress={onboarding_status.get('completion_percentage')}"
            )
        except Exception as e:
            logger.exception(f"[ResearchConfig] Could not check onboarding status for user {user_id}: {e}")
            # Continue with onboarding_completed=False

        # --- Research persona (optional, may not exist for all users) ---
        # CRITICAL: Use get_cached_only() to avoid triggering rate limit checks
        # Only return persona if it's already cached - don't generate on config load
        research_persona = None
        persona_scheduled = False
        try:
            logger.debug(f"[ResearchConfig] Getting cached research persona for user {user_id}")
            persona_service = ResearchPersonaService(db_session=db)
            research_persona = persona_service.get_cached_only(user_id)

            logger.info(
                f"[ResearchConfig] Research persona check for user {user_id}: "
                f"persona_exists={research_persona is not None}, "
                f"onboarding_completed={onboarding_completed}"
            )

            # If onboarding is completed but persona doesn't exist, schedule generation
            if onboarding_completed and not research_persona:
                try:
                    # Reuse the persona data fetched above rather than
                    # querying again; it tells us whether there is anything
                    # to generate a persona from.
                    source_keys = ('corePersona', 'platformPersonas', 'core_persona', 'platform_personas')
                    has_source_data = bool(persona_data) and any(
                        persona_data.get(key) for key in source_keys
                    )
                    if has_source_data:
                        # Schedule persona generation (20 minutes from now)
                        # NOTE(review): this runs on every config load until a
                        # persona is cached - confirm the scheduler de-duplicates.
                        schedule_research_persona_generation(user_id, delay_minutes=20)
                        logger.info(f"Scheduled research persona generation for user {user_id} (onboarding already completed)")
                        persona_scheduled = True
                    else:
                        logger.info(f"Onboarding completed but no persona data found for user {user_id} - cannot schedule persona generation")
                except Exception as e:
                    logger.opt(exception=True).warning(f"Failed to schedule research persona generation: {e}")
        except Exception as e:
            # get_cached_only() never raises HTTPException, but catch any unexpected errors
            logger.opt(exception=True).warning(
                f"[ResearchConfig] Could not load cached research persona for user {user_id}: {e}"
            )

        # FastAPI will automatically serialize the ResearchPersona Pydantic model
        # If building the response fails, log it and retry without the persona
        try:
            response = ResearchConfigResponse(
                provider_availability=provider_availability,
                persona_defaults=persona_defaults,
                research_persona=research_persona,
                onboarding_completed=onboarding_completed,
                persona_scheduled=persona_scheduled
            )
        except Exception as serialization_error:
            logger.exception(f"[ResearchConfig] Failed to create ResearchConfigResponse for user {user_id}: {serialization_error}")
            # Try without research_persona as fallback
            response = ResearchConfigResponse(
                provider_availability=provider_availability,
                persona_defaults=persona_defaults,
                research_persona=None,
                onboarding_completed=onboarding_completed,
                persona_scheduled=persona_scheduled
            )

        logger.info(
            f"[ResearchConfig] Response for user {user_id}: "
            f"onboarding_completed={onboarding_completed}, "
            f"persona_exists={research_persona is not None}, "
            f"persona_scheduled={persona_scheduled}"
        )

        return response
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429, 401, etc.) to preserve status codes
        raise
    except Exception as e:
        # logger.exception records the full traceback, so the separate
        # traceback.format_exc() log line is no longer needed.
        logger.exception(f"[ResearchConfig] CRITICAL ERROR getting research config for user {user_id if user_id else 'unknown'}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get research config: {str(e)}"
        ) from e
|
||||
|
||||
|
||||
# Helper functions from RESEARCH_AI_HYPERPERSONALIZATION.md
|
||||
|
||||
# Authoritative source domains suggested for each industry, keyed by the
# industry's display name.
_INDUSTRY_DOMAIN_SUGGESTIONS = {
    'Healthcare': ('pubmed.gov', 'nejm.org', 'thelancet.com', 'nih.gov'),
    'Technology': ('techcrunch.com', 'wired.com', 'arstechnica.com', 'theverge.com'),
    'Finance': ('wsj.com', 'bloomberg.com', 'ft.com', 'reuters.com'),
    'Science': ('nature.com', 'sciencemag.org', 'cell.com', 'pnas.org'),
    'Business': ('hbr.org', 'forbes.com', 'businessinsider.com', 'mckinsey.com'),
    'Marketing': ('marketingland.com', 'adweek.com', 'hubspot.com', 'moz.com'),
    'Education': ('edutopia.org', 'chronicle.com', 'insidehighered.com'),
    'Real Estate': ('realtor.com', 'zillow.com', 'forbes.com'),
    'Entertainment': ('variety.com', 'hollywoodreporter.com', 'deadline.com'),
    'Travel': ('lonelyplanet.com', 'nationalgeographic.com', 'travelandleisure.com'),
    'Fashion': ('vogue.com', 'elle.com', 'wwd.com'),
    'Sports': ('espn.com', 'si.com', 'bleacherreport.com'),
    'Law': ('law.com', 'abajournal.com', 'scotusblog.com'),
}

# Case-folded index so lookups do not depend on how the industry was typed.
_INDUSTRY_DOMAIN_INDEX = {
    name.casefold(): domains
    for name, domains in _INDUSTRY_DOMAIN_SUGGESTIONS.items()
}


def _get_domain_suggestions(industry: str) -> list[str]:
    """Get domain suggestions based on industry.

    Matching ignores letter case and surrounding whitespace, so
    ``"healthcare"`` and ``" Healthcare "`` resolve to the same entry.

    Args:
        industry: Industry name, e.g. ``"Healthcare"``.

    Returns:
        A new list of suggested domains; empty when the industry is unknown,
        blank, or not a string. The list is a copy, so callers may mutate it.
    """
    if not isinstance(industry, str):
        return []
    return list(_INDUSTRY_DOMAIN_INDEX.get(industry.strip().casefold(), ()))
|
||||
|
||||
|
||||
# Exa search category suggested for each industry, keyed by the industry's
# display name. Industries without an entry get no category suggestion.
_INDUSTRY_EXA_CATEGORIES = {
    'Healthcare': 'research paper',
    'Science': 'research paper',
    'Finance': 'financial report',
    'Technology': 'company',
    'Business': 'company',
    'Marketing': 'company',
    'Education': 'research paper',
    'Law': 'pdf',
}

# Case-folded index so lookups do not depend on how the industry was typed.
_INDUSTRY_EXA_CATEGORY_INDEX = {
    name.casefold(): category
    for name, category in _INDUSTRY_EXA_CATEGORIES.items()
}


def _get_exa_category_suggestion(industry: str) -> Optional[str]:
    """Get Exa category suggestion based on industry.

    Matching ignores letter case and surrounding whitespace.

    Args:
        industry: Industry name, e.g. ``"Finance"``.

    Returns:
        The suggested Exa category, or ``None`` when the industry is unknown,
        blank, or not a string.
    """
    if not isinstance(industry, str):
        return None
    return _INDUSTRY_EXA_CATEGORY_INDEX.get(industry.strip().casefold())
|
||||
|
||||
706
backend/api/scheduler_dashboard.py
Normal file
706
backend/api/scheduler_dashboard.py
Normal file
@@ -0,0 +1,706 @@
|
||||
"""
|
||||
Scheduler Dashboard API
|
||||
Provides endpoints for scheduler dashboard UI.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Depends, Query
|
||||
from typing import Dict, Any, Optional, List
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
from sqlalchemy import desc, func
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
|
||||
from services.scheduler import get_scheduler
|
||||
from services.scheduler.utils.user_job_store import get_user_job_store_name
|
||||
from services.monitoring_data_service import MonitoringDataService
|
||||
from services.database import get_db
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from models.monitoring_models import TaskExecutionLog, MonitoringTask
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
from sqlalchemy import func
|
||||
|
||||
# Router for the scheduler dashboard endpoints; every route declared on it is
# served under the /api/scheduler prefix.
router = APIRouter(prefix="/api/scheduler", tags=["scheduler-dashboard"])
|
||||
|
||||
|
||||
def _dashboard_job_user_id(job) -> Optional[str]:
    """Return the user id an APScheduler job belongs to, or None if unknown.

    The id comes from the job's ``user_id`` kwarg when present; otherwise it
    is parsed out of persona job ids, which carry it after the
    ``research_persona_`` / ``facebook_persona_`` prefix.
    """
    job_kwargs = getattr(job, 'kwargs', None)
    if job_kwargs and job_kwargs.get('user_id'):
        return job_kwargs.get('user_id')

    if job.id and ('research_persona_' in job.id or 'facebook_persona_' in job.id):
        # maxsplit=2 keeps everything after the two-word prefix intact.
        # Splitting on every underscore would truncate user ids that contain
        # underscores themselves (presumably Clerk ids such as "user_abc123").
        # NOTE(review): assumes nothing follows the user id in the job id -
        # confirm against the code that schedules these jobs.
        parts = job.id.split('_', 2)
        if len(parts) >= 3:
            return parts[2]
    return None


def _dashboard_oauth_jobs(db: Session) -> List[Dict[str, Any]]:
    """Format active OAuth token monitoring tasks as dashboard job entries.

    These tasks are stored in the database rather than in APScheduler, so
    they are flagged with ``is_database_task``. Failures are logged and
    whatever was collected so far is returned, so the dashboard still renders.
    """
    oauth_jobs: List[Dict[str, Any]] = []
    try:
        oauth_tasks = db.query(OAuthTokenMonitoringTask).filter(
            OAuthTokenMonitoringTask.status == 'active'
        ).all()
        oauth_tasks_count = len(oauth_tasks)

        # NOTE(review): the two diagnostics below are emitted at WARNING on
        # every dashboard request; consider lowering the level if this
        # endpoint is polled.
        if oauth_tasks_count > 0:
            platforms: Dict[str, int] = {}
            for task in oauth_tasks:
                platforms[task.platform] = platforms.get(task.platform, 0) + 1

            platform_summary = ", ".join(
                f"{platform}: {count}" for platform, count in platforms.items()
            )
            logger.warning(
                f"[Dashboard] OAuth Monitoring: Found {oauth_tasks_count} active OAuth token monitoring tasks "
                f"({platform_summary})"
            )
        else:
            # Nothing active: report how many tasks exist per status. Counting
            # in SQL avoids loading every row just to tally it.
            status_rows = db.query(
                OAuthTokenMonitoringTask.status,
                func.count(OAuthTokenMonitoringTask.id)
            ).group_by(OAuthTokenMonitoringTask.status).all()
            if status_rows:
                inactive_by_status = {status: count for status, count in status_rows}
                logger.warning(
                    f"[Dashboard] OAuth Monitoring: Found {sum(inactive_by_status.values())} total OAuth tasks, "
                    f"but {oauth_tasks_count} are active. Status breakdown: {inactive_by_status}"
                )

        for task in oauth_tasks:
            try:
                user_job_store = get_user_job_store_name(task.user_id, db)
            except Exception as e:
                user_job_store = 'default'
                logger.debug(f"Could not get job store for user {task.user_id}: {e}")

            oauth_jobs.append({
                'id': f"oauth_token_monitoring_{task.platform}_{task.user_id}",
                'trigger_type': 'CronTrigger',  # Weekly recurring
                'next_run_time': task.next_check.isoformat() if task.next_check else None,
                'user_id': task.user_id,
                'job_store': 'default',
                'user_job_store': user_job_store,
                'function_name': 'oauth_token_monitoring_executor.execute_task',
                'platform': task.platform,
                'task_id': task.id,
                'is_database_task': True,  # DB task, not an APScheduler job
                'frequency': 'Weekly'
            })
    except Exception as e:
        # logger.exception attaches the traceback. Loguru has no exc_info
        # keyword: passing it makes loguru str.format() the message, which
        # raises when the error text contains braces.
        logger.exception(f"Error loading OAuth token monitoring tasks: {e}")
    return oauth_jobs


def _dashboard_cumulative_stats(db: Session, stats: Dict[str, Any]) -> Dict[str, int]:
    """Aggregate historical totals from ``check_cycle`` scheduler events.

    Returns zeros when there are no check-cycle events yet or when the
    database cannot be queried; ``stats`` (the in-memory scheduler stats) is
    only used to explain in the log why the totals are zero.
    """
    zero_totals = {
        'total_check_cycles': 0,
        'cumulative_tasks_found': 0,
        'cumulative_tasks_executed': 0,
        'cumulative_tasks_failed': 0
    }
    totals = dict(zero_totals)

    def count_events(event_type: Optional[str] = None) -> int:
        query = db.query(func.count(SchedulerEventLog.id))
        if event_type is not None:
            query = query.filter(SchedulerEventLog.event_type == event_type)
        return query.scalar() or 0

    try:
        total_events = count_events()
        check_cycle_count = count_events('check_cycle')
        job_failed_count = count_events('job_failed')
        job_completed_count = count_events('job_completed')

        logger.warning(
            f"[Dashboard] Database stats: {total_events} total events, "
            f"{check_cycle_count} check_cycles, {job_failed_count} job_failed, "
            f"{job_completed_count} job_completed"
        )

        if check_cycle_count > 0:
            logger.warning(f"[Dashboard] Found {check_cycle_count} check cycle events in database")
            result = db.query(
                func.count(SchedulerEventLog.id),
                func.sum(SchedulerEventLog.tasks_found),
                func.sum(SchedulerEventLog.tasks_executed),
                func.sum(SchedulerEventLog.tasks_failed)
            ).filter(
                SchedulerEventLog.event_type == 'check_cycle'
            ).first()

            if result:
                # SUM yields NULL (None) when every summed value is NULL.
                totals = {
                    'total_check_cycles': int(result[0] or 0),
                    'cumulative_tasks_found': int(result[1] or 0),
                    'cumulative_tasks_executed': int(result[2] or 0),
                    'cumulative_tasks_failed': int(result[3] or 0)
                }
                logger.warning(f"[Dashboard] Cumulative stats from check_cycles: {totals}")
            else:
                # Not expected for an aggregate query, but handled defensively.
                logger.warning("[Dashboard] Query returned None (no check cycle events)")
        elif stats.get('total_checks', 0) > 0:
            logger.warning(
                f"[Dashboard] ⚠️ Scheduler shows {stats.get('total_checks', 0)} checks in memory, "
                f"but NO check_cycle events found in database. "
                f"This suggests check_cycle events are not being saved properly."
            )
        else:
            logger.warning(
                f"[Dashboard] No check_cycle events yet. "
                f"Scheduler interval: {stats.get('check_interval_minutes', 60)}min. "
                f"First check cycle will run after interval expires. "
                f"One-time jobs: {job_completed_count} completed, {job_failed_count} failed"
            )
    except Exception as e:
        # See _dashboard_oauth_jobs for why logger.exception is used here.
        logger.exception(f"Error calculating cumulative stats: {e}")
        totals = dict(zero_totals)
    return totals


@router.get("/dashboard")
async def get_scheduler_dashboard(
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get scheduler dashboard statistics and current state.

    Returns:
        - Scheduler stats (current in-memory session stats plus cumulative
          totals aggregated from the scheduler event log)
        - Current scheduled jobs (APScheduler jobs and active OAuth token
          monitoring tasks from the database)
        - Active strategies count
        - Check interval
        - User isolation status
        - Last check timestamp

    Raises:
        HTTPException: 500 when the dashboard cannot be assembled.
    """
    try:
        scheduler = get_scheduler()

        # Id of the requesting user; None only if no user object was supplied.
        user_id_str = str(current_user.get('id', '')) if current_user else None

        # The dashboard shows scheduler-wide stats, not per-user stats.
        stats = scheduler.get_stats(user_id=None)

        formatted_jobs = []
        for job in scheduler.scheduler.get_jobs():
            job_info = {
                'id': job.id,
                'trigger_type': type(job.trigger).__name__,
                'next_run_time': job.next_run_time.isoformat() if job.next_run_time else None,
                'user_id': None,
                'job_store': 'default',
                'user_job_store': 'default'
            }

            user_id_from_job = _dashboard_job_user_id(job)
            if user_id_from_job:
                job_info['user_id'] = user_id_from_job
                try:
                    job_info['user_job_store'] = get_user_job_store_name(user_id_from_job, db)
                except Exception as e:
                    logger.debug(f"Could not get job store for user {user_id_from_job}: {e}")

            formatted_jobs.append(job_info)

        # OAuth token monitoring tasks are recurring tasks stored in the DB.
        formatted_jobs.extend(_dashboard_oauth_jobs(db))

        cumulative_stats = _dashboard_cumulative_stats(db, stats)

        database_task_count = sum(1 for j in formatted_jobs if j.get('is_database_task'))
        one_time_job_count = sum(
            1 for j in formatted_jobs
            if not j.get('is_database_task') and j.get('trigger_type') == 'DateTrigger'
        )

        return {
            'stats': {
                # Current session stats (from scheduler memory)
                'total_checks': stats.get('total_checks', 0),
                'tasks_found': stats.get('tasks_found', 0),
                'tasks_executed': stats.get('tasks_executed', 0),
                'tasks_failed': stats.get('tasks_failed', 0),
                'tasks_skipped': stats.get('tasks_skipped', 0),
                'last_check': stats.get('last_check'),
                'last_update': stats.get('last_update'),  # Used by frontend polling
                'active_executions': stats.get('active_executions', 0),
                'running': stats.get('running', False),
                'check_interval_minutes': stats.get('check_interval_minutes', 60),
                'min_check_interval_minutes': stats.get('min_check_interval_minutes', 15),
                'max_check_interval_minutes': stats.get('max_check_interval_minutes', 60),
                'intelligent_scheduling': stats.get('intelligent_scheduling', True),
                'active_strategies_count': stats.get('active_strategies_count', 0),
                'last_interval_adjustment': stats.get('last_interval_adjustment'),
                'registered_types': stats.get('registered_types', []),
                # Cumulative/historical stats (from database)
                'cumulative_total_check_cycles': cumulative_stats.get('total_check_cycles', 0),
                'cumulative_tasks_found': cumulative_stats.get('cumulative_tasks_found', 0),
                'cumulative_tasks_executed': cumulative_stats.get('cumulative_tasks_executed', 0),
                'cumulative_tasks_failed': cumulative_stats.get('cumulative_tasks_failed', 0)
            },
            'jobs': formatted_jobs,
            'job_count': len(formatted_jobs),
            'recurring_jobs': 1 + database_task_count,  # check_due_tasks + OAuth tasks
            'one_time_jobs': one_time_job_count,
            'user_isolation': {
                'enabled': True,
                'current_user_id': user_id_str
            },
            'last_updated': datetime.utcnow().isoformat()  # Keep for backward compatibility
        }

    except HTTPException:
        # Preserve status codes raised deliberately further down the stack.
        raise
    except Exception as e:
        logger.exception(f"Error getting scheduler dashboard: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scheduler dashboard: {str(e)}") from e
|
||||
|
||||
|
||||
def _format_execution_log(
    *,
    log_id,
    task_id,
    user_id,
    execution_date,
    status,
    error_message,
    execution_time_ms,
    result_data,
    created_at,
    task
) -> Dict[str, Any]:
    """Build the JSON-serializable dict returned for one task execution log."""
    log_data = {
        'id': log_id,
        'task_id': task_id,
        'user_id': user_id,
        'execution_date': execution_date.isoformat() if execution_date else None,
        'status': status,
        'error_message': error_message,
        'execution_time_ms': execution_time_ms,
        'result_data': result_data,
        'created_at': created_at.isoformat() if created_at else None
    }

    # Task details are attached only when the log has an associated task.
    if task:
        log_data['task'] = {
            'id': task.id,
            'task_title': task.task_title,
            'component_name': task.component_name,
            'metric': task.metric,
            'frequency': task.frequency
        }
    return log_data


@router.get("/execution-logs")
async def get_execution_logs(
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    status: Optional[str] = Query(None, regex="^(success|failed|running|skipped)$"),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get task execution logs from database.

    Query Params:
        - limit: Number of logs to return (1-500, default: 50)
        - offset: Pagination offset (default: 0)
        - status: Filter by status (success, failed, running, skipped)

    Returns:
        - List of execution logs with task details, newest first
        - Total count for pagination

    Raises:
        HTTPException: 500 when the logs cannot be loaded.
    """
    try:
        # Older databases may lack the user_id column on task_execution_logs.
        # Querying the mapped entity would then select a column that does not
        # exist, so the schema is inspected first.
        from sqlalchemy import inspect
        inspector = inspect(db.bind)
        columns = [col['name'] for col in inspector.get_columns('task_execution_logs')]
        has_user_id_column = 'user_id' in columns

        # execution_date is not unique; id is the tie-breaker that keeps
        # offset pagination stable (no repeated or skipped rows across pages).
        newest_first = (TaskExecutionLog.execution_date.desc(), TaskExecutionLog.id.desc())

        if not has_user_id_column:
            count_query = db.query(func.count(TaskExecutionLog.id)).join(
                MonitoringTask,
                TaskExecutionLog.task_id == MonitoringTask.id
            )
            # Explicit column list so the missing user_id is never selected.
            query = db.query(
                TaskExecutionLog.id,
                TaskExecutionLog.task_id,
                TaskExecutionLog.execution_date,
                TaskExecutionLog.status,
                TaskExecutionLog.result_data,
                TaskExecutionLog.error_message,
                TaskExecutionLog.execution_time_ms,
                TaskExecutionLog.created_at,
                MonitoringTask
            ).join(
                MonitoringTask,
                TaskExecutionLog.task_id == MonitoringTask.id
            )

            if status:
                count_query = count_query.filter(TaskExecutionLog.status == status)
                query = query.filter(TaskExecutionLog.status == status)

            total_count = count_query.scalar() or 0
            rows = query.order_by(*newest_first).offset(offset).limit(limit).all()

            formatted_logs = [
                _format_execution_log(
                    log_id=log_id,
                    task_id=task_id,
                    user_id=None,  # No user_id column in database
                    execution_date=execution_date,
                    status=log_status,
                    error_message=error_message,
                    execution_time_ms=execution_time_ms,
                    result_data=result_data,
                    created_at=created_at,
                    task=task
                )
                for (log_id, task_id, execution_date, log_status, result_data,
                     error_message, execution_time_ms, created_at, task) in rows
            ]
        else:
            # Normal path: load the mapped entity with its task eagerly.
            query = db.query(TaskExecutionLog).join(
                MonitoringTask,
                TaskExecutionLog.task_id == MonitoringTask.id
            ).options(
                joinedload(TaskExecutionLog.task)
            )

            if status:
                query = query.filter(TaskExecutionLog.status == status)

            # TODO: per-user filtering is not implemented. TaskExecutionLog.user_id
            # is an Integer while the authenticated user id is a Clerk string, so
            # logs for all users are returned until an id mapping exists.

            total_count = query.count()
            logs = query.order_by(*newest_first).offset(offset).limit(limit).all()

            formatted_logs = [
                _format_execution_log(
                    log_id=log.id,
                    task_id=log.task_id,
                    user_id=log.user_id,
                    execution_date=log.execution_date,
                    status=log.status,
                    error_message=log.error_message,
                    execution_time_ms=log.execution_time_ms,
                    result_data=log.result_data,
                    created_at=log.created_at,
                    task=log.task
                )
                for log in logs
            ]

        return {
            'logs': formatted_logs,
            'total_count': total_count,
            'limit': limit,
            'offset': offset,
            'has_more': (offset + limit) < total_count,
            'is_scheduler_logs': False  # Explicitly mark as execution logs, not scheduler logs
        }

    except HTTPException:
        # Preserve status codes raised deliberately further down the stack.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"Error getting execution logs: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get execution logs: {str(e)}") from e
|
||||
|
||||
|
||||
@router.get("/jobs")
async def get_scheduler_jobs(
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get detailed information about all scheduled jobs.

    Returns:
        - List of jobs with detailed information
        - Job ID, trigger type, next run time
        - User context (extracted from job ID/kwargs)
        - Job store name (from user's website root)
        - Counts of recurring and one-time jobs, classified by trigger type

    Raises:
        HTTPException: 500 when the jobs cannot be listed.
    """
    try:
        scheduler = get_scheduler()

        formatted_jobs = []
        for job in scheduler.scheduler.get_jobs():
            job_info = {
                'id': job.id,
                'trigger_type': type(job.trigger).__name__,
                'next_run_time': job.next_run_time.isoformat() if job.next_run_time else None,
                'jobstore': getattr(job, 'jobstore', 'default'),
                'user_id': None,
                'user_job_store': 'default',
                'function_name': None
            }

            # Prefer the explicit user_id kwarg; fall back to parsing persona
            # job ids, which carry the user id after a two-word prefix.
            user_id_from_job = None
            job_kwargs = getattr(job, 'kwargs', None)
            if job_kwargs and job_kwargs.get('user_id'):
                user_id_from_job = job_kwargs.get('user_id')
            elif job.id and ('research_persona_' in job.id or 'facebook_persona_' in job.id):
                # maxsplit=2 keeps everything after the prefix intact, so user
                # ids containing underscores (presumably Clerk ids such as
                # "user_abc123") are not truncated to their first segment.
                # NOTE(review): assumes nothing follows the user id - confirm
                # against the code that schedules these jobs.
                parts = job.id.split('_', 2)
                if len(parts) >= 3:
                    user_id_from_job = parts[2]

            if user_id_from_job:
                job_info['user_id'] = user_id_from_job
                try:
                    job_info['user_job_store'] = get_user_job_store_name(user_id_from_job, db)
                except Exception as e:
                    logger.debug(f"Could not get job store for user {user_id_from_job}: {e}")

            # Get function name if available
            if hasattr(job, 'func') and hasattr(job.func, '__name__'):
                job_info['function_name'] = job.func.__name__
            elif hasattr(job, 'func_ref'):
                job_info['function_name'] = str(job.func_ref)

            formatted_jobs.append(job_info)

        # Classify by trigger type instead of assuming exactly one recurring
        # job: the old "len(jobs) - 1" reported -1 one-time jobs when the
        # scheduler had no jobs at all.
        one_time_jobs = sum(1 for j in formatted_jobs if j['trigger_type'] == 'DateTrigger')

        return {
            'jobs': formatted_jobs,
            'total_jobs': len(formatted_jobs),
            'recurring_jobs': len(formatted_jobs) - one_time_jobs,
            'one_time_jobs': one_time_jobs
        }

    except HTTPException:
        # Preserve status codes raised deliberately further down the stack.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"Error getting scheduler jobs: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scheduler jobs: {str(e)}") from e
|
||||
|
||||
|
||||
@router.get("/event-history")
async def get_scheduler_event_history(
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0),
    event_type: Optional[str] = Query(None, regex="^(check_cycle|interval_adjustment|start|stop|job_scheduled|job_cancelled|job_completed|job_failed)$"),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get scheduler event history from database.

    This endpoint returns historical scheduler events such as:
        - Check cycles (when scheduler runs and checks for due tasks)
        - Interval adjustments (when check interval changes)
        - Scheduler start/stop events
        - Job scheduled/cancelled/completed/failed events

    Query Params:
        - limit: Number of events to return (1-1000, default: 100)
        - offset: Pagination offset (default: 0)
        - event_type: Filter by event type (check_cycle, interval_adjustment, start, stop, etc.)

    Returns:
        - List of scheduler events with details, most recent first
        - Total count for pagination

    Raises:
        HTTPException: 500 when the events cannot be loaded.
    """
    try:
        query = db.query(SchedulerEventLog)

        if event_type:
            query = query.filter(SchedulerEventLog.event_type == event_type)

        total_count = query.count()

        # event_date is not unique; id is the tie-breaker that keeps offset
        # pagination stable (no repeated or skipped rows across pages).
        events = query.order_by(
            desc(SchedulerEventLog.event_date),
            desc(SchedulerEventLog.id)
        ).offset(offset).limit(limit).all()

        formatted_events = [
            {
                'id': event.id,
                'event_type': event.event_type,
                'event_date': event.event_date.isoformat() if event.event_date else None,
                'check_cycle_number': event.check_cycle_number,
                'check_interval_minutes': event.check_interval_minutes,
                'previous_interval_minutes': event.previous_interval_minutes,
                'new_interval_minutes': event.new_interval_minutes,
                'tasks_found': event.tasks_found,
                'tasks_executed': event.tasks_executed,
                'tasks_failed': event.tasks_failed,
                'tasks_by_type': event.tasks_by_type,
                'check_duration_seconds': event.check_duration_seconds,
                'active_strategies_count': event.active_strategies_count,
                'active_executions': event.active_executions,
                'job_id': event.job_id,
                'job_type': event.job_type,
                'user_id': event.user_id,
                'event_data': event.event_data,
                'error_message': event.error_message,
                'created_at': event.created_at.isoformat() if event.created_at else None
            }
            for event in events
        ]

        return {
            'events': formatted_events,
            'total_count': total_count,
            'limit': limit,
            'offset': offset,
            'has_more': (offset + limit) < total_count
        }

    except HTTPException:
        # Preserve status codes raised deliberately further down the stack.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"Error getting scheduler event history: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scheduler event history: {str(e)}") from e
|
||||
|
||||
|
||||
def _scheduler_event_as_log_entry(event) -> Dict[str, Any]:
    """Format one job-related scheduler event like an execution log entry."""
    # event_data is free-form; anything that is not a dict is treated as
    # empty rather than failing the whole request on .get().
    event_data = event.event_data if isinstance(event.event_data, dict) else {}

    # job_scheduled (and any other type) is shown as still running.
    if event.event_type == 'job_completed':
        status = 'success'
    elif event.event_type == 'job_failed':
        status = 'failed'
    else:
        status = 'running'

    job_function = event_data.get('job_function') or event_data.get('function_name') or 'unknown'

    # Only a missing duration maps to None; a recorded 0 is reported as 0 ms.
    # Non-numeric values are ignored instead of raising.
    execution_time_ms = None
    execution_time_seconds = event_data.get('execution_time_seconds')
    if execution_time_seconds is not None:
        try:
            execution_time_ms = int(float(execution_time_seconds) * 1000)
        except (TypeError, ValueError):
            execution_time_ms = None

    return {
        'id': f"scheduler_event_{event.id}",
        'task_id': None,
        'user_id': event.user_id,
        'execution_date': event.event_date.isoformat() if event.event_date else None,
        'status': status,
        'error_message': event.error_message,
        'execution_time_ms': execution_time_ms,
        'result_data': None,
        'created_at': event.created_at.isoformat() if event.created_at else None,
        'task': {
            'id': None,
            'task_title': f"{event.event_type.replace('_', ' ').title()}: {event.job_id or 'N/A'}",
            'component_name': 'Scheduler',
            'metric': job_function,
            'frequency': 'one-time'
        },
        'is_scheduler_log': True,  # Scheduler log, not an execution log
        'event_type': event.event_type,
        'job_id': event.job_id
    }


@router.get("/recent-scheduler-logs")
async def get_recent_scheduler_logs(
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get recent scheduler logs (restoration, job scheduling, etc.) for display in Execution Logs.
    These are informational logs that show scheduler activity when actual execution logs are not available.

    Returns only the latest 5 logs (rolling window, not accumulating).

    Returns:
        - List of latest 5 scheduler events (job_scheduled, job_completed, job_failed)
        - Formatted as execution log-like entries for display

    Raises:
        HTTPException: 500 when the events cannot be loaded.
    """
    job_event_types = ['job_scheduled', 'job_completed', 'job_failed']
    try:
        # Latest 5 job-related events; id breaks ties between events that
        # share an event_date so the window is deterministic.
        events = db.query(SchedulerEventLog).filter(
            SchedulerEventLog.event_type.in_(job_event_types)
        ).order_by(
            desc(SchedulerEventLog.event_date),
            desc(SchedulerEventLog.id)
        ).limit(5).all()

        # NOTE(review): these diagnostics are emitted at WARNING on every
        # request; consider lowering the level if this endpoint is polled.
        logger.warning(
            f"[Dashboard] Recent scheduler logs query: found {len(events)} events"
        )
        if events:
            for e in events:
                logger.warning(
                    f"[Dashboard]   - Event: {e.event_type} | "
                    f"Job ID: {e.job_id} | User: {e.user_id} | "
                    f"Date: {e.event_date} | Error: {bool(e.error_message)}"
                )
        else:
            # Check if there are ANY events of these types
            total_count = db.query(func.count(SchedulerEventLog.id)).filter(
                SchedulerEventLog.event_type.in_(job_event_types)
            ).scalar() or 0
            logger.warning(
                f"[Dashboard] No recent scheduler logs found (query returned 0). "
                f"Total events of these types in DB: {total_count}"
            )

        formatted_logs = [_scheduler_event_as_log_entry(event) for event in events]

        logger.warning(
            f"[Dashboard] Formatted {len(formatted_logs)} scheduler logs for response. "
            f"Sample log entry keys: {list(formatted_logs[0].keys()) if formatted_logs else 'none'}"
        )

        return {
            'logs': formatted_logs,
            'total_count': len(formatted_logs),
            'limit': 5,
            'offset': 0,
            'has_more': False,
            'is_scheduler_logs': True  # Indicate these are scheduler logs, not execution logs
        }

    except HTTPException:
        # Preserve status codes raised deliberately further down the stack.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"Error getting recent scheduler logs: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get recent scheduler logs: {str(e)}") from e
|
||||
|
||||
Reference in New Issue
Block a user