Scheduled research persona generation

This commit is contained in:
ajaysi
2025-11-05 08:51:00 +05:30
parent 55087c4f37
commit d99c7c83a7
98 changed files with 14518 additions and 828 deletions

View File

@@ -0,0 +1,310 @@
"""
OAuth Token Monitoring API Routes
Provides endpoints for managing OAuth token monitoring tasks and manual triggers.
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from typing import List, Dict, Any, Optional
from datetime import datetime
from loguru import logger
from services.database import get_db_session
from middleware.auth_middleware import get_current_user
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask, OAuthTokenExecutionLog
from services.scheduler import get_scheduler
from services.oauth_token_monitoring_service import create_oauth_monitoring_tasks, get_connected_platforms
router = APIRouter(prefix="/api/oauth-tokens", tags=["oauth-tokens"])
@router.get("/status/{user_id}")
async def get_oauth_token_status(
    user_id: str,
    db: Session = Depends(get_db_session),
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Get OAuth token monitoring status for all platforms for a user.

    Args:
        user_id: User ID from the URL path; must equal the authenticated user's ID.
        db: Database session injected by ``get_db_session``.
        current_user: Authenticated user mapping injected by ``get_current_user``.

    Returns:
        ``{"success": True, "data": {...}}`` where ``data`` holds:
        - ``user_id``
        - ``platform_status``: for each of gsc/bing/wordpress/wix, a ``connected``
          flag and a ``monitoring_task`` dict (``None`` when no task exists)
        - ``connected_platforms``: the value returned by ``get_connected_platforms``

    Raises:
        HTTPException: 403 when ``user_id`` is not the caller's own ID,
            500 on any unexpected failure.
    """
    def _iso(value):
        # Serialize an optional datetime; unset timestamps stay None.
        return value.isoformat() if value else None

    try:
        # Verify user can only access their own data
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Get all monitoring tasks for user
        tasks = db.query(OAuthTokenMonitoringTask).filter(
            OAuthTokenMonitoringTask.user_id == user_id
        ).all()

        # Index by platform; setdefault keeps the first task seen per platform,
        # which is what a first-match scan over the query result would pick.
        task_by_platform = {}
        for existing in tasks:
            task_by_platform.setdefault(existing.platform, existing)

        # Get connected platforms
        logger.info(f"[OAuth Status API] Getting token status for user: {user_id}")
        connected_platforms = get_connected_platforms(user_id)
        logger.info(f"[OAuth Status API] Found {len(connected_platforms)} connected platforms: {connected_platforms}")

        # Build status response
        platform_status = {}
        for platform in ['gsc', 'bing', 'wordpress', 'wix']:
            task = task_by_platform.get(platform)
            is_connected = platform in connected_platforms

            monitoring_task = None
            if task:
                monitoring_task = {
                    'id': task.id,
                    'status': task.status,
                    'last_check': _iso(task.last_check),
                    'last_success': _iso(task.last_success),
                    'last_failure': _iso(task.last_failure),
                    'failure_reason': task.failure_reason,
                    'next_check': _iso(task.next_check),
                }

            platform_status[platform] = {
                'connected': is_connected,
                'monitoring_task': monitoring_task
            }
            logger.info(
                f"[OAuth Status API] Platform {platform}: "
                f"connected={is_connected}, "
                f"task_exists={task is not None}, "
                f"task_status={task.status if task else 'N/A'}"
            )

        response_data = {
            "success": True,
            "data": {
                "user_id": user_id,
                "platform_status": platform_status,
                "connected_platforms": connected_platforms
            }
        }
        logger.info(f"[OAuth Status API] Returning status for user {user_id}: {len(connected_platforms)} platforms connected")
        return response_data
    except HTTPException:
        raise
    except Exception as e:
        # loguru does not understand stdlib's exc_info=True (it would be used as a
        # str.format argument); logger.exception attaches the active traceback.
        logger.exception(f"Error getting OAuth token status for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get token status: {str(e)}") from e
@router.post("/refresh/{user_id}/{platform}")
async def manual_refresh_token(
    user_id: str,
    platform: str,
    db: Session = Depends(get_db_session),
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Manually trigger token refresh for a specific platform.

    This will:
    1. Find or create the monitoring task
    2. Execute the token check/refresh immediately via the
       ``oauth_token_monitoring`` executor from the scheduler registry
    3. Return the refreshed task state together with the execution result

    Args:
        user_id: User ID from the URL path; must equal the authenticated user's ID.
        platform: Platform identifier ('gsc', 'bing', 'wordpress', 'wix').
        db: Database session injected by ``get_db_session``.
        current_user: Authenticated user mapping injected by ``get_current_user``.

    Returns:
        Dict with ``success``, a human-readable ``message`` and ``data`` holding the
        task's status/timestamps and the executor's ``execution_result``.

    Raises:
        HTTPException: 403 for another user's ID, 400 for an unknown platform,
            500 when the executor is unavailable or on any unexpected failure.
    """
    def _iso(value):
        # Serialize an optional datetime; unset timestamps stay None.
        return value.isoformat() if value else None

    try:
        # Verify user can only access their own data
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Validate platform
        valid_platforms = ['gsc', 'bing', 'wordpress', 'wix']
        if platform not in valid_platforms:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid platform. Must be one of: {', '.join(valid_platforms)}"
            )

        # Get or create monitoring task
        task = db.query(OAuthTokenMonitoringTask).filter(
            OAuthTokenMonitoringTask.user_id == user_id,
            OAuthTokenMonitoringTask.platform == platform
        ).first()
        if not task:
            # One timestamp for all three columns so the new row is self-consistent.
            now = datetime.utcnow()
            task = OAuthTokenMonitoringTask(
                user_id=user_id,
                platform=platform,
                status='active',
                next_check=now,  # Set to now to trigger immediately
                created_at=now,
                updated_at=now
            )
            db.add(task)
            db.commit()
            db.refresh(task)
            logger.info(f"Created monitoring task for manual refresh: user={user_id}, platform={platform}")

        # Get scheduler and executor
        scheduler = get_scheduler()
        try:
            executor = scheduler.registry.get_executor('oauth_token_monitoring')
        except ValueError as lookup_error:
            raise HTTPException(
                status_code=500,
                detail="OAuth token monitoring executor not available"
            ) from lookup_error

        # Execute task immediately
        logger.info(f"Manually triggering token refresh: user={user_id}, platform={platform}")
        result = await executor.execute_task(task, db)

        # Reload the task so the response reflects whatever the executor persisted
        db.refresh(task)

        return {
            "success": result.success,
            "message": "Token refresh completed" if result.success else "Token refresh failed",
            "data": {
                "platform": platform,
                "status": task.status,
                "last_check": _iso(task.last_check),
                "last_success": _iso(task.last_success),
                "last_failure": _iso(task.last_failure),
                "failure_reason": task.failure_reason,
                "next_check": _iso(task.next_check),
                "execution_result": {
                    "success": result.success,
                    "error_message": result.error_message,
                    "execution_time_ms": result.execution_time_ms,
                    "result_data": result.result_data
                }
            }
        }
    except HTTPException:
        raise
    except Exception as e:
        # loguru does not understand stdlib's exc_info=True (it would be used as a
        # str.format argument); logger.exception attaches the active traceback.
        logger.exception(f"Error manually refreshing token for user {user_id}, platform {platform}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to refresh token: {str(e)}") from e
@router.get("/execution-logs/{user_id}")
async def get_execution_logs(
    user_id: str,
    platform: Optional[str] = Query(None, description="Filter by platform"),
    limit: int = Query(50, ge=1, le=100, description="Maximum number of logs"),
    offset: int = Query(0, ge=0, description="Offset for pagination"),
    db: Session = Depends(get_db_session),
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Get execution logs for OAuth token monitoring tasks.

    Args:
        user_id: User ID from the URL path; must equal the authenticated user's ID.
        platform: Optional platform filter.
        limit: Maximum number of logs to return (1-100).
        offset: Pagination offset.
        db: Database session injected by ``get_db_session``.
        current_user: Authenticated user mapping injected by ``get_current_user``.

    Returns:
        ``{"success": True, "data": {"logs", "total_count", "limit", "offset"}}``.
        ``total_count`` is the number of matching logs before pagination; logs are
        ordered newest ``execution_date`` first.

    Raises:
        HTTPException: 403 for another user's ID, 500 on any unexpected failure.
    """
    def _iso(value):
        # Serialize an optional datetime; a missing timestamp must not turn the
        # whole listing into a 500.
        return value.isoformat() if value else None

    try:
        # Verify user can only access their own data
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Logs carry no user column of their own here; ownership is checked
        # through the owning monitoring task.
        query = db.query(OAuthTokenExecutionLog).join(
            OAuthTokenMonitoringTask,
            OAuthTokenExecutionLog.task_id == OAuthTokenMonitoringTask.id
        ).filter(
            OAuthTokenMonitoringTask.user_id == user_id
        )

        # Apply platform filter if provided
        if platform:
            query = query.filter(OAuthTokenMonitoringTask.platform == platform)

        # Count before pagination so clients can compute the number of pages
        total_count = query.count()

        logs = query.order_by(
            OAuthTokenExecutionLog.execution_date.desc()
        ).offset(offset).limit(limit).all()

        logs_data = [
            {
                "id": log.id,
                "task_id": log.task_id,
                "platform": log.task.platform,  # Get platform from relationship
                "execution_date": _iso(log.execution_date),
                "status": log.status,
                "result_data": log.result_data,
                "error_message": log.error_message,
                "execution_time_ms": log.execution_time_ms,
                "created_at": _iso(log.created_at)
            }
            for log in logs
        ]

        return {
            "success": True,
            "data": {
                "logs": logs_data,
                "total_count": total_count,
                "limit": limit,
                "offset": offset
            }
        }
    except HTTPException:
        raise
    except Exception as e:
        # loguru does not understand stdlib's exc_info=True (it would be used as a
        # str.format argument); logger.exception attaches the active traceback.
        logger.exception(f"Error getting execution logs for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get execution logs: {str(e)}") from e
@router.post("/create-tasks/{user_id}")
async def create_monitoring_tasks(
    user_id: str,
    platforms: Optional[List[str]] = None,
    db: Session = Depends(get_db_session),
    current_user: Dict[str, Any] = Depends(get_current_user)
) -> Dict[str, Any]:
    """
    Manually create OAuth token monitoring tasks for a user.

    Task creation is delegated to ``create_oauth_monitoring_tasks``; ``platforms``
    is passed through unchanged (``None`` when the caller supplies nothing).

    Args:
        user_id: User ID from the URL path; must equal the authenticated user's ID.
        platforms: Optional list of platforms to create tasks for.
        db: Database session injected by ``get_db_session``.
        current_user: Authenticated user mapping injected by ``get_current_user``.

    Returns:
        Dict with ``success``, ``message`` and ``data`` (``tasks_created`` plus a
        summary of each returned task: id, platform, status, next_check).

    Raises:
        HTTPException: 403 for another user's ID, 500 on any unexpected failure.
    """
    try:
        # Verify user can only access their own data
        if str(current_user.get('id')) != user_id:
            raise HTTPException(status_code=403, detail="Access denied")

        # Create tasks
        tasks = create_oauth_monitoring_tasks(user_id, db, platforms)

        task_summaries = [
            {
                "id": task.id,
                "platform": task.platform,
                "status": task.status,
                "next_check": task.next_check.isoformat() if task.next_check else None
            }
            for task in tasks
        ]
        return {
            "success": True,
            "message": f"Created {len(tasks)} monitoring task(s)",
            "data": {
                "tasks_created": len(tasks),
                "tasks": task_summaries
            }
        }
    except HTTPException:
        raise
    except Exception as e:
        # loguru does not understand stdlib's exc_info=True (it would be used as a
        # str.format argument); logger.exception attaches the active traceback.
        logger.exception(f"Error creating monitoring tasks for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to create monitoring tasks: {str(e)}") from e

View File

@@ -12,6 +12,9 @@ from services.onboarding.progress_service import get_onboarding_progress_service
from services.onboarding.database_service import OnboardingDatabaseService
from services.database import get_db
from services.persona_analysis_service import PersonaAnalysisService
from services.research.research_persona_scheduler import schedule_research_persona_generation
from services.persona.facebook.facebook_persona_scheduler import schedule_facebook_persona_generation
from services.oauth_token_monitoring_service import create_oauth_monitoring_tasks
class OnboardingCompletionService:
"""Service for handling onboarding completion logic."""
@@ -46,6 +49,38 @@ class OnboardingCompletionService:
if not success:
raise HTTPException(status_code=500, detail="Failed to mark onboarding as complete")
# Schedule research persona generation 20 minutes after onboarding completion
try:
schedule_research_persona_generation(user_id, delay_minutes=20)
logger.info(f"Scheduled research persona generation for user {user_id} (20 minutes after onboarding)")
except Exception as e:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to schedule research persona generation for user {user_id}: {e}")
# Schedule Facebook persona generation 20 minutes after onboarding completion
try:
schedule_facebook_persona_generation(user_id, delay_minutes=20)
logger.info(f"Scheduled Facebook persona generation for user {user_id} (20 minutes after onboarding)")
except Exception as e:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to schedule Facebook persona generation for user {user_id}: {e}")
# Create OAuth token monitoring tasks for connected platforms
try:
from services.database import SessionLocal
db = SessionLocal()
try:
monitoring_tasks = create_oauth_monitoring_tasks(user_id, db)
logger.info(
f"Created {len(monitoring_tasks)} OAuth token monitoring tasks for user {user_id} "
f"on onboarding completion"
)
finally:
db.close()
except Exception as e:
# Non-critical: log but don't fail onboarding completion
logger.warning(f"Failed to create OAuth token monitoring tasks for user {user_id}: {e}")
return {
"message": "Onboarding completed successfully",
"completed_at": datetime.now().isoformat(),

View File

@@ -380,6 +380,41 @@ async def generate_platform_persona(user_id: str, platform: str, db_session):
logger.error(f"Error generating {platform} persona: {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to generate {platform} persona: {str(e)}")
async def check_facebook_persona(user_id: str, db: Session):
    """
    Report whether the user has a stored Facebook persona.

    Looks up the user's persona data through ``PersonaDataService`` and returns a
    dict with ``has_persona``, ``has_core_persona`` and ``onboarding_completed``
    flags. When persona data exists the Facebook persona itself is included under
    ``persona``; otherwise a ``message`` explains that nothing was found.

    Raises:
        HTTPException: 500 carrying the error text if the lookup fails.
    """
    try:
        from services.persona_data_service import PersonaDataService

        service = PersonaDataService(db_session=db)
        stored = service.get_user_persona_data(user_id)

        if stored:
            per_platform = stored.get('platform_personas', {})
            facebook_entry = per_platform.get('facebook') if per_platform else None
            return {
                "has_persona": bool(facebook_entry),
                # Core persona presence is reported independently of Facebook
                "has_core_persona": bool(stored.get('core_persona')),
                "persona": facebook_entry,
                # Existing persona data is treated as proof of finished onboarding
                "onboarding_completed": True
            }

        return {
            "has_persona": False,
            "has_core_persona": False,
            "message": "No persona data found",
            "onboarding_completed": False
        }
    except Exception as e:
        logger.error(f"Error checking Facebook persona for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
async def validate_persona_generation_readiness(user_id: int):
"""Check if user has sufficient onboarding data for persona generation."""
try:

View File

@@ -36,7 +36,7 @@ from api.persona import (
)
from services.persona_replication_engine import PersonaReplicationEngine
from api.persona import update_platform_persona, generate_platform_persona
from api.persona import update_platform_persona, generate_platform_persona, check_facebook_persona
# Create router
router = APIRouter(prefix="/api/personas", tags=["personas"])
@@ -248,4 +248,12 @@ async def update_platform_persona_endpoint(
Allows editing persona fields in the UI and saving them to the database.
"""
# Beta testing: Force user_id=1 for all requests
return await update_platform_persona(1, platform, update_data)
return await update_platform_persona(1, platform, update_data)
@router.get("/facebook-persona/check/{user_id}")
async def check_facebook_persona_endpoint(
    user_id: str,
    db: Session = Depends(get_db)
):
    """
    HTTP entry point for the Facebook persona existence check.

    Forwards the path ``user_id`` and the injected database session to
    ``check_facebook_persona`` and returns its result unchanged.
    """
    persona_status = await check_facebook_persona(user_id, db)
    return persona_status

View File

@@ -0,0 +1,398 @@
"""
Research Configuration API
Provides provider availability and persona-aware defaults for research.
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from typing import Dict, Any, Optional
from loguru import logger
from pydantic import BaseModel
from middleware.auth_middleware import get_current_user
from services.user_api_key_context import get_exa_key, get_gemini_key
from services.onboarding.database_service import OnboardingDatabaseService
from services.onboarding.progress_service import get_onboarding_progress_service
from services.database import get_db
from sqlalchemy.orm import Session
from services.research.research_persona_service import ResearchPersonaService
from services.research.research_persona_scheduler import schedule_research_persona_generation
from models.research_persona_models import ResearchPersona
router = APIRouter()
class ProviderAvailability(BaseModel):
    """Provider availability status.

    Describes which research providers the current user can use, as derived
    from whether a non-blank API key is configured for each provider.
    """
    # True when a non-blank Gemini key is configured
    google_available: bool
    # True when a non-blank Exa key is configured
    exa_available: bool
    # Textual mirror of google_available
    gemini_key_status: str  # 'configured' | 'missing'
    # Textual mirror of exa_available
    exa_key_status: str  # 'configured' | 'missing'
class PersonaDefaults(BaseModel):
    """Persona-aware research defaults.

    Every field is optional, so an instance built with no arguments is a valid
    "no suggestions" response.
    """
    # Industry label taken from the core persona or website analysis
    industry: Optional[str] = None
    # Audience description from the same sources
    target_audience: Optional[str] = None
    # Domains suggested for the industry; empty when none are mapped
    suggested_domains: list[str] = []
    # Exa search category suggested for the industry, if any
    suggested_exa_category: Optional[str] = None
class ResearchConfigResponse(BaseModel):
    """Combined research configuration response.

    Bundles provider availability, persona-derived defaults and the state of
    the user's research persona into a single payload.
    """
    # Which providers have keys configured
    provider_availability: ProviderAvailability
    # Industry/audience defaults and suggestions
    persona_defaults: PersonaDefaults
    # Cached research persona, or None when none is available
    research_persona: Optional[ResearchPersona] = None
    # Whether the onboarding status check reported completion
    onboarding_completed: bool = False
    # True when persona generation was scheduled while building this response
    persona_scheduled: bool = False
@router.get("/provider-availability", response_model=ProviderAvailability)
async def get_provider_availability(
    current_user: Dict = Depends(get_current_user)
):
    """
    Check which research providers are available for the current user.

    Args:
        current_user: Authenticated user mapping injected by ``get_current_user``.

    Returns:
        ProviderAvailability with:
        - google_available: True if a non-blank Gemini key is configured
        - exa_available: True if a non-blank Exa key is configured
        - Key status ('configured' | 'missing') for each provider

    Raises:
        HTTPException: 500 on any unexpected failure.
    """
    # Bound before the try so the error log can always name a user.
    user_id = 'unknown'
    try:
        user_id = str(current_user.get('id'))

        # Check API key availability
        gemini_key = get_gemini_key(user_id)
        exa_key = get_exa_key(user_id)

        # A key made only of whitespace counts as missing
        google_available = bool(gemini_key and gemini_key.strip())
        exa_available = bool(exa_key and exa_key.strip())

        return ProviderAvailability(
            google_available=google_available,
            exa_available=exa_available,
            gemini_key_status='configured' if google_available else 'missing',
            exa_key_status='configured' if exa_available else 'missing'
        )
    except Exception as e:
        # loguru does not understand stdlib's exc_info=True (it would be used as a
        # str.format argument); logger.exception attaches the active traceback.
        logger.exception(f"[ResearchConfig] Error checking provider availability for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to check provider availability: {str(e)}") from e
@router.get("/persona-defaults", response_model=PersonaDefaults)
async def get_persona_defaults(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get persona-aware research defaults for the current user.

    Industry and target audience come from the stored core persona when present;
    if the industry is still 'General', the website analysis' ``target_audience``
    data is used as a fallback. Domain and Exa-category suggestions are then
    derived from the resolved industry.

    Args:
        current_user: Authenticated user mapping injected by ``get_current_user``.
        db: Database session injected by ``get_db``.

    Returns:
        PersonaDefaults. This endpoint is best-effort: when the session is missing
        or anything fails, an empty ``PersonaDefaults()`` is returned instead of an
        error response.
    """
    # Bound before the try so the error log can always name a user.
    user_id = 'unknown'
    try:
        user_id = str(current_user.get('id'))

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_persona_defaults")
            # Return defaults rather than error
            return PersonaDefaults()

        db_service = OnboardingDatabaseService(db=db)

        # Try to get persona data first (most reliable source for industry/target_audience)
        persona_data = db_service.get_persona_data(user_id, db)
        industry = 'General'
        target_audience = 'General'
        if persona_data:
            # Both camelCase and snake_case keys are accepted
            core_persona = persona_data.get('corePersona') or persona_data.get('core_persona')
            if core_persona:
                if core_persona.get('industry'):
                    industry = core_persona['industry']
                if core_persona.get('target_audience'):
                    target_audience = core_persona['target_audience']

        # Fallback to website analysis if persona data doesn't have industry info
        if industry == 'General':
            website_analysis = db_service.get_website_analysis(user_id, db)
            if website_analysis:
                target_audience_data = website_analysis.get('target_audience', {})
                if isinstance(target_audience_data, dict):
                    # Extract from target_audience JSON field
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus:
                        industry = industry_focus
                    demographics = target_audience_data.get('demographics')
                    if demographics:
                        target_audience = demographics if isinstance(demographics, str) else str(demographics)

        return PersonaDefaults(
            industry=industry,
            target_audience=target_audience,
            # Suggest domains based on industry
            suggested_domains=_get_domain_suggestions(industry),
            # Suggest Exa category based on industry
            suggested_exa_category=_get_exa_category_suggestion(industry)
        )
    except Exception as e:
        # loguru does not understand stdlib's exc_info=True (it would be used as a
        # str.format argument); logger.exception attaches the active traceback.
        logger.exception(f"[ResearchConfig] Error getting persona defaults for user {user_id}: {e}")
        # Return defaults rather than error
        return PersonaDefaults()
@router.get("/research-persona")
async def get_research_persona(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db),
    force_refresh: bool = Query(False, description="Force regenerate persona even if cache is valid")
):
    """
    Get or generate research persona for the current user.

    Query params:
    - force_refresh: If true, regenerate persona even if cache is valid (default: false)

    Args:
        current_user: Authenticated user mapping injected by ``get_current_user``.
        db: Database session injected by ``get_db``.
        force_refresh: Forwarded to ``ResearchPersonaService.get_or_generate``.

    Returns:
        The research persona as a plain dict (``research_persona.dict()``).

    Raises:
        HTTPException: 401 when the authenticated user has no ID, 404 when no
            persona is available, 500 when the database session is missing or on
            any unexpected failure. HTTPExceptions raised further down are
            re-raised unchanged.
    """
    # Bound before the try so the error log can always name a user.
    user_id = 'unknown'
    try:
        # Validate the raw ID *before* str(): str(None) is the truthy string
        # 'None', which would make a post-conversion emptiness check unreachable.
        raw_user_id = current_user.get('id') if current_user else None
        if raw_user_id is None or raw_user_id == '':
            raise HTTPException(status_code=401, detail="User not authenticated")
        user_id = str(raw_user_id)

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_research_persona")
            raise HTTPException(status_code=500, detail="Database not available")

        persona_service = ResearchPersonaService(db_session=db)
        research_persona = persona_service.get_or_generate(user_id, force_refresh=force_refresh)
        if not research_persona:
            raise HTTPException(
                status_code=404,
                detail="Research persona not available. Complete onboarding to generate one."
            )
        return research_persona.dict()
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) to preserve status code and details
        raise
    except Exception as e:
        # loguru does not understand stdlib's exc_info=True (it would be used as a
        # str.format argument); logger.exception attaches the active traceback.
        logger.exception(f"[ResearchConfig] Error getting research persona for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get research persona: {str(e)}") from e
@router.get("/config", response_model=ResearchConfigResponse)
async def get_research_config(
    current_user: Dict = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get complete research configuration including provider availability and persona defaults.

    Steps, in order:
    1. Provider availability from the user's Gemini/Exa keys.
    2. Persona defaults from stored persona data, falling back to website analysis.
    3. Onboarding completion status (failures are logged and treated as "not completed").
    4. Cached research persona only - nothing is generated here. If onboarding is
       complete, no persona is cached and persona data exists, generation is
       scheduled 20 minutes out and ``persona_scheduled`` is set.

    NOTE(review): step 4 schedules from a GET handler each time its conditions
    hold; whether repeated scheduling is deduplicated depends on
    ``schedule_research_persona_generation`` - confirm.

    Args:
        current_user: Authenticated user mapping injected by ``get_current_user``.
        db: Database session injected by ``get_db``.

    Returns:
        ResearchConfigResponse. If building the response with the persona fails,
        it is rebuilt with ``research_persona=None``.

    Raises:
        HTTPException: 500 when the database session is missing or on any
            unexpected failure; other HTTPExceptions are re-raised unchanged.
    """
    user_id = None
    try:
        user_id = str(current_user.get('id'))
        logger.info(f"[ResearchConfig] Starting get_research_config for user {user_id}")

        # Add explicit null check for database session
        if not db:
            logger.error(f"[ResearchConfig] Database session is None for user {user_id} in get_research_config")
            raise HTTPException(status_code=500, detail="Database session not available")

        # --- 1. Provider availability ---------------------------------------
        logger.debug(f"[ResearchConfig] Getting provider availability for user {user_id}")
        gemini_key = get_gemini_key(user_id)
        exa_key = get_exa_key(user_id)
        # A key made only of whitespace counts as missing
        google_available = bool(gemini_key and gemini_key.strip())
        exa_available = bool(exa_key and exa_key.strip())
        provider_availability = ProviderAvailability(
            google_available=google_available,
            exa_available=exa_available,
            gemini_key_status='configured' if google_available else 'missing',
            exa_key_status='configured' if exa_available else 'missing'
        )

        # --- 2. Persona defaults --------------------------------------------
        logger.debug(f"[ResearchConfig] Getting persona defaults for user {user_id}")
        db_service = OnboardingDatabaseService(db=db)

        # Try to get persona data first (most reliable source for industry/target_audience)
        try:
            persona_data = db_service.get_persona_data(user_id, db)
        except Exception as e:
            # loguru has no exc_info kwarg; opt(exception=True) attaches the traceback
            logger.opt(exception=True).error(f"[ResearchConfig] Error getting persona data for user {user_id}: {e}")
            persona_data = None

        industry = 'General'
        target_audience = 'General'
        if persona_data:
            # Both camelCase and snake_case keys are accepted
            core_persona = persona_data.get('corePersona') or persona_data.get('core_persona')
            if core_persona:
                if core_persona.get('industry'):
                    industry = core_persona['industry']
                if core_persona.get('target_audience'):
                    target_audience = core_persona['target_audience']

        # Fallback to website analysis if persona data doesn't have industry info
        if industry == 'General':
            website_analysis = db_service.get_website_analysis(user_id, db)
            if website_analysis:
                target_audience_data = website_analysis.get('target_audience', {})
                if isinstance(target_audience_data, dict):
                    # Extract from target_audience JSON field
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus:
                        industry = industry_focus
                    demographics = target_audience_data.get('demographics')
                    if demographics:
                        target_audience = demographics if isinstance(demographics, str) else str(demographics)

        persona_defaults = PersonaDefaults(
            industry=industry,
            target_audience=target_audience,
            suggested_domains=_get_domain_suggestions(industry),
            suggested_exa_category=_get_exa_category_suggestion(industry)
        )

        # --- 3. Onboarding completion status --------------------------------
        onboarding_completed = False
        try:
            logger.debug(f"[ResearchConfig] Checking onboarding status for user {user_id}")
            progress_service = get_onboarding_progress_service()
            onboarding_status = progress_service.get_onboarding_status(user_id)
            onboarding_completed = onboarding_status.get('is_completed', False)
            logger.info(
                f"[ResearchConfig] Onboarding status check for user {user_id}: "
                f"is_completed={onboarding_completed}, "
                f"current_step={onboarding_status.get('current_step')}, "
                f"progress={onboarding_status.get('completion_percentage')}"
            )
        except Exception as e:
            logger.opt(exception=True).error(f"[ResearchConfig] Could not check onboarding status for user {user_id}: {e}")
            # Continue with onboarding_completed=False

        # --- 4. Cached research persona (optional) --------------------------
        # CRITICAL: Use get_cached_only() to avoid triggering rate limit checks
        # Only return persona if it's already cached - don't generate on config load
        research_persona = None
        persona_scheduled = False
        try:
            logger.debug(f"[ResearchConfig] Getting cached research persona for user {user_id}")
            persona_service = ResearchPersonaService(db_session=db)
            research_persona = persona_service.get_cached_only(user_id)
            logger.info(
                f"[ResearchConfig] Research persona check for user {user_id}: "
                f"persona_exists={research_persona is not None}, "
                f"onboarding_completed={onboarding_completed}"
            )

            # If onboarding is completed but persona doesn't exist, schedule generation
            if onboarding_completed and not research_persona:
                try:
                    # Re-read persona data (to ensure we have data to generate from)
                    source_data = db_service.get_persona_data(user_id, db)
                    has_source_data = bool(source_data) and bool(
                        source_data.get('corePersona') or source_data.get('platformPersonas') or
                        source_data.get('core_persona') or source_data.get('platform_personas')
                    )
                    if has_source_data:
                        # Schedule persona generation (20 minutes from now)
                        schedule_research_persona_generation(user_id, delay_minutes=20)
                        logger.info(f"Scheduled research persona generation for user {user_id} (onboarding already completed)")
                        persona_scheduled = True
                    else:
                        logger.info(f"Onboarding completed but no persona data found for user {user_id} - cannot schedule persona generation")
                except Exception as e:
                    # Scheduling is best-effort; the config response must still be served
                    logger.opt(exception=True).warning(f"Failed to schedule research persona generation: {e}")
        except Exception as e:
            # get_cached_only() never raises HTTPException, but catch any unexpected errors
            logger.opt(exception=True).warning(f"[ResearchConfig] Could not load cached research persona for user {user_id}: {e}")

        # FastAPI will automatically serialize the ResearchPersona Pydantic model
        # If there's a serialization issue, we catch it and log it
        try:
            response = ResearchConfigResponse(
                provider_availability=provider_availability,
                persona_defaults=persona_defaults,
                research_persona=research_persona,
                onboarding_completed=onboarding_completed,
                persona_scheduled=persona_scheduled
            )
        except Exception as serialization_error:
            logger.opt(exception=True).error(f"[ResearchConfig] Failed to create ResearchConfigResponse for user {user_id}: {serialization_error}")
            # Try without research_persona as fallback
            response = ResearchConfigResponse(
                provider_availability=provider_availability,
                persona_defaults=persona_defaults,
                research_persona=None,
                onboarding_completed=onboarding_completed,
                persona_scheduled=persona_scheduled
            )

        logger.info(
            f"[ResearchConfig] Response for user {user_id}: "
            f"onboarding_completed={onboarding_completed}, "
            f"persona_exists={research_persona is not None}, "
            f"persona_scheduled={persona_scheduled}"
        )
        return response
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429, 401, etc.) to preserve status codes
        raise
    except Exception as e:
        # logger.exception already emits the full traceback, so no separate
        # traceback.format_exc() log line is needed.
        logger.exception(f"[ResearchConfig] CRITICAL ERROR getting research config for user {user_id if user_id else 'unknown'}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get research config: {str(e)}"
        ) from e
# Helper functions from RESEARCH_AI_HYPERPERSONALIZATION.md
# Industry (casefolded) -> authoritative domains to suggest for research.
# Keys are stored casefolded so lookups can ignore case and stray whitespace.
_DOMAIN_SUGGESTIONS_BY_INDUSTRY = {
    'healthcare': ['pubmed.gov', 'nejm.org', 'thelancet.com', 'nih.gov'],
    'technology': ['techcrunch.com', 'wired.com', 'arstechnica.com', 'theverge.com'],
    'finance': ['wsj.com', 'bloomberg.com', 'ft.com', 'reuters.com'],
    'science': ['nature.com', 'sciencemag.org', 'cell.com', 'pnas.org'],
    'business': ['hbr.org', 'forbes.com', 'businessinsider.com', 'mckinsey.com'],
    'marketing': ['marketingland.com', 'adweek.com', 'hubspot.com', 'moz.com'],
    'education': ['edutopia.org', 'chronicle.com', 'insidehighered.com'],
    'real estate': ['realtor.com', 'zillow.com', 'forbes.com'],
    'entertainment': ['variety.com', 'hollywoodreporter.com', 'deadline.com'],
    'travel': ['lonelyplanet.com', 'nationalgeographic.com', 'travelandleisure.com'],
    'fashion': ['vogue.com', 'elle.com', 'wwd.com'],
    'sports': ['espn.com', 'si.com', 'bleacherreport.com'],
    'law': ['law.com', 'abajournal.com', 'scotusblog.com'],
}


def _get_domain_suggestions(industry: str) -> list[str]:
    """Get domain suggestions based on industry.

    The match ignores letter case and surrounding whitespace, because the
    industry label originates from free-form persona / website-analysis data.

    Args:
        industry: Industry label, e.g. 'Healthcare' or ' technology '.

    Returns:
        A new list of suggested domains; empty when the industry is unknown or
        is not a string.
    """
    if not isinstance(industry, str):
        # Non-string labels (None, dicts, lists) cannot match any known industry
        return []
    matches = _DOMAIN_SUGGESTIONS_BY_INDUSTRY.get(industry.strip().casefold(), [])
    # Copy so callers can never mutate the shared module-level table
    return list(matches)
# Industry (casefolded) -> Exa search category to suggest.
# Keys are stored casefolded so lookups can ignore case and stray whitespace.
_EXA_CATEGORY_BY_INDUSTRY = {
    'healthcare': 'research paper',
    'science': 'research paper',
    'finance': 'financial report',
    'technology': 'company',
    'business': 'company',
    'marketing': 'company',
    'education': 'research paper',
    'law': 'pdf',
}


def _get_exa_category_suggestion(industry: str) -> Optional[str]:
    """Get Exa category suggestion based on industry.

    The match ignores letter case and surrounding whitespace, because the
    industry label originates from free-form persona / website-analysis data.

    Args:
        industry: Industry label, e.g. 'Finance' or ' law '.

    Returns:
        The suggested Exa category, or None when the industry is unknown or is
        not a string.
    """
    if not isinstance(industry, str):
        # Non-string labels (None, dicts, lists) cannot match any known industry
        return None
    return _EXA_CATEGORY_BY_INDUSTRY.get(industry.strip().casefold())

View File

@@ -0,0 +1,706 @@
"""
Scheduler Dashboard API
Provides endpoints for scheduler dashboard UI.
"""
from fastapi import APIRouter, HTTPException, Depends, Query
from typing import Dict, Any, Optional, List
from sqlalchemy.orm import Session, joinedload
from sqlalchemy import desc, func
from datetime import datetime
from loguru import logger
from services.scheduler import get_scheduler
from services.scheduler.utils.user_job_store import get_user_job_store_name
from services.monitoring_data_service import MonitoringDataService
from services.database import get_db
from middleware.auth_middleware import get_current_user
from models.monitoring_models import TaskExecutionLog, MonitoringTask
from models.scheduler_models import SchedulerEventLog
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
from sqlalchemy import func
router = APIRouter(prefix="/api/scheduler", tags=["scheduler-dashboard"])
@router.get("/dashboard")
async def get_scheduler_dashboard(
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get scheduler dashboard statistics and current state.

    Merges three sources: the in-memory stats reported by the scheduler, the
    jobs currently registered with the underlying scheduler, and the active
    OAuth token monitoring tasks stored in the database. Cumulative totals are
    aggregated from the ``check_cycle`` rows of ``SchedulerEventLog``.

    Args:
        current_user: Authenticated user payload; only its ``id`` is read.
        db: Database session used for the OAuth task and event-log queries.

    Returns:
        - Scheduler stats (total checks, tasks executed, failed, etc.)
        - Current scheduled jobs
        - Active strategies count
        - Check interval
        - User isolation status
        - Last check timestamp

    Raises:
        HTTPException: 500 when the dashboard cannot be assembled.
    """
    try:
        scheduler = get_scheduler()

        # Get user_id from current_user (Clerk format)
        user_id_str = str(current_user.get('id', '')) if current_user else None

        # Get scheduler stats
        stats = scheduler.get_stats(user_id=None)  # Get all stats for dashboard

        # Get all scheduled jobs
        all_jobs = scheduler.scheduler.get_jobs()

        # Format jobs with user context
        formatted_jobs = []
        for job in all_jobs:
            job_info = {
                'id': job.id,
                'trigger_type': type(job.trigger).__name__,
                'next_run_time': job.next_run_time.isoformat() if job.next_run_time else None,
                'user_id': None,
                'job_store': 'default',
                'user_job_store': 'default'
            }

            # Extract user_id from job
            user_id_from_job = None
            if hasattr(job, 'kwargs') and job.kwargs and job.kwargs.get('user_id'):
                user_id_from_job = job.kwargs.get('user_id')
            elif job.id:
                # The user ID is everything after the persona marker. Splitting
                # the whole job ID on '_' would cut off user IDs that contain
                # underscores themselves (e.g. "user_abc" would become "user").
                for marker in ('research_persona_', 'facebook_persona_'):
                    _, found, remainder = job.id.partition(marker)
                    if found and remainder:
                        user_id_from_job = remainder
                        break

            if user_id_from_job:
                job_info['user_id'] = user_id_from_job
                try:
                    user_job_store = get_user_job_store_name(user_id_from_job, db)
                    job_info['user_job_store'] = user_job_store
                except Exception as e:
                    # Best effort: keep the 'default' store name on lookup failure.
                    logger.debug(f"Could not get job store for user {user_id_from_job}: {e}")

            formatted_jobs.append(job_info)

        # Add OAuth token monitoring tasks from database (these are recurring weekly tasks)
        try:
            oauth_tasks = db.query(OAuthTokenMonitoringTask).filter(
                OAuthTokenMonitoringTask.status == 'active'
            ).all()
            oauth_tasks_count = len(oauth_tasks)

            if oauth_tasks_count > 0:
                # Log platform breakdown for debugging
                platforms = {}
                for task in oauth_tasks:
                    platforms[task.platform] = platforms.get(task.platform, 0) + 1
                platform_summary = ", ".join([f"{platform}: {count}" for platform, count in platforms.items()])
                logger.warning(
                    f"[Dashboard] OAuth Monitoring: Found {oauth_tasks_count} active OAuth token monitoring tasks "
                    f"({platform_summary})"
                )
            else:
                # Check if there are any inactive tasks
                all_oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
                if all_oauth_tasks:
                    inactive_by_status = {}
                    for task in all_oauth_tasks:
                        status = task.status
                        inactive_by_status[status] = inactive_by_status.get(status, 0) + 1
                    logger.warning(
                        f"[Dashboard] OAuth Monitoring: Found {len(all_oauth_tasks)} total OAuth tasks, "
                        f"but {oauth_tasks_count} are active. Status breakdown: {inactive_by_status}"
                    )

            for task in oauth_tasks:
                try:
                    user_job_store = get_user_job_store_name(task.user_id, db)
                except Exception as e:
                    user_job_store = 'default'
                    logger.debug(f"Could not get job store for user {task.user_id}: {e}")

                # Format as recurring weekly job
                job_info = {
                    'id': f"oauth_token_monitoring_{task.platform}_{task.user_id}",
                    'trigger_type': 'CronTrigger',  # Weekly recurring
                    'next_run_time': task.next_check.isoformat() if task.next_check else None,
                    'user_id': task.user_id,
                    'job_store': 'default',
                    'user_job_store': user_job_store,
                    'function_name': 'oauth_token_monitoring_executor.execute_task',
                    'platform': task.platform,
                    'task_id': task.id,
                    'is_database_task': True,  # Flag to indicate this is a DB task, not APScheduler job
                    'frequency': 'Weekly'
                }
                formatted_jobs.append(job_info)
        except Exception as e:
            # Pass the exception as a format argument: loguru runs str.format on
            # the message whenever extra args/kwargs are given, so a pre-built
            # f-string containing braces from the exception text could itself
            # raise here. opt(exception=True) attaches the traceback.
            logger.opt(exception=True).error("Error loading OAuth token monitoring tasks: {}", e)

        # Get active strategies count
        active_strategies = stats.get('active_strategies_count', 0)

        # Get last_update from stats (added by scheduler for frontend polling)
        last_update = stats.get('last_update')

        # Calculate cumulative/historical values from scheduler_event_logs
        cumulative_stats = {}
        try:
            # First, check total events in database for debugging
            total_events = db.query(func.count(SchedulerEventLog.id)).scalar() or 0

            # Check for check_cycle events specifically
            check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
                SchedulerEventLog.event_type == 'check_cycle'
            ).scalar() or 0

            # Also check for other event types that might have task counts
            job_failed_count = db.query(func.count(SchedulerEventLog.id)).filter(
                SchedulerEventLog.event_type == 'job_failed'
            ).scalar() or 0
            job_completed_count = db.query(func.count(SchedulerEventLog.id)).filter(
                SchedulerEventLog.event_type == 'job_completed'
            ).scalar() or 0

            logger.warning(
                f"[Dashboard] Database stats: {total_events} total events, "
                f"{check_cycle_count} check_cycles, {job_failed_count} job_failed, "
                f"{job_completed_count} job_completed"
            )

            if check_cycle_count > 0:
                logger.warning(f"[Dashboard] Found {check_cycle_count} check cycle events in database")

                # Aggregate check cycle events for cumulative totals
                result = db.query(
                    func.count(SchedulerEventLog.id),
                    func.sum(SchedulerEventLog.tasks_found),
                    func.sum(SchedulerEventLog.tasks_executed),
                    func.sum(SchedulerEventLog.tasks_failed)
                ).filter(
                    SchedulerEventLog.event_type == 'check_cycle'
                ).first()

                if result:
                    # SQLAlchemy returns tuple for multi-column queries
                    # SUM returns NULL when no rows, handle that
                    total_cycles = result[0] if result[0] is not None else 0
                    total_found = result[1] if result[1] is not None else 0
                    total_executed = result[2] if result[2] is not None else 0
                    total_failed = result[3] if result[3] is not None else 0

                    cumulative_stats = {
                        'total_check_cycles': int(total_cycles),
                        'cumulative_tasks_found': int(total_found),
                        'cumulative_tasks_executed': int(total_executed),
                        'cumulative_tasks_failed': int(total_failed)
                    }
                    logger.warning(f"[Dashboard] Cumulative stats from check_cycles: {cumulative_stats}")
                else:
                    # No results (shouldn't happen with COUNT, but handle it)
                    cumulative_stats = {
                        'total_check_cycles': 0,
                        'cumulative_tasks_found': 0,
                        'cumulative_tasks_executed': 0,
                        'cumulative_tasks_failed': 0
                    }
                    logger.warning("[Dashboard] Query returned None (no check cycle events)")
            else:
                # No check cycles yet, but we can still show job counts
                # Log detailed info about why cumulative stats are 0
                if stats.get('total_checks', 0) > 0:
                    logger.warning(
                        f"[Dashboard] ⚠️ Scheduler shows {stats.get('total_checks', 0)} checks in memory, "
                        f"but NO check_cycle events found in database. "
                        f"This suggests check_cycle events are not being saved properly."
                    )
                else:
                    logger.warning(
                        f"[Dashboard] No check_cycle events yet. "
                        f"Scheduler interval: {stats.get('check_interval_minutes', 60)}min. "
                        f"First check cycle will run after interval expires. "
                        f"One-time jobs: {job_completed_count} completed, {job_failed_count} failed"
                    )
        except Exception as e:
            # Same loguru formatting concern as for the OAuth task loader above.
            logger.opt(exception=True).error("Error calculating cumulative stats: {}", e)
            cumulative_stats = {
                'total_check_cycles': 0,
                'cumulative_tasks_found': 0,
                'cumulative_tasks_executed': 0,
                'cumulative_tasks_failed': 0
            }

        return {
            'stats': {
                # Current session stats (from scheduler memory)
                'total_checks': stats.get('total_checks', 0),
                'tasks_found': stats.get('tasks_found', 0),
                'tasks_executed': stats.get('tasks_executed', 0),
                'tasks_failed': stats.get('tasks_failed', 0),
                'tasks_skipped': stats.get('tasks_skipped', 0),
                'last_check': stats.get('last_check'),
                'last_update': last_update,  # Include for frontend polling
                'active_executions': stats.get('active_executions', 0),
                'running': stats.get('running', False),
                'check_interval_minutes': stats.get('check_interval_minutes', 60),
                'min_check_interval_minutes': stats.get('min_check_interval_minutes', 15),
                'max_check_interval_minutes': stats.get('max_check_interval_minutes', 60),
                'intelligent_scheduling': stats.get('intelligent_scheduling', True),
                'active_strategies_count': active_strategies,
                'last_interval_adjustment': stats.get('last_interval_adjustment'),
                'registered_types': stats.get('registered_types', []),
                # Cumulative/historical stats (from database)
                'cumulative_total_check_cycles': cumulative_stats.get('total_check_cycles', 0),
                'cumulative_tasks_found': cumulative_stats.get('cumulative_tasks_found', 0),
                'cumulative_tasks_executed': cumulative_stats.get('cumulative_tasks_executed', 0),
                'cumulative_tasks_failed': cumulative_stats.get('cumulative_tasks_failed', 0)
            },
            'jobs': formatted_jobs,
            'job_count': len(formatted_jobs),
            'recurring_jobs': 1 + len([j for j in formatted_jobs if j.get('is_database_task')]),  # check_due_tasks + OAuth tasks
            'one_time_jobs': len([j for j in formatted_jobs if not j.get('is_database_task') and j.get('trigger_type') == 'DateTrigger']),
            'user_isolation': {
                'enabled': True,
                'current_user_id': user_id_str
            },
            'last_updated': datetime.utcnow().isoformat()  # Keep for backward compatibility
        }
    except Exception as e:
        logger.error(f"Error getting scheduler dashboard: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scheduler dashboard: {str(e)}")
@router.get("/execution-logs")
async def get_execution_logs(
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    status: Optional[str] = Query(None, regex="^(success|failed|running|skipped)$"),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Return a page of task execution logs, newest execution first.

    The endpoint inspects the ``task_execution_logs`` table at request time.
    When that table has no ``user_id`` column, the log columns are selected
    explicitly and every entry reports ``user_id`` as ``None``; otherwise the
    mapped ``TaskExecutionLog`` entity is loaded together with its task.

    Query Params:
        - limit: Number of logs to return (1-500, default: 50)
        - offset: Pagination offset (default: 0)
        - status: Filter by status (success, failed, running, skipped)

    Returns:
        - List of execution logs with task details
        - Total count for pagination

    Raises:
        HTTPException: 500 when the logs cannot be read.
    """
    def _iso(moment):
        # Timestamps are emitted as ISO-8601 strings, or None when unset.
        return moment.isoformat() if moment else None

    def _task_summary(task):
        # Subset of the monitoring task exposed alongside each log entry.
        return {
            'id': task.id,
            'task_title': task.task_title,
            'component_name': task.component_name,
            'metric': task.metric,
            'frequency': task.frequency
        }

    def _page(entries, total):
        # Common pagination envelope shared by both query paths.
        return {
            'logs': entries,
            'total_count': total,
            'limit': limit,
            'offset': offset,
            'has_more': (offset + limit) < total,
            'is_scheduler_logs': False  # Explicitly mark as execution logs, not scheduler logs
        }

    try:
        # Clerk user ID as a string. It is not applied as a filter yet (see the
        # TODO further down).
        user_id_str = str(current_user.get('id', '')) if current_user else None

        # Detect whether the physical table carries a user_id column.
        from sqlalchemy import inspect
        table_columns = inspect(db.bind).get_columns('task_execution_logs')
        has_user_id_column = 'user_id' in [col['name'] for col in table_columns]

        if not has_user_id_column:
            # Select columns explicitly so SQLAlchemy never references the
            # missing user_id column.
            from sqlalchemy import func

            count_query = db.query(func.count(TaskExecutionLog.id)).join(
                MonitoringTask,
                TaskExecutionLog.task_id == MonitoringTask.id
            )
            if status:
                count_query = count_query.filter(TaskExecutionLog.status == status)
            total_count = count_query.scalar() or 0

            row_query = db.query(
                TaskExecutionLog.id,
                TaskExecutionLog.task_id,
                TaskExecutionLog.execution_date,
                TaskExecutionLog.status,
                TaskExecutionLog.result_data,
                TaskExecutionLog.error_message,
                TaskExecutionLog.execution_time_ms,
                TaskExecutionLog.created_at,
                MonitoringTask
            ).join(
                MonitoringTask,
                TaskExecutionLog.task_id == MonitoringTask.id
            )
            if status:
                row_query = row_query.filter(TaskExecutionLog.status == status)
            rows = row_query.order_by(TaskExecutionLog.execution_date.desc()).offset(offset).limit(limit).all()

            entries = []
            for (log_id, task_id, execution_date, log_status, result_data,
                 error_message, execution_time_ms, created_at, task) in rows:
                entry = {
                    'id': log_id,
                    'task_id': task_id,
                    'user_id': None,  # No user_id column in database
                    'execution_date': _iso(execution_date),
                    'status': log_status,
                    'error_message': error_message,
                    'execution_time_ms': execution_time_ms,
                    'result_data': result_data,
                    'created_at': _iso(created_at)
                }
                if task:
                    entry['task'] = _task_summary(task)
                entries.append(entry)
            return _page(entries, total_count)

        # user_id column exists: load full entities with the task eagerly joined.
        entity_query = db.query(TaskExecutionLog).join(
            MonitoringTask,
            TaskExecutionLog.task_id == MonitoringTask.id
        ).options(
            joinedload(TaskExecutionLog.task)
        )
        if status:
            entity_query = entity_query.filter(TaskExecutionLog.status == status)

        # TODO: user isolation is not applied here. user_id in TaskExecutionLog
        # is Integer while the caller's Clerk ID (user_id_str) is a string, so
        # all logs are returned until a user_id mapping exists.

        total_count = entity_query.count()
        records = entity_query.order_by(desc(TaskExecutionLog.execution_date)).offset(offset).limit(limit).all()

        entries = []
        for record in records:
            entry = {
                'id': record.id,
                'task_id': record.task_id,
                'user_id': record.user_id,  # column is known to exist on this path
                'execution_date': _iso(record.execution_date),
                'status': record.status,
                'error_message': record.error_message,
                'execution_time_ms': record.execution_time_ms,
                'result_data': record.result_data,
                'created_at': _iso(record.created_at)
            }
            if record.task:
                entry['task'] = _task_summary(record.task)
            entries.append(entry)
        return _page(entries, total_count)
    except Exception as e:
        logger.error(f"Error getting execution logs: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get execution logs: {str(e)}")
@router.get("/jobs")
async def get_scheduler_jobs(
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get detailed information about all scheduled jobs.

    Args:
        current_user: Authenticated user payload (required for access; not
            otherwise read here).
        db: Database session passed to the per-user job store lookup.

    Returns:
        - List of jobs with detailed information
        - Job ID, trigger type, next run time
        - User context (extracted from job ID/kwargs)
        - Job store name (from user's website root)

    Raises:
        HTTPException: 500 when the job list cannot be read.
    """
    try:
        scheduler = get_scheduler()
        all_jobs = scheduler.scheduler.get_jobs()

        formatted_jobs = []
        for job in all_jobs:
            job_info = {
                'id': job.id,
                'trigger_type': type(job.trigger).__name__,
                'next_run_time': job.next_run_time.isoformat() if job.next_run_time else None,
                'jobstore': getattr(job, 'jobstore', 'default'),
                'user_id': None,
                'user_job_store': 'default',
                'function_name': None
            }

            # Extract user_id from job
            user_id_from_job = None
            if hasattr(job, 'kwargs') and job.kwargs and job.kwargs.get('user_id'):
                user_id_from_job = job.kwargs.get('user_id')
            elif job.id:
                # The user ID is everything after the persona marker. Splitting
                # the whole job ID on '_' would cut off user IDs that contain
                # underscores themselves (e.g. "user_abc" would become "user").
                for marker in ('research_persona_', 'facebook_persona_'):
                    _, found, remainder = job.id.partition(marker)
                    if found and remainder:
                        user_id_from_job = remainder
                        break

            if user_id_from_job:
                job_info['user_id'] = user_id_from_job
                try:
                    user_job_store = get_user_job_store_name(user_id_from_job, db)
                    job_info['user_job_store'] = user_job_store
                except Exception as e:
                    # Best effort: keep the 'default' store name on lookup failure.
                    logger.debug(f"Could not get job store for user {user_id_from_job}: {e}")

            # Get function name if available
            if hasattr(job, 'func') and hasattr(job.func, '__name__'):
                job_info['function_name'] = job.func.__name__
            elif hasattr(job, 'func_ref'):
                job_info['function_name'] = str(job.func_ref)

            formatted_jobs.append(job_info)

        return {
            'jobs': formatted_jobs,
            'total_jobs': len(formatted_jobs),
            'recurring_jobs': 1,  # check_due_tasks
            # Everything except the single recurring job; clamped so an empty
            # scheduler reports 0 instead of -1.
            'one_time_jobs': max(len(formatted_jobs) - 1, 0)
        }
    except Exception as e:
        logger.error(f"Error getting scheduler jobs: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scheduler jobs: {str(e)}")
@router.get("/event-history")
async def get_scheduler_event_history(
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0),
    event_type: Optional[str] = Query(None, regex="^(check_cycle|interval_adjustment|start|stop|job_scheduled|job_cancelled|job_completed|job_failed)$"),
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Return a page of scheduler events from the database, most recent first.

    Events cover, among others:
        - Check cycles (when scheduler runs and checks for due tasks)
        - Interval adjustments (when check interval changes)
        - Scheduler start/stop events
        - Job scheduled/cancelled events

    Query Params:
        - limit: Number of events to return (1-1000, default: 100)
        - offset: Pagination offset (default: 0)
        - event_type: Filter by event type (check_cycle, interval_adjustment, start, stop, etc.)

    Returns:
        - List of scheduler events with details
        - Total count for pagination

    Raises:
        HTTPException: 500 when the events cannot be read.
    """
    # Attributes copied onto each response entry unchanged, in output order.
    passthrough_fields = (
        'check_cycle_number',
        'check_interval_minutes',
        'previous_interval_minutes',
        'new_interval_minutes',
        'tasks_found',
        'tasks_executed',
        'tasks_failed',
        'tasks_by_type',
        'check_duration_seconds',
        'active_strategies_count',
        'active_executions',
        'job_id',
        'job_type',
        'user_id',
        'event_data',
        'error_message',
    )

    def _iso(moment):
        # Timestamps are emitted as ISO-8601 strings, or None when unset.
        return moment.isoformat() if moment else None

    try:
        matching = db.query(SchedulerEventLog)
        if event_type:
            matching = matching.filter(SchedulerEventLog.event_type == event_type)

        # Count before pagination so the client can page through everything.
        total_count = matching.count()

        newest_first = matching.order_by(desc(SchedulerEventLog.event_date))
        page = newest_first.offset(offset).limit(limit).all()

        formatted_events = []
        for record in page:
            payload = {
                'id': record.id,
                'event_type': record.event_type,
                'event_date': _iso(record.event_date),
            }
            for field_name in passthrough_fields:
                payload[field_name] = getattr(record, field_name)
            payload['created_at'] = _iso(record.created_at)
            formatted_events.append(payload)

        return {
            'events': formatted_events,
            'total_count': total_count,
            'limit': limit,
            'offset': offset,
            'has_more': (offset + limit) < total_count
        }
    except Exception as e:
        logger.error(f"Error getting scheduler event history: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get scheduler event history: {str(e)}")
@router.get("/recent-scheduler-logs")
async def get_recent_scheduler_logs(
    current_user: Dict[str, Any] = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Return the five most recent job-related scheduler events as log entries.

    Only ``job_scheduled``, ``job_completed`` and ``job_failed`` events are
    considered. Each one is reshaped to look like an execution log so the
    Execution Logs view can display scheduler activity when actual execution
    logs are not available. The window is rolling: at most five entries.

    Returns:
        - List of latest 5 scheduler events (job_scheduled, job_completed, job_failed)
        - Formatted as execution log-like entries for display

    Raises:
        HTTPException: 500 when the events cannot be read.
    """
    # Event type -> execution-log status; anything else is shown as 'running'.
    status_by_event_type = {
        'job_completed': 'success',
        'job_failed': 'failed',
    }

    def _iso(moment):
        # Timestamps are emitted as ISO-8601 strings, or None when unset.
        return moment.isoformat() if moment else None

    try:
        job_events = SchedulerEventLog.event_type.in_(['job_scheduled', 'job_completed', 'job_failed'])
        latest = db.query(SchedulerEventLog).filter(job_events).order_by(
            desc(SchedulerEventLog.event_date)
        ).limit(5)
        events = latest.all()

        # Debug trace of what the query produced.
        logger.warning(
            f"[Dashboard] Recent scheduler logs query: found {len(events)} events"
        )
        if not events:
            # Nothing returned: report how many such events exist at all.
            total_count = db.query(func.count(SchedulerEventLog.id)).filter(
                SchedulerEventLog.event_type.in_(['job_scheduled', 'job_completed', 'job_failed'])
            ).scalar() or 0
            logger.warning(
                f"[Dashboard] No recent scheduler logs found (query returned 0). "
                f"Total events of these types in DB: {total_count}"
            )
        else:
            for evt in events:
                logger.warning(
                    f"[Dashboard] - Event: {evt.event_type} | "
                    f"Job ID: {evt.job_id} | User: {evt.user_id} | "
                    f"Date: {evt.event_date} | Error: {bool(evt.error_message)}"
                )

        formatted_logs = []
        for event in events:
            details = event.event_data or {}

            # Function name may be stored under either key.
            job_function = details.get('job_function') or details.get('function_name') or 'unknown'

            # Convert the recorded duration from seconds to milliseconds, if present.
            duration_seconds = details.get('execution_time_seconds')
            execution_time_ms = int(duration_seconds * 1000) if duration_seconds else None

            title_prefix = event.event_type.replace('_', ' ').title()
            formatted_logs.append({
                'id': f"scheduler_event_{event.id}",
                'task_id': None,
                'user_id': event.user_id,
                'execution_date': _iso(event.event_date),
                'status': status_by_event_type.get(event.event_type, 'running'),
                'error_message': event.error_message,
                'execution_time_ms': execution_time_ms,
                'result_data': None,
                'created_at': _iso(event.created_at),
                'task': {
                    'id': None,
                    'task_title': f"{title_prefix}: {event.job_id or 'N/A'}",
                    'component_name': 'Scheduler',
                    'metric': job_function,
                    'frequency': 'one-time'
                },
                'is_scheduler_log': True,  # Flag to indicate this is a scheduler log, not execution log
                'event_type': event.event_type,
                'job_id': event.job_id
            })

        # Debug trace of the response shape.
        logger.warning(
            f"[Dashboard] Formatted {len(formatted_logs)} scheduler logs for response. "
            f"Sample log entry keys: {list(formatted_logs[0].keys()) if formatted_logs else 'none'}"
        )

        return {
            'logs': formatted_logs,
            'total_count': len(formatted_logs),
            'limit': 5,
            'offset': 0,
            'has_more': False,
            'is_scheduler_logs': True  # Indicate these are scheduler logs, not execution logs
        }
    except Exception as e:
        logger.error(f"Error getting recent scheduler logs: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get recent scheduler logs: {str(e)}")

View File

@@ -49,6 +49,9 @@ from api.images import router as images_router
from api.hallucination_detector import router as hallucination_detector_router
from api.writing_assistant import router as writing_assistant_router
# Import research configuration router
from api.research_config import router as research_config_router
# Import user data endpoints
# Import content planning endpoints
from api.content_planning.api.router import router as content_planning_router
@@ -63,6 +66,9 @@ from api.content_planning.strategy_copilot import router as strategy_copilot_rou
# Import database service
from services.database import init_database, close_database
# Import OAuth token monitoring routes
from api.oauth_token_monitoring_routes import router as oauth_token_monitoring_router
# Import SEO Dashboard endpoints
from api.seo_dashboard import (
get_seo_dashboard_data,
@@ -283,6 +289,14 @@ from routers.platform_analytics import router as platform_analytics_router
app.include_router(platform_analytics_router)
app.include_router(images_router)
# Include research configuration router
app.include_router(research_config_router, prefix="/api/research", tags=["research"])
# Scheduler dashboard routes
from api.scheduler_dashboard import router as scheduler_dashboard_router
app.include_router(scheduler_dashboard_router)
app.include_router(oauth_token_monitoring_router)
# Setup frontend serving using modular utilities
frontend_serving.setup_frontend_serving()

View File

@@ -49,7 +49,8 @@ class APIKeyInjectionMiddleware:
else:
logger.warning(f"[API Key Injection] User object missing ID: {user}")
else:
logger.warning("[API Key Injection] Token verification failed")
# Token verification failed (likely expired) - log at debug level to reduce noise
logger.debug("[API Key Injection] Token verification failed (likely expired token)")
except Exception as e:
logger.error(f"[API Key Injection] Could not extract user from token: {e}")

View File

@@ -156,7 +156,12 @@ class ClerkAuthMiddleware:
logger.warning("No user ID found in verified token")
return None
except Exception as e:
logger.warning(f"fastapi-clerk-auth verification error: {e}")
# Expired tokens are expected - log at debug level to reduce noise
error_msg = str(e).lower()
if 'expired' in error_msg or 'signature has expired' in error_msg:
logger.debug(f"Token expired (expected): {e}")
else:
logger.warning(f"fastapi-clerk-auth verification error: {e}")
return None
else:
# Fallback to custom implementation (not secure for production)
@@ -218,7 +223,9 @@ async def get_current_user(
token = credentials.credentials
user = await clerk_auth.verify_token(token)
if not user:
logger.warning("Token verification failed")
# Token verification failed (likely expired) - log at debug level to reduce noise
# The HTTPException will still be raised, but we don't need to spam logs
logger.debug("Token verification failed (likely expired token)")
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Authentication failed",

View File

@@ -0,0 +1,98 @@
"""
OAuth Token Monitoring Models
Database models for tracking OAuth token status and monitoring tasks.
"""
from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean, JSON, Index, ForeignKey
from sqlalchemy.orm import relationship
from datetime import datetime
# Import the same Base from enhanced_strategy_models
from models.enhanced_strategy_models import Base
class OAuthTokenMonitoringTask(Base):
    """
    Model for storing OAuth token monitoring tasks.
    Tracks per-user, per-platform token monitoring with weekly checks.

    Each row records the outcome timestamps of the most recent checks and
    when the next check is due. Deleting a task also deletes its execution
    logs (``delete-orphan`` cascade on ``execution_logs``).
    """
    __tablename__ = "oauth_token_monitoring_tasks"

    id = Column(Integer, primary_key=True, index=True)

    # User and Platform Identification
    user_id = Column(String(255), nullable=False, index=True)  # Clerk user ID (string)
    platform = Column(String(50), nullable=False)  # 'gsc', 'bing', 'wordpress', 'wix'

    # Task Status
    status = Column(String(50), default='active')  # 'active', 'failed', 'paused'

    # Execution Tracking
    last_check = Column(DateTime, nullable=True)
    last_success = Column(DateTime, nullable=True)
    last_failure = Column(DateTime, nullable=True)
    failure_reason = Column(Text, nullable=True)

    # Scheduling
    next_check = Column(DateTime, nullable=True, index=True)  # Next scheduled check time

    # Metadata
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Execution Logs Relationship
    execution_logs = relationship(
        "OAuthTokenExecutionLog",
        back_populates="task",
        cascade="all, delete-orphan"
    )

    # Indexes for efficient queries.
    # Names carry a table-specific prefix: index names are database-wide in
    # SQLite and schema-wide in PostgreSQL, so a generic name such as
    # 'idx_status' collides with any other table that uses the same name.
    # next_check needs no explicit Index here because the column is already
    # declared with index=True.
    __table_args__ = (
        Index('idx_oauth_task_user_platform', 'user_id', 'platform'),
        Index('idx_oauth_task_status', 'status'),
    )

    def __repr__(self):
        return f"<OAuthTokenMonitoringTask(id={self.id}, user_id={self.user_id}, platform={self.platform}, status={self.status})>"
class OAuthTokenExecutionLog(Base):
    """
    Model for storing OAuth token monitoring execution logs.
    Tracks individual execution attempts with results and error details.

    Every log row belongs to exactly one ``OAuthTokenMonitoringTask`` through
    ``task_id``.
    """
    __tablename__ = "oauth_token_execution_logs"

    id = Column(Integer, primary_key=True, index=True)

    # Task Reference
    task_id = Column(Integer, ForeignKey("oauth_token_monitoring_tasks.id"), nullable=False, index=True)

    # Execution Details
    execution_date = Column(DateTime, default=datetime.utcnow, nullable=False)
    status = Column(String(50), nullable=False)  # 'success', 'failed', 'skipped'

    # Results
    result_data = Column(JSON, nullable=True)  # Token status, expiration info, etc.
    error_message = Column(Text, nullable=True)
    execution_time_ms = Column(Integer, nullable=True)

    # Metadata
    created_at = Column(DateTime, default=datetime.utcnow)

    # Relationship to task
    task = relationship("OAuthTokenMonitoringTask", back_populates="execution_logs")

    # Indexes for efficient queries.
    # Names carry a table-specific prefix: index names are database-wide in
    # SQLite and schema-wide in PostgreSQL, so a generic name such as
    # 'idx_status' collides with any other table that uses the same name.
    __table_args__ = (
        Index('idx_oauth_log_task_execution_date', 'task_id', 'execution_date'),
        Index('idx_oauth_log_status', 'status'),
    )

    def __repr__(self):
        return f"<OAuthTokenExecutionLog(id={self.id}, task_id={self.task_id}, status={self.status}, execution_date={self.execution_date})>"

View File

@@ -157,12 +157,14 @@ class PersonaData(Base):
id = Column(Integer, primary_key=True, autoincrement=True)
session_id = Column(Integer, ForeignKey('onboarding_sessions.id', ondelete='CASCADE'), nullable=False)
# Persona generation results
# Persona generation results
core_persona = Column(JSON) # Core persona data (demographics, psychographics, etc.)
platform_personas = Column(JSON) # Platform-specific personas (LinkedIn, Twitter, etc.)
quality_metrics = Column(JSON) # Quality assessment metrics
selected_platforms = Column(JSON) # Array of selected platforms
research_persona = Column(JSON, nullable=True) # AI-generated research persona with personalized defaults
research_persona_generated_at = Column(DateTime, nullable=True) # Timestamp for 7-day TTL cache validation
# Metadata
created_at = Column(DateTime, default=func.now())
updated_at = Column(DateTime, default=func.now(), onupdate=func.now())
@@ -182,6 +184,8 @@ class PersonaData(Base):
'platform_personas': self.platform_personas,
'quality_metrics': self.quality_metrics,
'selected_platforms': self.selected_platforms,
'research_persona': self.research_persona,
'research_persona_generated_at': self.research_persona_generated_at.isoformat() if self.research_persona_generated_at else None,
'created_at': self.created_at.isoformat() if self.created_at else None,
'updated_at': self.updated_at.isoformat() if self.updated_at else None
}

View File

@@ -0,0 +1,110 @@
"""
Research Persona Models
Pydantic models for AI-generated research personas.
"""
from typing import Dict, Any, List, Optional
from pydantic import BaseModel, Field
from datetime import datetime
class ResearchPreset(BaseModel):
    """Research preset configuration.

    A named bundle of research inputs. The first five fields are required;
    ``config`` defaults to an empty dict and the remaining fields default to
    ``None``.
    """
    # Required fields (no default value).
    name: str
    keywords: str  # a single string, not a list
    industry: str
    target_audience: str
    research_mode: str = Field(..., description="basic, comprehensive, or targeted")
    # Free-form mapping; a fresh dict is created per instance.
    config: Dict[str, Any] = Field(default_factory=dict, description="Complete ResearchConfig object")
    # Optional fields.
    description: Optional[str] = None
    # NOTE(review): icon/gradient look like display hints for a UI - confirm with the consumer.
    icon: Optional[str] = None
    gradient: Optional[str] = None
class ResearchPersona(BaseModel):
    """AI-generated research persona providing personalized defaults and suggestions.

    The four ``default_*`` fields are required. Every list or dict field
    defaults to an empty container, and the category and metadata fields
    default to ``None``. ``Config.json_schema_extra`` carries one complete
    example payload.
    """
    # Smart Defaults (all required)
    default_industry: str = Field(..., description="Default industry from onboarding data")
    default_target_audience: str = Field(..., description="Default target audience from onboarding data")
    default_research_mode: str = Field(..., description="basic, comprehensive, or targeted")
    default_provider: str = Field(..., description="google or exa")

    # Keyword Intelligence
    # The item counts quoted in the descriptions of this model are guidance
    # text only; no length constraint is declared on these fields.
    suggested_keywords: List[str] = Field(default_factory=list, description="8-12 relevant keywords")
    keyword_expansion_patterns: Dict[str, List[str]] = Field(
        default_factory=dict,
        description="Mapping of keywords to expanded, industry-specific terms"
    )

    # Domain & Source Intelligence
    suggested_exa_domains: List[str] = Field(
        default_factory=list,
        description="4-6 authoritative domains for the industry"
    )
    suggested_exa_category: Optional[str] = Field(
        None,
        description="Suggested Exa category based on industry"
    )

    # Query Enhancement Intelligence
    research_angles: List[str] = Field(
        default_factory=list,
        description="5-8 alternative research angles/focuses"
    )
    query_enhancement_rules: Dict[str, str] = Field(
        default_factory=dict,
        description="Templates for improving vague user queries"
    )

    # Research History Insights
    recommended_presets: List[ResearchPreset] = Field(
        default_factory=list,
        description="3-5 personalized research preset templates"
    )

    # Research Preferences
    research_preferences: Dict[str, Any] = Field(
        default_factory=dict,
        description="Structured research preferences from onboarding"
    )

    # Metadata
    # generated_at is kept as a string (ISO timestamp), not a datetime.
    generated_at: Optional[str] = Field(None, description="ISO timestamp of generation")
    # Validated to lie within [0.0, 1.0] when provided.
    confidence_score: Optional[float] = Field(None, ge=0.0, le=1.0, description="Confidence score 0-1")
    version: Optional[str] = Field(None, description="Schema version")

    class Config:
        # Example payload attached to the model's JSON schema.
        json_schema_extra = {
            "example": {
                "default_industry": "Healthcare",
                "default_target_audience": "Medical professionals and healthcare administrators",
                "default_research_mode": "comprehensive",
                "default_provider": "exa",
                "suggested_keywords": ["telemedicine", "patient care", "healthcare technology"],
                "keyword_expansion_patterns": {
                    "AI": ["healthcare AI", "medical AI", "clinical AI"],
                    "tools": ["medical devices", "clinical tools"]
                },
                "suggested_exa_domains": ["pubmed.gov", "nejm.org", "thelancet.com"],
                "suggested_exa_category": "research paper",
                "research_angles": [
                    "Compare telemedicine platforms",
                    "Telemedicine ROI analysis",
                    "Latest telemedicine trends"
                ],
                "query_enhancement_rules": {
                    "vague_ai": "Research: AI applications in Healthcare for Medical professionals",
                    "vague_tools": "Compare top Healthcare tools"
                },
                "recommended_presets": [],
                "research_preferences": {
                    "research_depth": "comprehensive",
                    "content_types": ["blog", "article"]
                },
                "generated_at": "2024-01-01T00:00:00Z",
                "confidence_score": 0.85,
                "version": "1.0"
            }
        }

View File

@@ -0,0 +1,48 @@
"""
Scheduler Event Models
Models for tracking scheduler-level events and history.
"""
from sqlalchemy import Column, Integer, String, Text, DateTime, JSON, Float
from datetime import datetime
# Import the same Base from enhanced_strategy_models
from models.enhanced_strategy_models import Base
class SchedulerEventLog(Base):
    """Model for storing scheduler-level events (check cycles, interval adjustments, etc.)"""
    # One row per scheduler event. Apart from the primary key, event_type and
    # event_date, every column is nullable, so each event type fills in only
    # the columns that apply to it.
    __tablename__ = "scheduler_event_logs"
    id = Column(Integer, primary_key=True, index=True)
    event_type = Column(String(50), nullable=False)  # 'check_cycle', 'interval_adjustment', 'start', 'stop', 'job_scheduled', 'job_cancelled', 'job_completed', 'job_failed'
    # Defaults to the insertion time as a naive UTC datetime (datetime.utcnow).
    event_date = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
    # Event details
    check_cycle_number = Column(Integer, nullable=True)  # For check_cycle events
    check_interval_minutes = Column(Integer, nullable=True)  # Interval at time of event
    previous_interval_minutes = Column(Integer, nullable=True)  # For interval_adjustment events
    new_interval_minutes = Column(Integer, nullable=True)  # For interval_adjustment events
    # Task execution summary for check cycles
    tasks_found = Column(Integer, nullable=True)
    tasks_executed = Column(Integer, nullable=True)
    tasks_failed = Column(Integer, nullable=True)
    tasks_by_type = Column(JSON, nullable=True)  # {'monitoring_task': 5, ...}
    # Job information
    job_id = Column(String(200), nullable=True)  # For job_scheduled/cancelled events
    job_type = Column(String(50), nullable=True)  # 'recurring', 'one_time'
    user_id = Column(String(200), nullable=True, index=True)  # For user isolation
    # Performance metrics
    check_duration_seconds = Column(Float, nullable=True)  # How long the check cycle took
    active_strategies_count = Column(Integer, nullable=True)
    active_executions = Column(Integer, nullable=True)
    # Additional context
    event_data = Column(JSON, nullable=True)  # Additional event-specific data
    error_message = Column(Text, nullable=True)  # For error events
    # Row creation time (naive UTC); separate from event_date, which callers may set explicitly.
    created_at = Column(DateTime, default=datetime.utcnow)

View File

@@ -389,10 +389,19 @@ class ResearchService:
exa_provider.track_exa_usage(user_id, cost)
# Extract content for downstream analysis
# Handle None result case
if raw_result is None:
logger.error("raw_result is None after Exa search - this should not happen if HTTPException was raised")
raise ValueError("Exa research result is None - search operation failed unexpectedly")
if not isinstance(raw_result, dict):
logger.warning(f"raw_result is not a dict (type: {type(raw_result)}), using defaults")
raw_result = {}
content = raw_result.get('content', '')
sources = raw_result.get('sources', [])
sources = raw_result.get('sources', []) or []
search_widget = "" # Exa doesn't provide search widgets
search_queries = raw_result.get('search_queries', [])
search_queries = raw_result.get('search_queries', []) or []
grounding_metadata = None # Exa doesn't provide grounding metadata
except RuntimeError as e:
@@ -423,10 +432,15 @@ class ResearchService:
await task_manager.update_progress(task_id, "📊 Processing research results and extracting insights...")
# Extract sources and content
# Handle None result case
if gemini_result is None:
logger.error("gemini_result is None after search - this should not happen if HTTPException was raised")
raise ValueError("Research result is None - search operation failed unexpectedly")
sources = self._extract_sources_from_grounding(gemini_result)
content = gemini_result.get("content", "")
search_widget = gemini_result.get("search_widget", "") or ""
search_queries = gemini_result.get("search_queries", []) or []
content = gemini_result.get("content", "") if isinstance(gemini_result, dict) else ""
search_widget = gemini_result.get("search_widget", "") or "" if isinstance(gemini_result, dict) else ""
search_queries = gemini_result.get("search_queries", []) or [] if isinstance(gemini_result, dict) else []
grounding_metadata = self._extract_grounding_metadata(gemini_result)
# Continue with common analysis (same for both providers)
@@ -548,8 +562,17 @@ class ResearchService:
"""Extract sources from Gemini grounding metadata."""
sources = []
# Handle None or invalid gemini_result
if not gemini_result or not isinstance(gemini_result, dict):
logger.warning("gemini_result is None or not a dict, returning empty sources")
return sources
# The Gemini grounded provider already extracts sources and puts them in the 'sources' field
raw_sources = gemini_result.get("sources", [])
# Ensure raw_sources is a list (handle None case)
if raw_sources is None:
raw_sources = []
for src in raw_sources:
source = ResearchSource(
title=src.get("title", "Untitled"),
@@ -570,6 +593,15 @@ class ResearchService:
grounding_supports = []
citations = []
# Handle None or invalid gemini_result
if not gemini_result or not isinstance(gemini_result, dict):
logger.warning("gemini_result is None or not a dict, returning empty grounding metadata")
return GroundingMetadata(
grounding_chunks=grounding_chunks,
grounding_supports=grounding_supports,
citations=citations
)
# Extract grounding chunks from the raw grounding metadata
raw_grounding = gemini_result.get("grounding_metadata", {})
@@ -577,7 +609,11 @@ class ResearchService:
if hasattr(raw_grounding, 'grounding_chunks'):
raw_chunks = raw_grounding.grounding_chunks
else:
raw_chunks = raw_grounding.get("grounding_chunks", [])
raw_chunks = raw_grounding.get("grounding_chunks", []) if isinstance(raw_grounding, dict) else []
# Ensure raw_chunks is a list (handle None case)
if raw_chunks is None:
raw_chunks = []
for chunk in raw_chunks:
if "web" in chunk:

View File

@@ -0,0 +1,179 @@
"""
OAuth Token Monitoring Service
Service for creating and managing OAuth token monitoring tasks.
"""
from datetime import datetime, timedelta
from typing import List, Optional
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
import os
# Use service logger for consistent logging (WARNING level visible in production)
logger = get_service_logger("oauth_token_monitoring")
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
from services.gsc_service import GSCService
from services.integrations.bing_oauth import BingOAuthService
from services.integrations.wordpress_oauth import WordPressOAuthService
# Note: Wix tokens are stored in frontend sessionStorage, not backend database
# So we cannot check for Wix connections from the backend yet
def get_connected_platforms(user_id: str) -> List[str]:
    """
    Report which OAuth platforms currently have stored credentials for a user.

    Each platform is probed independently; a failure while probing one
    platform is logged and does not prevent the others from being checked.

    Probes, in order:
    - GSC: credentials loaded through GSCService
    - Bing: 'has_active_tokens' flag from BingOAuthService token status
    - WordPress: at least one token returned by WordPressOAuthService
    - Wix: never probed (tokens live in frontend sessionStorage)

    Args:
        user_id: User ID (Clerk string)

    Returns:
        Subset of ['gsc', 'bing', 'wordpress'] in that order. 'wix' is
        never included because it cannot be checked from the backend.
    """
    found: List[str] = []
    logger.warning(f"[OAuth Monitoring] Checking connected platforms for user: {user_id}")

    # --- Google Search Console ---
    try:
        gsc_db = os.path.abspath("alwrity.db")
        logger.warning(f"[OAuth Monitoring] Checking GSC with db_path: {gsc_db}")
        credentials = GSCService(db_path=gsc_db).load_user_credentials(user_id)
        if not credentials:
            logger.warning(f"[OAuth Monitoring] ❌ GSC not connected for user {user_id} (no credentials found)")
        else:
            found.append('gsc')
            logger.warning(f"[OAuth Monitoring] ✅ GSC connected for user {user_id}")
    except Exception as gsc_err:
        logger.warning(f"[OAuth Monitoring] ⚠️ GSC check failed for user {user_id}: {gsc_err}", exc_info=True)

    # --- Bing Webmaster ---
    try:
        bing_db = os.path.abspath("alwrity.db")
        logger.warning(f"[OAuth Monitoring] Checking Bing with db_path: {bing_db}")
        status = BingOAuthService(db_path=bing_db).get_user_token_status(user_id)
        active = status.get('has_active_tokens', False)
        logger.warning(f"[OAuth Monitoring] Bing token_status keys: {list(status.keys())}, has_active_tokens: {active}")
        if not active:
            logger.warning(f"[OAuth Monitoring] ❌ Bing not connected for user {user_id} (no active tokens)")
        else:
            found.append('bing')
            logger.warning(f"[OAuth Monitoring] ✅ Bing connected for user {user_id}")
    except Exception as bing_err:
        logger.warning(f"[OAuth Monitoring] ⚠️ Bing check failed for user {user_id}: {bing_err}", exc_info=True)

    # --- WordPress ---
    try:
        wp_db = os.path.abspath("alwrity.db")
        logger.warning(f"[OAuth Monitoring] Checking WordPress with db_path: {wp_db}")
        wp_tokens = WordPressOAuthService(db_path=wp_db).get_user_tokens(user_id)
        token_count = len(wp_tokens) if wp_tokens else 0
        logger.warning(f"[OAuth Monitoring] WordPress tokens found: {token_count}")
        if token_count > 0:
            found.append('wordpress')
            logger.warning(f"[OAuth Monitoring] ✅ WordPress connected for user {user_id} ({token_count} token(s))")
        else:
            logger.warning(f"[OAuth Monitoring] ❌ WordPress not connected for user {user_id} (no tokens found)")
    except Exception as wp_err:
        logger.warning(f"[OAuth Monitoring] ⚠️ WordPress check failed for user {user_id}: {wp_err}", exc_info=True)

    # Wix: Not checked (tokens in frontend sessionStorage)
    # TODO: Once backend storage is implemented, check wix_tokens table

    logger.warning(f"[OAuth Monitoring] Connected platforms for user {user_id}: {found}")
    return found
def create_oauth_monitoring_tasks(
    user_id: str,
    db: Session,
    platforms: Optional[List[str]] = None
) -> List[OAuthTokenMonitoringTask]:
    """
    Register OAuth token monitoring tasks for a user, one per platform.

    When ``platforms`` is None the connected platforms are discovered via
    get_connected_platforms(). Platforms that already have a task for this
    user are skipped. New tasks are 'active' with their first check due
    7 days from now, and all of them are committed in a single transaction.

    Args:
        user_id: User ID (Clerk string)
        db: Database session
        platforms: Optional list of platforms to create tasks for.
            If None, auto-detects connected platforms.
            Valid values: 'gsc', 'bing', 'wordpress', 'wix'

    Returns:
        The newly created OAuthTokenMonitoringTask instances. An empty list
        is returned when there is nothing to create or when any error
        occurs (the session is rolled back and the error is logged).
    """
    try:
        if platforms is not None:
            logger.warning(f"[OAuth Monitoring] Creating monitoring tasks for specified platforms: {platforms}")
        else:
            # Nothing requested explicitly: fall back to auto-detection.
            platforms = get_connected_platforms(user_id)
            logger.warning(f"[OAuth Monitoring] Auto-detected {len(platforms)} connected platforms for user {user_id}: {platforms}")

        if not platforms:
            logger.warning(f"[OAuth Monitoring] No connected platforms found for user {user_id}. No monitoring tasks created.")
            return []

        now = datetime.utcnow()
        next_check = now + timedelta(days=7)  # 7 days from now
        created_tasks = []

        for platform in platforms:
            # Only one task per user/platform pair.
            already_monitored = (
                db.query(OAuthTokenMonitoringTask)
                .filter(
                    OAuthTokenMonitoringTask.user_id == user_id,
                    OAuthTokenMonitoringTask.platform == platform
                )
                .first()
            )
            if already_monitored:
                logger.warning(
                    f"[OAuth Monitoring] Monitoring task already exists for user {user_id}, platform {platform}. "
                    f"Skipping creation."
                )
                continue

            new_task = OAuthTokenMonitoringTask(
                user_id=user_id,
                platform=platform,
                status='active',
                next_check=next_check,
                created_at=now,
                updated_at=now
            )
            db.add(new_task)
            created_tasks.append(new_task)
            logger.warning(
                f"[OAuth Monitoring] Created OAuth token monitoring task for user {user_id}, "
                f"platform {platform}, next_check: {next_check.isoformat()}"
            )

        db.commit()
        logger.warning(
            f"[OAuth Monitoring] Successfully created {len(created_tasks)} OAuth token monitoring tasks "
            f"for user {user_id}"
        )
        return created_tasks
    except Exception as e:
        logger.error(
            f"Error creating OAuth token monitoring tasks for user {user_id}: {e}",
            exc_info=True
        )
        db.rollback()
        return []

View File

@@ -26,12 +26,63 @@ class OnboardingDatabaseService:
# Cache for schema feature detection
self._brand_cols_checked: bool = False
self._brand_cols_available: bool = False
self._research_persona_cols_checked: bool = False
self._research_persona_cols_available: bool = False
# --- Feature flags and schema detection helpers ---
def _brand_feature_enabled(self) -> bool:
"""Check if writing brand-related columns is enabled via env flag."""
return os.getenv('ENABLE_WEBSITE_BRAND_COLUMNS', 'true').lower() in {'1', 'true', 'yes', 'on'}
def _ensure_research_persona_columns(self, session_db: Session) -> None:
    """
    Ensure research_persona columns exist in persona_data table (runtime migration).

    Adds ``research_persona`` and ``research_persona_generated_at`` to
    ``persona_data`` when they are missing. The check runs at most once per
    service instance: ``_research_persona_cols_checked`` is set on the way
    out whether the migration succeeded or not.

    Args:
        session_db: Session bound to the database to inspect and migrate.

    Raises:
        Exception: whatever the database raised while inspecting or altering
            the table; the session is rolled back before re-raising.
    """
    if self._research_persona_cols_checked:
        return

    try:
        db_url = str(session_db.bind.url) if session_db.bind else ""

        if 'sqlite' in db_url.lower():
            # SQLite: PRAGMA table_info lists one row per column, name at index 1.
            result = session_db.execute(text("PRAGMA table_info(persona_data)"))
            cols = {row[1] for row in result}

            if 'research_persona' not in cols:
                logger.info("Adding missing column research_persona to persona_data table")
                session_db.execute(text("ALTER TABLE persona_data ADD COLUMN research_persona JSON"))
                session_db.commit()

            if 'research_persona_generated_at' not in cols:
                logger.info("Adding missing column research_persona_generated_at to persona_data table")
                session_db.execute(text("ALTER TABLE persona_data ADD COLUMN research_persona_generated_at TIMESTAMP"))
                session_db.commit()

            self._research_persona_cols_available = True
        else:
            # PostgreSQL: probe the columns with a zero-row SELECT.
            try:
                session_db.execute(text("SELECT research_persona, research_persona_generated_at FROM persona_data LIMIT 0"))
                self._research_persona_cols_available = True
            except Exception:
                # The failed probe leaves the PostgreSQL transaction aborted;
                # every later statement would be rejected until it is rolled
                # back, so reset the session before running the ALTERs.
                session_db.rollback()
                logger.info("Adding missing columns research_persona and research_persona_generated_at to persona_data table")
                try:
                    # IF NOT EXISTS keeps this working when only one of the
                    # two columns is missing (the probe fails in that case too).
                    session_db.execute(text("ALTER TABLE persona_data ADD COLUMN IF NOT EXISTS research_persona JSONB"))
                    session_db.execute(text("ALTER TABLE persona_data ADD COLUMN IF NOT EXISTS research_persona_generated_at TIMESTAMP"))
                    session_db.commit()
                    self._research_persona_cols_available = True
                except Exception as alter_err:
                    logger.error(f"Failed to add research_persona columns: {alter_err}")
                    session_db.rollback()
                    raise
    except Exception as e:
        logger.error(f"Error ensuring research_persona columns: {e}")
        session_db.rollback()
        raise
    finally:
        # Set even on failure so the migration is attempted only once per instance.
        self._research_persona_cols_checked = True
def _ensure_brand_column_detection(self, session_db: Session) -> None:
"""Detect at runtime whether brand columns exist and cache the result."""
if self._brand_cols_checked:
@@ -477,6 +528,9 @@ class OnboardingDatabaseService:
if not session_db:
raise ValueError("Database session required")
# Ensure research_persona columns exist before querying
self._ensure_research_persona_columns(session_db)
try:
session = self.get_session_by_user(user_id, session_db)
if not session:

View File

@@ -0,0 +1,239 @@
"""
Facebook Persona Scheduler
Handles scheduled generation of Facebook personas after onboarding.
"""
from datetime import datetime, timedelta, timezone
from typing import Dict, Any
from loguru import logger
from services.database import get_db_session
from services.persona_data_service import PersonaDataService
from services.persona.facebook.facebook_persona_service import FacebookPersonaService
from services.onboarding.database_service import OnboardingDatabaseService
from models.scheduler_models import SchedulerEventLog
def _log_facebook_persona_event(
    db,
    user_id: str,
    start_time: datetime,
    execution_time: float,
    *,
    log_context: str,
    error_message=None,
    failure_reason=None,
    extra_event_data=None,
) -> None:
    """
    Best-effort write of one Facebook persona job outcome to the scheduler event log.

    Args:
        db: Open database session used to persist the event.
        user_id: User ID (Clerk string); also used to build the job ID.
        start_time: When generation started; stored as the event date.
        execution_time: Elapsed seconds, stored in the event data.
        log_context: Short phrase inserted into the warning emitted when the
            event itself cannot be written.
        error_message: Human-readable failure description, or None on success.
        failure_reason: Machine-readable failure code. None marks a success
            ('job_completed'); any other value marks a failure ('job_failed').
        extra_event_data: Optional additional keys for failed events.

    Never raises: a failure to write the event is logged and rolled back.
    """
    failed = failure_reason is not None
    event_data: Dict[str, Any] = {
        'job_function': 'generate_facebook_persona_task',
        'execution_time_seconds': execution_time,
        'status': 'failed' if failed else 'success',
    }
    if failed:
        event_data['failure_reason'] = failure_reason
        event_data.update(extra_event_data or {})
        # Every failure path recorded here happens after the generation call was attempted.
        event_data['expensive_api_call'] = True

    try:
        event_log = SchedulerEventLog(
            event_type='job_failed' if failed else 'job_completed',
            event_date=start_time,
            job_id=f"facebook_persona_{user_id}",  # Match scheduled job ID format
            job_type='one_time',
            user_id=user_id,
            error_message=error_message,
            event_data=event_data,
        )
        db.add(event_log)
        db.commit()
    except Exception as log_error:
        logger.warning(f"Failed to log Facebook persona {log_context} to scheduler event log: {log_error}")
        db.rollback()


async def generate_facebook_persona_task(user_id: str):
    """
    Async task function to generate Facebook persona for a user.

    Intended to be run by the scheduler after onboarding (see
    schedule_facebook_persona_generation, default delay 20 minutes).
    The task is a no-op when the user has no core persona or already has a
    Facebook persona. Every outcome of an attempted generation is recorded
    in the scheduler event log. The function never raises: unexpected errors
    are logged and the database session is always closed.

    Args:
        user_id: User ID (Clerk string)
    """
    db = None
    try:
        logger.info(f"Scheduled Facebook persona generation started for user {user_id}")

        db = get_db_session()
        if not db:
            logger.error(f"Failed to get database session for Facebook persona generation (user: {user_id})")
            return

        persona_data_service = PersonaDataService(db_session=db)

        # Core persona is required as the basis for the Facebook persona.
        persona_data = persona_data_service.get_user_persona_data(user_id)
        if not persona_data or not persona_data.get('core_persona'):
            logger.warning(f"No core persona found for user {user_id}, cannot generate Facebook persona")
            return
        core_persona = persona_data.get('core_persona', {})

        # Bail out before any further work if the persona already exists.
        # `or {}` also covers a stored value of None.
        platform_personas = persona_data.get('platform_personas') or {}
        if platform_personas.get('facebook'):
            logger.info(f"Facebook persona already exists for user {user_id}, skipping generation")
            return

        # Gather onboarding data for context.
        onboarding_service = OnboardingDatabaseService(db=db)
        website_analysis = onboarding_service.get_website_analysis(user_id, db) or {}
        research_prefs = onboarding_service.get_research_preferences(user_id, db)
        onboarding_data = {
            "website_url": website_analysis.get('website_url', ''),
            "writing_style": website_analysis.get('writing_style', {}),
            "content_characteristics": website_analysis.get('content_characteristics', {}),
            "target_audience": website_analysis.get('target_audience', ''),
            "research_preferences": research_prefs or {}
        }

        start_time = datetime.utcnow()
        facebook_service = FacebookPersonaService()
        try:
            generated_persona = facebook_service.generate_facebook_persona(
                core_persona,
                onboarding_data
            )
            execution_time = (datetime.utcnow() - start_time).total_seconds()

            if not generated_persona or "error" in generated_persona:
                error_msg = f"Scheduled Facebook persona generation failed for user {user_id}: {generated_persona}"
                logger.error(f"{error_msg}")
                _log_facebook_persona_event(
                    db, user_id, start_time, execution_time,
                    log_context="generation failure",
                    error_message=error_msg,
                    failure_reason='generation_returned_error',
                )
            elif persona_data_service.save_platform_persona(user_id, 'facebook', generated_persona):
                logger.info(f"✅ Scheduled Facebook persona generation completed for user {user_id}")
                _log_facebook_persona_event(
                    db, user_id, start_time, execution_time,
                    log_context="generation success",
                )
            else:
                error_msg = f"Failed to save Facebook persona for user {user_id}"
                logger.warning(f"⚠️ {error_msg}")
                _log_facebook_persona_event(
                    db, user_id, start_time, execution_time,
                    log_context="save failure",
                    error_message=error_msg,
                    failure_reason='save_failed',
                )
        except Exception as gen_error:
            execution_time = (datetime.utcnow() - start_time).total_seconds()
            error_msg = f"Exception during scheduled Facebook persona generation for user {user_id}: {str(gen_error)}. Expensive API call may have been made."
            logger.error(f"{error_msg}")
            _log_facebook_persona_event(
                db, user_id, start_time, execution_time,
                log_context="generation exception",
                error_message=error_msg,
                failure_reason='exception',
                extra_event_data={
                    'exception_type': type(gen_error).__name__,
                    'exception_message': str(gen_error),
                },
            )
    except Exception as e:
        logger.error(f"Error in scheduled Facebook persona generation for user {user_id}: {e}")
    finally:
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session: {e}")
def schedule_facebook_persona_generation(user_id: str, delay_minutes: int = 20) -> str:
    """
    Queue a one-time Facebook persona generation job for a user.

    The job ID is derived from the user ID alone, and the job is registered
    with replace_existing=True, so scheduling again for the same user
    replaces the previously queued job.

    Args:
        user_id: User ID (Clerk string)
        delay_minutes: Minutes from now at which the job should fire (default: 20)

    Returns:
        The job ID returned by the scheduler.

    Raises:
        Exception: any scheduling error, re-raised after being logged.
    """
    try:
        from services.scheduler import get_scheduler

        task_scheduler = get_scheduler()

        # Timezone-aware UTC moment at which the job should fire.
        fire_at = datetime.now(timezone.utc) + timedelta(minutes=delay_minutes)

        # Stable ID (no timestamp) so the job can be found and restored with
        # its original scheduled time. Clerk user IDs already carry a
        # "user_" prefix, so none is added here.
        persona_job_id = f"facebook_persona_{user_id}"

        confirmed_job_id = task_scheduler.schedule_one_time_task(
            func=generate_facebook_persona_task,
            run_date=fire_at,
            job_id=persona_job_id,
            kwargs={"user_id": user_id},
            replace_existing=True
        )

        logger.info(
            f"Scheduled Facebook persona generation for user {user_id} "
            f"at {fire_at} (job_id: {confirmed_job_id})"
        )
        return confirmed_job_id
    except Exception as e:
        logger.error(f"Failed to schedule Facebook persona generation for user {user_id}: {e}")
        raise

View File

@@ -0,0 +1,171 @@
"""
Research Persona Prompt Builder
Handles building comprehensive prompts for research persona generation.
Generates personalized research defaults, suggestions, and configurations.
"""
from typing import Dict, Any
import json
from loguru import logger
class ResearchPersonaPromptBuilder:
    """Builds comprehensive prompts for research persona generation."""

    @staticmethod
    def _to_prompt_json(section: Any) -> str:
        """Render one onboarding section as indented JSON for the prompt."""
        # default=str is deliberate: the text is only read by the LLM, so a
        # value that is not JSON-native is stringified instead of raising
        # TypeError and aborting the whole prompt build.
        return json.dumps(section, indent=2, default=str)

    def build_research_persona_prompt(self, onboarding_data: Dict[str, Any]) -> str:
        """
        Build the research persona generation prompt with comprehensive data.

        Args:
            onboarding_data: Mapping that may contain "website_analysis",
                "persona_data", "research_preferences" and "business_info".
                Missing or None sections are treated as empty dicts.

        Returns:
            The full prompt text, embedding each section as JSON and the
            JSON structure the model is asked to return.
        """
        # Extract data from onboarding_data (None-safe)
        website_analysis = onboarding_data.get("website_analysis", {}) or {}
        persona_data = onboarding_data.get("persona_data", {}) or {}
        research_prefs = onboarding_data.get("research_preferences", {}) or {}
        business_info = onboarding_data.get("business_info", {}) or {}

        # Extract core persona
        core_persona = persona_data.get("core_persona", {}) or {}

        business_json = self._to_prompt_json(business_info)
        website_json = self._to_prompt_json(website_analysis)
        core_persona_json = self._to_prompt_json(core_persona)
        research_prefs_json = self._to_prompt_json(research_prefs)

        # confidence_score is requested on a 0.0-1.0 scale so that it matches
        # the ResearchPersona schema returned by get_json_schema().
        prompt = f"""
COMPREHENSIVE RESEARCH PERSONA GENERATION TASK: Create a highly detailed, personalized research persona based on the user's business, writing style, and content strategy. This persona will provide intelligent defaults and suggestions for research inputs.
=== USER CONTEXT ===
BUSINESS INFORMATION:
{business_json}
WEBSITE ANALYSIS:
{website_json}
CORE PERSONA:
{core_persona_json}
RESEARCH PREFERENCES:
{research_prefs_json}
=== RESEARCH PERSONA GENERATION REQUIREMENTS ===
Generate a comprehensive research persona in JSON format with the following structure:
1. DEFAULT VALUES:
- "default_industry": Extract from core_persona.industry, business_info.industry, or website_analysis target_audience. Use "General" only if none available.
- "default_target_audience": Extract from core_persona.target_audience, website_analysis.target_audience, or business_info.target_audience. Be specific and descriptive.
- "default_research_mode": Suggest "basic", "comprehensive", or "targeted" based on research_preferences.research_depth and content_type preferences.
- "default_provider": Suggest "google" for news/trends, "exa" for academic/technical deep-dives, or "google" as default.
2. KEYWORD INTELLIGENCE:
- "suggested_keywords": Generate 8-12 keywords relevant to the user's industry, interests (from core_persona), and content goals.
- "keyword_expansion_patterns": Create a dictionary mapping common keywords to expanded, industry-specific terms. Include 10-15 patterns like:
{{"AI": ["healthcare AI", "medical AI", "clinical AI", "diagnostic AI"], "tools": ["medical devices", "clinical tools"], ...}}
Focus on industry-specific terminology from the user's domain.
3. DOMAIN EXPERTISE:
- "suggested_exa_domains": List 4-6 authoritative domains for the user's industry (e.g., Healthcare: ["pubmed.gov", "nejm.org", "thelancet.com"]).
- "suggested_exa_category": Suggest appropriate Exa category based on industry:
- Healthcare/Science: "research paper"
- Finance: "financial report"
- Technology/Business: "company" or "news"
- Default: null (empty string for all categories)
4. RESEARCH ANGLES:
- "research_angles": Generate 5-8 alternative research angles/focuses based on:
- User's pain points and challenges (from core_persona)
- Industry trends and opportunities
- Content goals (from research_preferences)
- Audience interests (from core_persona.interests)
Examples: "Compare {{topic}} tools", "{{topic}} ROI analysis", "Latest {{topic}} trends", etc.
5. QUERY ENHANCEMENT:
- "query_enhancement_rules": Create templates for improving vague user queries:
{{"vague_ai": "Research: AI applications in {{industry}} for {{audience}}", "vague_tools": "Compare top {{industry}} tools", ...}}
Include 5-8 enhancement patterns.
6. RECOMMENDED PRESETS:
- "recommended_presets": Generate 3-5 personalized research preset templates. Each preset should include:
- name: Descriptive name (e.g., "{{Industry}} Trends", "{{Audience}} Insights")
- keywords: Research query string
- industry: User's industry
- target_audience: User's target audience
- research_mode: "basic", "comprehensive", or "targeted"
- config: Complete ResearchConfig object with appropriate settings
- description: Brief explanation of what this preset researches
Make presets relevant to the user's specific industry, audience, and content goals.
7. RESEARCH PREFERENCES:
- "research_preferences": Extract and structure research preferences from onboarding:
- research_depth: From research_preferences.research_depth
- content_types: From research_preferences.content_types
- auto_research: From research_preferences.auto_research
- factual_content: From research_preferences.factual_content
=== OUTPUT REQUIREMENTS ===
Return a valid JSON object matching this exact structure:
{{
"default_industry": "string",
"default_target_audience": "string",
"default_research_mode": "basic" | "comprehensive" | "targeted",
"default_provider": "google" | "exa",
"suggested_keywords": ["keyword1", "keyword2", ...],
"keyword_expansion_patterns": {{
"keyword": ["expansion1", "expansion2", ...]
}},
"suggested_exa_domains": ["domain1.com", "domain2.com", ...],
"suggested_exa_category": "string or null",
"research_angles": ["angle1", "angle2", ...],
"query_enhancement_rules": {{
"pattern": "template"
}},
"recommended_presets": [
{{
"name": "string",
"keywords": "string",
"industry": "string",
"target_audience": "string",
"research_mode": "basic" | "comprehensive" | "targeted",
"config": {{
"mode": "basic" | "comprehensive" | "targeted",
"provider": "google" | "exa",
"max_sources": 10 | 15 | 12,
"include_statistics": true | false,
"include_expert_quotes": true | false,
"include_competitors": true | false,
"include_trends": true | false,
"exa_category": "string or null",
"exa_include_domains": ["domain1.com", ...],
"exa_search_type": "auto" | "keyword" | "neural"
}},
"description": "string"
}}
],
"research_preferences": {{
"research_depth": "string",
"content_types": ["type1", "type2", ...],
"auto_research": true | false,
"factual_content": true | false
}},
"version": "1.0",
"confidence_score": 0.85
}}
=== IMPORTANT INSTRUCTIONS ===
1. Be highly specific and personalized - use actual data from the user's business, persona, and preferences.
2. Avoid generic suggestions - every field should reflect the user's unique context.
3. For industries not clearly identified, infer from website_analysis.content_characteristics or writing_style.
4. Ensure all suggested keywords, domains, and angles are relevant to the user's industry and audience.
5. Generate realistic, actionable presets that the user would actually want to use.
6. Confidence score should reflect data richness (0.0-1.0): higher if rich onboarding data, lower if minimal data.
7. Return ONLY valid JSON - no markdown formatting, no explanatory text.
Generate the research persona now:
"""
        return prompt

    def get_json_schema(self) -> Dict[str, Any]:
        """
        Return JSON schema for structured LLM response.

        Returns:
            The JSON schema generated from the ResearchPersona Pydantic model
            (intended for use with llm_text_gen(json_struct=...)).
        """
        # Imported lazily so that building a prompt does not require the models package.
        from models.research_persona_models import ResearchPersona

        # Pydantic v2 exposes model_json_schema(); .schema() is the v1 name
        # and is kept as a fallback for older installations.
        schema_builder = getattr(ResearchPersona, "model_json_schema", None)
        if callable(schema_builder):
            return schema_builder()
        return ResearchPersona.schema()

View File

@@ -0,0 +1,194 @@
"""
Research Persona Scheduler
Handles scheduled generation of research personas after onboarding.
"""
from datetime import datetime, timedelta, timezone
from typing import Dict, Any
from loguru import logger
from services.database import get_db_session
from services.research.research_persona_service import ResearchPersonaService
from models.scheduler_models import SchedulerEventLog
def _record_persona_job_event(
    db,
    user_id: str,
    event_type: str,
    event_date: datetime,
    event_data: Dict[str, Any],
    outcome_label: str,
    error_message=None,
) -> None:
    """
    Best-effort write of one persona-job outcome to SchedulerEventLog.

    This helper never raises: a failure to write the dashboard event must not
    change the outcome of the job itself.

    Args:
        db: Open database session used for the write
        user_id: User ID (Clerk string)
        event_type: Event type stored on the log row ('job_completed' / 'job_failed')
        event_date: Timestamp stored as the event date
        event_data: JSON-serialisable details stored with the event
        outcome_label: Word inserted into the warning message if the write fails
        error_message: Optional error text; only passed to the model when given
    """
    log_fields = {
        'event_type': event_type,
        'event_date': event_date,
        'job_id': f"research_persona_{user_id}",  # Match scheduled job ID format
        'job_type': 'one_time',
        'user_id': user_id,
        'event_data': event_data,
    }
    if error_message is not None:
        log_fields['error_message'] = error_message

    try:
        db.add(SchedulerEventLog(**log_fields))
        db.commit()
    except Exception as log_error:
        logger.warning(f"Failed to log persona generation {outcome_label} to scheduler event log: {log_error}")
        # Roll back so the session stays usable; guard the rollback itself so a
        # broken connection cannot escape and be mistaken for a generation error.
        try:
            db.rollback()
        except Exception as rollback_error:
            logger.warning(f"Failed to roll back scheduler event log session: {rollback_error}")


async def generate_research_persona_task(user_id: str):
    """
    Async task function to generate research persona for a user.

    Called by the scheduler after onboarding completion (the scheduling helper
    in this module uses a default delay of 20 minutes). The task skips users
    that already have a research persona, runs generation once, and records
    the outcome in SchedulerEventLog. It never reschedules itself, so a failed
    (and possibly billed) generation is not retried automatically; the user
    can trigger generation manually instead.

    Args:
        user_id: User ID (Clerk string)
    """
    db = None
    try:
        logger.info(f"Scheduled research persona generation started for user {user_id}")

        # Get database session
        db = get_db_session()
        if not db:
            logger.error(f"Failed to get database session for research persona generation (user: {user_id})")
            return

        persona_service = ResearchPersonaService(db_session=db)

        # Check if persona already exists to avoid unnecessary API calls
        persona_data = persona_service._get_persona_data_record(user_id)
        if persona_data and persona_data.research_persona:
            logger.info(f"Research persona already exists for user {user_id}, skipping generation")
            return

        start_time = datetime.utcnow()

        # NOTE(review): get_or_generate is a plain synchronous call (it is not
        # awaited), so it runs on the event loop thread for its whole duration.
        # Confirm the scheduler executes this coroutine where that is acceptable.
        try:
            research_persona = persona_service.get_or_generate(user_id, force_refresh=False)
        except Exception as gen_error:
            execution_time = (datetime.utcnow() - start_time).total_seconds()
            error_msg = (
                f"Exception during scheduled research persona generation for user {user_id}: {str(gen_error)}. "
                f"Expensive API call may have been made. Will NOT automatically retry."
            )
            logger.error(f"{error_msg}")
            # Log exception to scheduler event log for dashboard visibility
            _record_persona_job_event(
                db,
                user_id,
                event_type='job_failed',
                event_date=start_time,
                event_data={
                    'job_function': 'generate_research_persona_task',
                    'execution_time_seconds': execution_time,
                    'status': 'failed',
                    'failure_reason': 'exception',
                    'exception_type': type(gen_error).__name__,
                    'exception_message': str(gen_error),
                    'expensive_api_call': True
                },
                outcome_label='exception',
                error_message=error_msg,
            )
            # DO NOT reschedule - prevent infinite retry loops
            return

        execution_time = (datetime.utcnow() - start_time).total_seconds()

        if research_persona:
            logger.info(f"✅ Scheduled research persona generation completed for user {user_id}")
            # Log success to scheduler event log for dashboard
            _record_persona_job_event(
                db,
                user_id,
                event_type='job_completed',
                event_date=start_time,
                event_data={
                    'job_function': 'generate_research_persona_task',
                    'execution_time_seconds': execution_time,
                    'status': 'success'
                },
                outcome_label='success',
            )
        else:
            error_msg = (
                f"Scheduled research persona generation FAILED for user {user_id}. "
                f"Expensive API call was made but generation failed. "
                f"Will NOT automatically retry to prevent wasteful API calls."
            )
            logger.error(f"{error_msg}")
            # Log failure to scheduler event log for dashboard visibility
            _record_persona_job_event(
                db,
                user_id,
                event_type='job_failed',
                event_date=start_time,
                event_data={
                    'job_function': 'generate_research_persona_task',
                    'execution_time_seconds': execution_time,
                    'status': 'failed',
                    'failure_reason': 'generation_returned_none',
                    'expensive_api_call': True
                },
                outcome_label='failure',
                error_message=error_msg,
            )
            # DO NOT reschedule - this prevents infinite retry loops
            # User can manually trigger generation from frontend if needed
    except Exception as e:
        logger.error(f"Error in scheduled research persona generation for user {user_id}: {e}")
    finally:
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session: {e}")
def schedule_research_persona_generation(user_id: str, delay_minutes: int = 20) -> str:
    """
    Queue a one-time research persona generation job for a user.

    The job runs generate_research_persona_task at "now (UTC) + delay" and is
    registered under a deterministic ID, replacing any job already registered
    under that ID.

    Args:
        user_id: User ID (Clerk string)
        delay_minutes: Minutes to wait before the job fires (default: 20)

    Returns:
        Job ID as returned by the scheduler

    Raises:
        Exception: Any scheduling error is logged and then re-raised unchanged
    """
    try:
        from services.scheduler import get_scheduler

        task_scheduler = get_scheduler()

        # Timezone-aware UTC fire time: current time plus the requested delay
        fire_at = datetime.now(timezone.utc) + timedelta(minutes=delay_minutes)

        # The ID deliberately carries no timestamp so the job can be found and
        # restored with its original scheduled time. The Clerk user_id already
        # includes the "user_" prefix, so it is not added again.
        persona_job_id = f"research_persona_{user_id}"

        confirmed_job_id = task_scheduler.schedule_one_time_task(
            func=generate_research_persona_task,
            run_date=fire_at,
            job_id=persona_job_id,
            kwargs={"user_id": user_id},
            replace_existing=True
        )

        logger.info(
            f"Scheduled research persona generation for user {user_id} "
            f"at {fire_at} (job_id: {confirmed_job_id})"
        )
        return confirmed_job_id
    except Exception as e:
        logger.error(f"Failed to schedule research persona generation for user {user_id}: {e}")
        raise

View File

@@ -0,0 +1,384 @@
"""
Research Persona Service
Handles generation, caching, and retrieval of AI-powered research personas.
"""
from typing import Dict, Any, Optional
from datetime import datetime, timedelta
from loguru import logger
from fastapi import HTTPException
from services.database import get_db_session
from models.onboarding import PersonaData, OnboardingSession
from models.research_persona_models import ResearchPersona
from .research_persona_prompt_builder import ResearchPersonaPromptBuilder
from services.llm_providers.main_text_generation import llm_text_gen
from services.onboarding.database_service import OnboardingDatabaseService
from services.persona_data_service import PersonaDataService
class ResearchPersonaService:
    """
    Service for generating and managing research personas.

    A research persona is generated by an LLM from the user's onboarding data
    and stored on the user's PersonaData record together with a generation
    timestamp. Stored personas are treated as a cache that is valid for
    CACHE_TTL_DAYS.

    If no session is passed to the constructor, one is obtained from
    get_db_session(); this class never closes the session it uses.
    """

    CACHE_TTL_DAYS = 7  # 7-day cache TTL

    def __init__(self, db_session=None):
        self.db = db_session or get_db_session()
        self.prompt_builder = ResearchPersonaPromptBuilder()
        self.onboarding_service = OnboardingDatabaseService(db=self.db)
        self.persona_data_service = PersonaDataService(db_session=self.db)

    def get_cached_only(
        self,
        user_id: str
    ) -> Optional[ResearchPersona]:
        """
        Get research persona for user ONLY if it exists in cache.

        This method NEVER generates - it only returns cached personas.
        Use this for config endpoints to avoid triggering rate limit checks.

        Args:
            user_id: User ID (Clerk string)

        Returns:
            ResearchPersona if cached and valid, None otherwise (including
            when the cached payload cannot be parsed or any error occurs)
        """
        try:
            persona_data = self._get_persona_data_record(user_id)
            if not persona_data:
                logger.debug(f"No persona data found for user {user_id}")
                return None

            # Only return if cache is valid and persona exists
            if self.is_cache_valid(persona_data) and persona_data.research_persona:
                try:
                    logger.debug(f"Returning cached research persona for user {user_id}")
                    return ResearchPersona(**persona_data.research_persona)
                except Exception as e:
                    logger.warning(f"Failed to parse cached research persona: {e}")
                    return None

            # Cache invalid or persona missing - return None (don't generate)
            logger.debug(f"No valid cached research persona for user {user_id}")
            return None
        except Exception as e:
            logger.error(f"Error getting cached research persona for user {user_id}: {e}")
            return None

    def get_or_generate(
        self,
        user_id: str,
        force_refresh: bool = False
    ) -> Optional[ResearchPersona]:
        """
        Get research persona for user, generating if missing or expired.

        Args:
            user_id: User ID (Clerk string)
            force_refresh: If True, regenerate even if cache is valid

        Returns:
            ResearchPersona if successful, None otherwise. A freshly generated
            persona is returned even if saving it to the database failed.

        Raises:
            HTTPException: Propagated from generation (e.g., 429 subscription
                limit) so it can reach the API layer
        """
        try:
            persona_data = self._get_persona_data_record(user_id)
            if not persona_data:
                logger.warning(f"No persona data found for user {user_id}, cannot generate research persona")
                return None

            # Check cache if not forcing refresh
            if not force_refresh and self.is_cache_valid(persona_data):
                if persona_data.research_persona:
                    logger.info(f"Using cached research persona for user {user_id}")
                    try:
                        return ResearchPersona(**persona_data.research_persona)
                    except Exception as e:
                        # Unparseable cache: fall through to regeneration
                        logger.warning(f"Failed to parse cached research persona: {e}, regenerating...")
                else:
                    logger.info(f"Research persona missing for user {user_id}, generating...")
            elif force_refresh:
                logger.info(f"Forcing refresh of research persona for user {user_id}")
            else:
                logger.info(f"Cache expired for user {user_id}, regenerating...")

            # Generate new research persona (HTTPException propagates via the
            # handler at the bottom of this method)
            research_persona = self.generate_research_persona(user_id)

            if not research_persona:
                # Log detailed error for debugging expensive failures
                logger.error(
                    f"❌ Failed to generate research persona for user {user_id} - "
                    f"This is an expensive failure (API call consumed). Check logs above for details."
                )
                return None

            # Save to database; a failed save is logged but the persona is still returned
            if self.save_research_persona(user_id, research_persona):
                logger.info(f"✅ Research persona generated and saved for user {user_id}")
            else:
                logger.warning(f"Failed to save research persona for user {user_id}")
            return research_persona
        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
            raise
        except Exception as e:
            logger.error(f"Error getting/generating research persona for user {user_id}: {e}")
            return None

    @staticmethod
    def _strip_markdown_fences(text: str) -> str:
        """
        Remove a surrounding Markdown code fence from an LLM text response.

        Handles a bare opening fence as well as one tagged "json" in any
        letter case, plus an optional closing fence.

        Args:
            text: Raw text returned by the LLM

        Returns:
            The text without the fence markers, stripped of outer whitespace
        """
        text = text.strip()
        if text.startswith("```"):
            text = text[3:]
            # Drop the language tag that may follow the opening fence
            if text[:4].lower() == "json":
                text = text[4:]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def generate_research_persona(self, user_id: str) -> Optional[ResearchPersona]:
        """
        Generate a new research persona for the user.

        Collects onboarding data, builds the prompt and JSON schema, calls the
        LLM, and validates the response as a ResearchPersona. The persona is
        not saved here.

        Args:
            user_id: User ID (Clerk string)

        Returns:
            ResearchPersona if successful, None otherwise

        Raises:
            HTTPException: Re-raised from the LLM call; a RuntimeError from the
                LLM call is converted to HTTPException with status 429
        """
        # Kept as a function-local import, as in the original code
        import json

        try:
            logger.info(f"Generating research persona for user {user_id}")

            # Collect onboarding data
            onboarding_data = self._collect_onboarding_data(user_id)
            if not onboarding_data:
                logger.warning(f"Insufficient onboarding data for user {user_id}")
                return None

            prompt = self.prompt_builder.build_research_persona_prompt(onboarding_data)
            json_schema = self.prompt_builder.get_json_schema()

            # Call LLM with structured JSON response
            logger.info(f"Calling LLM for research persona generation (user: {user_id})")
            try:
                response = llm_text_gen(
                    prompt=prompt,
                    json_struct=json_schema,
                    user_id=user_id
                )
            except HTTPException:
                # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
                logger.warning(f"HTTPException during LLM call for user {user_id} - re-raising")
                raise
            except RuntimeError as e:
                # Re-raise RuntimeError (subscription limits) as HTTPException
                logger.warning(f"RuntimeError during LLM call for user {user_id}: {e}")
                raise HTTPException(status_code=429, detail=str(e)) from e

            if not response:
                logger.error("Empty response from LLM")
                return None

            try:
                # When json_struct is provided, llm_text_gen may return a dict directly
                if isinstance(response, dict):
                    persona_dict = response
                elif isinstance(response, str):
                    # Markdown-wrapped JSON or plain JSON string
                    persona_dict = json.loads(self._strip_markdown_fences(response))
                else:
                    logger.error(f"Unexpected response type from LLM: {type(response)}")
                    return None

                # Valid JSON that is not an object (e.g. a list) cannot become a persona
                if not isinstance(persona_dict, dict):
                    logger.error(f"LLM response JSON is not an object: {type(persona_dict)}")
                    return None

                # Add generated_at timestamp
                persona_dict["generated_at"] = datetime.utcnow().isoformat()

                # Validate and create ResearchPersona; log the dict structure
                # for debugging if validation fails
                try:
                    research_persona = ResearchPersona(**persona_dict)
                except Exception as validation_error:
                    logger.error(f"Failed to validate ResearchPersona from dict: {validation_error}")
                    logger.debug(f"Persona dict keys: {list(persona_dict.keys())}")
                    logger.debug(f"Persona dict sample: {str(persona_dict)[:500]}")
                    # Re-raise to be caught by the generic handler below
                    raise

                logger.info(f"✅ Research persona generated successfully for user {user_id}")
                return research_persona
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse LLM response as JSON: {e}")
                logger.debug(f"Response text: {str(response)[:500]}")
                return None
            except Exception as e:
                logger.error(f"Failed to create ResearchPersona from response: {e}")
                return None
        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
            raise
        except Exception as e:
            logger.error(f"Error generating research persona for user {user_id}: {e}")
            return None

    def is_cache_valid(self, persona_data: PersonaData) -> bool:
        """
        Check if cached research persona is still valid (within TTL).

        Args:
            persona_data: PersonaData database record

        Returns:
            True if the persona was generated less than CACHE_TTL_DAYS ago,
            False otherwise (including when it was never generated)
        """
        generated_at = persona_data.research_persona_generated_at
        if not generated_at:
            return False

        # The age is computed against naive UTC "now". If the stored timestamp
        # is timezone-aware, convert it to naive UTC first; subtracting an
        # aware datetime from a naive one raises TypeError.
        utc_offset = generated_at.utcoffset()
        if utc_offset is not None:
            generated_at = (generated_at - utc_offset).replace(tzinfo=None)

        cache_age = datetime.utcnow() - generated_at
        is_valid = cache_age < timedelta(days=self.CACHE_TTL_DAYS)

        if not is_valid:
            logger.debug(f"Cache expired (age: {cache_age.days} days, TTL: {self.CACHE_TTL_DAYS} days)")

        return is_valid

    def save_research_persona(
        self,
        user_id: str,
        research_persona: ResearchPersona
    ) -> bool:
        """
        Save research persona to database.

        Stores the persona as a dict on the user's PersonaData record and sets
        the generation timestamp to the current naive UTC time.

        Args:
            user_id: User ID (Clerk string)
            research_persona: ResearchPersona to save

        Returns:
            True if successful, False otherwise (the session is rolled back on error)
        """
        try:
            persona_data = self._get_persona_data_record(user_id)
            if not persona_data:
                logger.error(f"No persona data record found for user {user_id}")
                return False

            # Convert ResearchPersona to dict for JSON storage
            persona_data.research_persona = research_persona.dict()
            persona_data.research_persona_generated_at = datetime.utcnow()
            self.db.commit()

            logger.info(f"✅ Research persona saved for user {user_id}")
            return True
        except Exception as e:
            logger.error(f"Error saving research persona for user {user_id}: {e}")
            self.db.rollback()
            return False

    def _get_persona_data_record(self, user_id: str) -> Optional[PersonaData]:
        """
        Get PersonaData database record for user.

        Looks up the user's OnboardingSession first and then the PersonaData
        row attached to that session.

        Returns:
            PersonaData record, or None if the session or record is missing
            or the lookup fails
        """
        try:
            # Ensure research_persona columns exist before querying
            self.onboarding_service._ensure_research_persona_columns(self.db)

            session = self.db.query(OnboardingSession).filter(
                OnboardingSession.user_id == user_id
            ).first()
            if not session:
                return None

            return self.db.query(PersonaData).filter(
                PersonaData.session_id == session.id
            ).first()
        except Exception as e:
            logger.error(f"Error getting persona data record for user {user_id}: {e}")
            return None

    def _collect_onboarding_data(self, user_id: str) -> Optional[Dict[str, Any]]:
        """
        Collect all onboarding data needed for research persona generation.

        business_info is assembled field by field: the core persona is the
        preferred source for 'industry' and 'target_audience', and the
        website analysis 'target_audience' section fills in whichever of the
        two the persona did not provide.

        Returns:
            Dictionary with website_analysis, persona_data, research_preferences,
            business_info; None if both website analysis and persona data are
            missing or an error occurs
        """
        try:
            website_analysis = self.onboarding_service.get_website_analysis(user_id, self.db) or {}
            persona_data_dict = self.onboarding_service.get_persona_data(user_id, self.db) or {}
            research_prefs = self.onboarding_service.get_research_preferences(user_id, self.db) or {}

            # Check if we have enough data
            if not website_analysis and not persona_data_dict:
                logger.warning(f"Insufficient onboarding data for user {user_id}")
                return None

            business_info = {}

            # Preferred source: core persona (camelCase or snake_case key)
            core_persona = persona_data_dict.get('corePersona') or persona_data_dict.get('core_persona')
            if isinstance(core_persona, dict):
                for field in ('industry', 'target_audience'):
                    if core_persona.get(field):
                        business_info[field] = core_persona[field]

            # Fallback source: website analysis, applied per missing field so a
            # persona-provided value is never overwritten
            target_audience_data = website_analysis.get('target_audience')
            if isinstance(target_audience_data, dict):
                if not business_info.get('industry'):
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus:
                        business_info['industry'] = industry_focus
                if not business_info.get('target_audience'):
                    demographics = target_audience_data.get('demographics')
                    if demographics:
                        business_info['target_audience'] = (
                            demographics if isinstance(demographics, str) else str(demographics)
                        )

            return {
                "website_analysis": website_analysis,
                "persona_data": persona_data_dict,
                "research_preferences": research_prefs,
                "business_info": business_info
            }
        except Exception as e:
            logger.error(f"Error collecting onboarding data for user {user_id}: {e}")
            return None

View File

@@ -10,7 +10,9 @@ from .core.exception_handler import (
TaskExecutionError, DatabaseError, TaskLoaderError, SchedulerConfigError
)
from .executors.monitoring_task_executor import MonitoringTaskExecutor
from .executors.oauth_token_monitoring_executor import OAuthTokenMonitoringExecutor
from .utils.task_loader import load_due_monitoring_tasks
from .utils.oauth_token_task_loader import load_due_oauth_token_monitoring_tasks
# Global scheduler instance (initialized on first access)
_scheduler_instance: TaskScheduler = None
@@ -37,6 +39,14 @@ def get_scheduler() -> TaskScheduler:
monitoring_executor,
load_due_monitoring_tasks
)
# Register OAuth token monitoring executor
oauth_token_executor = OAuthTokenMonitoringExecutor()
_scheduler_instance.register_executor(
'oauth_token_monitoring',
oauth_token_executor,
load_due_oauth_token_monitoring_tasks
)
return _scheduler_instance
@@ -46,6 +56,7 @@ __all__ = [
'TaskExecutor',
'TaskExecutionResult',
'MonitoringTaskExecutor',
'OAuthTokenMonitoringExecutor',
'get_scheduler',
# Exception handling
'SchedulerExceptionHandler',

View File

@@ -0,0 +1,141 @@
"""
Check Cycle Handler
Handles the main scheduler check cycle that finds and executes due tasks.
"""
from typing import TYPE_CHECKING, Dict, Any
from datetime import datetime
from sqlalchemy.orm import Session
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
from .exception_handler import DatabaseError
from .interval_manager import adjust_check_interval_if_needed
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("check_cycle_handler")
def _build_check_cycle_log_lines(
    scheduler: 'TaskScheduler',
    cycle_summary: Dict[str, Any],
    check_duration: float,
    active_strategies: int,
    active_executions: int
) -> list:
    """
    Build the lines of the single multi-line check cycle summary log message.

    Per-task-type lines always use the "├─" connector because summary lines
    (totals or the idle notice) always follow them; only the final line of
    the message uses "└─".

    Args:
        scheduler: TaskScheduler instance (stats, interval and concurrency limit are read)
        cycle_summary: Per-type and total counters for this cycle
        check_duration: Cycle duration in seconds
        active_strategies: Active strategies count shown in the summary
        active_executions: Number of currently active executions

    Returns:
        List of log lines, to be joined with newlines by the caller
    """
    lines = [
        f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
        f" ├─ Duration: {check_duration:.2f}s",
        f" ├─ Active Strategies: {active_strategies}",
        f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
        " ├─ User Isolation: Enabled (tasks filtered by user_id)",
        f" ├─ Tasks Found: {cycle_summary['total_found']} total"
    ]

    for task_type, count in cycle_summary['tasks_found_by_type'].items():
        executed = cycle_summary['tasks_executed_by_type'].get(task_type, 0)
        failed = cycle_summary['tasks_failed_by_type'].get(task_type, 0)
        lines.append(f" ├─ {task_type}: {count} found, {executed} executed, {failed} failed")

    if cycle_summary['total_found'] > 0:
        lines.append(f" ├─ Total Executed: {cycle_summary['total_executed']}")
        lines.append(f" ├─ Total Failed: {cycle_summary['total_failed']}")
        lines.append(f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}")
    else:
        lines.append(" └─ No tasks found - scheduler idle")

    return lines


async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
    """
    Main scheduler loop: check for due tasks and execute them.

    One cycle does the following with a single database session:
    adjusts the check interval, processes every registered task type,
    logs a summary, stores a 'check_cycle' SchedulerEventLog row, and updates
    scheduler.stats. Any error is reported through the scheduler's exception
    handler as a DatabaseError and is not re-raised.

    Args:
        scheduler: TaskScheduler instance
    """
    scheduler.stats['total_checks'] += 1
    check_start_time = datetime.utcnow()
    scheduler.stats['last_check'] = check_start_time.isoformat()

    # Track execution summary for this check cycle
    cycle_summary = {
        'tasks_found_by_type': {},
        'tasks_executed_by_type': {},
        'tasks_failed_by_type': {},
        'total_found': 0,
        'total_executed': 0,
        'total_failed': 0
    }

    db = None
    try:
        db = get_db_session()
        if db is None:
            logger.error("[Scheduler Check] ❌ Failed to get database session")
            return

        # Check for active strategies and adjust interval intelligently
        await adjust_check_interval_if_needed(scheduler, db)

        # Check each registered task type
        for task_type in scheduler.registry.get_registered_types():
            type_summary = await scheduler._process_task_type(task_type, db, cycle_summary)
            if type_summary:
                cycle_summary['tasks_found_by_type'][task_type] = type_summary.get('found', 0)
                cycle_summary['tasks_executed_by_type'][task_type] = type_summary.get('executed', 0)
                cycle_summary['tasks_failed_by_type'][task_type] = type_summary.get('failed', 0)

        # Calculate totals
        cycle_summary['total_found'] = sum(cycle_summary['tasks_found_by_type'].values())
        cycle_summary['total_executed'] = sum(cycle_summary['tasks_executed_by_type'].values())
        cycle_summary['total_failed'] = sum(cycle_summary['tasks_failed_by_type'].values())

        check_duration = (datetime.utcnow() - check_start_time).total_seconds()
        active_strategies = scheduler.stats.get('active_strategies_count', 0)
        active_executions = len(scheduler.active_executions)

        # Log comprehensive check cycle summary in single message
        logger.warning("\n".join(_build_check_cycle_log_lines(
            scheduler,
            cycle_summary,
            check_duration,
            active_strategies,
            active_executions
        )))

        # Save check cycle event to database for historical tracking
        # (best effort: a failure here only produces a warning)
        try:
            event_log = SchedulerEventLog(
                event_type='check_cycle',
                event_date=check_start_time,
                check_cycle_number=scheduler.stats['total_checks'],
                check_interval_minutes=scheduler.current_check_interval_minutes,
                tasks_found=cycle_summary.get('total_found', 0),
                tasks_executed=cycle_summary.get('total_executed', 0),
                tasks_failed=cycle_summary.get('total_failed', 0),
                tasks_by_type=cycle_summary.get('tasks_found_by_type', {}),
                check_duration_seconds=check_duration,
                active_strategies_count=active_strategies,
                active_executions=active_executions,
                event_data={
                    'executed_by_type': cycle_summary.get('tasks_executed_by_type', {}),
                    'failed_by_type': cycle_summary.get('tasks_failed_by_type', {})
                }
            )
            db.add(event_log)
            db.commit()
        except Exception as e:
            logger.warning(f"Failed to save check cycle event log: {e}")
            if db:
                db.rollback()

        # Update last_update timestamp for frontend polling
        scheduler.stats['last_update'] = datetime.utcnow().isoformat()
    except Exception as e:
        error = DatabaseError(
            message=f"Error checking for due tasks: {str(e)}",
            original_error=e
        )
        scheduler.exception_handler.handle_exception(error)
        logger.error(f"[Scheduler Check] ❌ Error in check cycle: {str(e)}")
    finally:
        if db:
            db.close()

View File

@@ -0,0 +1,139 @@
"""
Interval Manager
Handles intelligent scheduling interval adjustment based on active strategies.
"""
from typing import TYPE_CHECKING
from datetime import datetime
from sqlalchemy.orm import Session
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("interval_manager")
async def determine_optimal_interval(
    scheduler: 'TaskScheduler',
    min_interval: int,
    max_interval: int
) -> int:
    """
    Pick the check interval that fits the current number of active strategies.

    The count of active strategies with tasks is also written to
    scheduler.stats['active_strategies_count'].

    Args:
        scheduler: TaskScheduler instance
        min_interval: Minimum check interval in minutes
        max_interval: Maximum check interval in minutes

    Returns:
        min_interval when at least one active strategy has tasks, max_interval
        when none does, and min_interval when no database session is
        available or the lookup fails
    """
    session = None
    try:
        session = get_db_session()
        if not session:
            # No session available: use the shorter interval (safer)
            return min_interval

        from services.active_strategy_service import ActiveStrategyService

        strategy_count = ActiveStrategyService(db_session=session).count_active_strategies_with_tasks()
        scheduler.stats['active_strategies_count'] = strategy_count

        if strategy_count <= 0:
            logger.info(f"No active strategies with tasks - using {max_interval}min interval")
            return max_interval

        logger.info(f"Found {strategy_count} active strategies with tasks - using {min_interval}min interval")
        return min_interval
    except Exception as e:
        logger.warning(f"Error determining optimal interval: {e}, using default {min_interval}min")
        # Default to shorter interval on error (safer)
        return min_interval
    finally:
        if session:
            session.close()
async def adjust_check_interval_if_needed(
    scheduler: 'TaskScheduler',
    db: Session
):
    """
    Intelligently adjust check interval based on active strategies.

    If there are active strategies with tasks, the scheduler's minimum
    interval is used; otherwise its maximum interval. The 'check_due_tasks'
    job is only modified when the interval actually changes, and each change
    is recorded as an 'interval_adjustment' SchedulerEventLog row using a
    separate, short-lived database session. Errors are logged as warnings and
    never raised.

    Args:
        scheduler: TaskScheduler instance
        db: Database session used to count active strategies
    """
    try:
        from services.active_strategy_service import ActiveStrategyService

        active_strategy_service = ActiveStrategyService(db_session=db)
        active_count = active_strategy_service.count_active_strategies_with_tasks()
        scheduler.stats['active_strategies_count'] = active_count

        # Determine optimal interval
        if active_count > 0:
            optimal_interval = scheduler.min_check_interval_minutes
        else:
            optimal_interval = scheduler.max_check_interval_minutes

        # Only reschedule if interval needs to change
        if optimal_interval == scheduler.current_check_interval_minutes:
            return

        interval_message = (
            f"[Scheduler] ⚙️ Adjusting Check Interval\n"
            f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
            f" ├─ Optimal: {optimal_interval}min\n"
            f" ├─ Active Strategies: {active_count}\n"
            f" └─ Reason: {'Active strategies detected' if active_count > 0 else 'No active strategies'}"
        )
        logger.warning(interval_message)

        # Reschedule the job with new interval
        # NOTE(review): if scheduler.scheduler is an APScheduler instance,
        # modify_job(trigger=...) swaps the trigger without recomputing the
        # already-planned next run time; reschedule_job may be what is wanted.
        # Confirm against the scheduler implementation before changing.
        scheduler.scheduler.modify_job(
            'check_due_tasks',
            trigger=scheduler._get_trigger_for_interval(optimal_interval)
        )

        # Save previous interval before updating
        previous_interval = scheduler.current_check_interval_minutes

        # Update current interval
        scheduler.current_check_interval_minutes = optimal_interval
        scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()

        # Save interval adjustment event to database (best effort). The
        # session is rolled back on failure and always closed, so a failed
        # write cannot leak it.
        event_db = None
        try:
            event_db = get_db_session()
            if event_db:
                event_log = SchedulerEventLog(
                    event_type='interval_adjustment',
                    event_date=datetime.utcnow(),
                    previous_interval_minutes=previous_interval,
                    new_interval_minutes=optimal_interval,
                    check_interval_minutes=optimal_interval,
                    active_strategies_count=active_count,
                    event_data={
                        'reason': 'intelligent_scheduling',
                        'min_interval': scheduler.min_check_interval_minutes,
                        'max_interval': scheduler.max_check_interval_minutes
                    }
                )
                event_db.add(event_log)
                event_db.commit()
        except Exception as e:
            logger.warning(f"Failed to save interval adjustment event log: {e}")
            if event_db:
                try:
                    event_db.rollback()
                except Exception as rollback_error:
                    logger.warning(f"Failed to roll back interval adjustment event log: {rollback_error}")
        finally:
            if event_db:
                event_db.close()

        logger.warning(f"[Scheduler] ✅ Interval adjusted to {optimal_interval}min")
    except Exception as e:
        logger.warning(f"Error adjusting check interval: {e}")

View File

@@ -0,0 +1,269 @@
"""
Job Restoration
Handles restoration of one-time jobs (e.g., persona generation) on scheduler startup.
Preserves original scheduled times from database to avoid rescheduling on server restarts.
"""
from typing import TYPE_CHECKING
from datetime import datetime, timezone, timedelta
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from models.scheduler_models import SchedulerEventLog
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("job_restoration")
# A job whose original run time passed no more than this many seconds ago is
# treated as "missed" and executed immediately instead of being dropped.
_PERSONA_JOB_GRACE_SECONDS = 3600

# Delay applied when a job has to be scheduled from scratch because no usable
# original schedule could be recovered from SchedulerEventLog.
_PERSONA_JOB_FALLBACK_DELAY_MINUTES = 20


def _parse_scheduled_time(raw_value):
    """Parse an ISO-8601 string (optionally 'Z'-suffixed) into an aware datetime.

    Naive values are interpreted as UTC, matching how the restoration logic
    compares them against the current UTC time.
    """
    parsed = datetime.fromisoformat(raw_value.replace('Z', '+00:00'))
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


async def _restore_persona_job(scheduler, db, *, user_id, job_id, label, task_func, schedule_func):
    """Restore a single one-time persona job and report what was done.

    Args:
        scheduler: TaskScheduler instance (its ``.scheduler`` is the APScheduler object).
        db: Open database session used to read SchedulerEventLog.
        user_id: Owner of the job.
        job_id: Scheduler job id for this user/persona combination.
        label: Persona kind used in log messages ("research" or "Facebook").
        task_func: Coroutine function that generates the persona; called with ``user_id``.
        schedule_func: Callable ``(user_id, delay_minutes=...)`` that schedules a fresh job.

    Returns:
        One of ``'restored'``, ``'skipped'``, ``'executed'`` or ``'failed'``.
    """
    title = label[0].upper() + label[1:]

    def latest_event(event_type_clause):
        # Most recent event of the requested kind for this exact job and user.
        return db.query(SchedulerEventLog).filter(
            event_type_clause,
            SchedulerEventLog.job_id == job_id,
            SchedulerEventLog.user_id == user_id
        ).order_by(SchedulerEventLog.event_date.desc()).first()

    def schedule_fresh_job():
        schedule_func(user_id, delay_minutes=_PERSONA_JOB_FALLBACK_DELAY_MINUTES)
        return 'restored'

    # Direct lookup instead of scanning the whole job list.
    if scheduler.scheduler.get_job(job_id) is not None:
        logger.debug(f"{title} persona job {job_id} already exists in scheduler, skipping restoration")
        return 'skipped'

    # A job that already finished (successfully or not) must not be re-created.
    if latest_event(SchedulerEventLog.event_type.in_(['job_completed', 'job_failed'])):
        logger.debug(f"{title} persona job {job_id} already completed/failed, skipping restoration")
        return 'skipped'

    scheduled_event = latest_event(SchedulerEventLog.event_type == 'job_scheduled')
    event_data = scheduled_event.event_data if scheduled_event else None
    if not event_data:
        logger.warning(
            f"[Restoration] No previous scheduled event found for {label} persona job {job_id}, "
            f"scheduling NEW job with current time + {_PERSONA_JOB_FALLBACK_DELAY_MINUTES} minutes"
        )
        return schedule_fresh_job()

    scheduled_for_str = event_data.get('scheduled_for')
    if not scheduled_for_str:
        logger.warning(
            f"[Restoration] No original scheduled time found for {label} persona job {job_id}, "
            f"scheduling NEW job with current time + {_PERSONA_JOB_FALLBACK_DELAY_MINUTES} minutes"
        )
        return schedule_fresh_job()

    # Only the parsing step is guarded here, so that scheduling/execution
    # failures are not misreported as "error parsing".
    try:
        original_time = _parse_scheduled_time(scheduled_for_str)
    except (ValueError, TypeError, AttributeError) as time_error:
        logger.warning(f"Error parsing original scheduled time for {job_id}: {time_error}, scheduling new job")
        return schedule_fresh_job()

    # Evaluated per job: earlier jobs may have awaited a long-running task.
    now = datetime.now(timezone.utc)

    if original_time > now:
        time_until_run = (original_time - now).total_seconds() / 60  # minutes
        logger.warning(
            f"[Restoration] Restoring {label} persona job {job_id} with ORIGINAL scheduled time: "
            f"{original_time} (UTC) = {original_time.astimezone().strftime('%H:%M:%S %Z')} (local), "
            f"will run in {time_until_run:.1f} minutes"
        )
        scheduler.schedule_one_time_task(
            func=task_func,
            run_date=original_time,
            job_id=job_id,
            kwargs={'user_id': user_id},
            replace_existing=True
        )
        return 'restored'

    overdue_seconds = (now - original_time).total_seconds()
    if overdue_seconds <= _PERSONA_JOB_GRACE_SECONDS:
        # Includes the boundary case where the job was due exactly now.
        logger.warning(f"Restoring {label} persona job {job_id} - original time was {original_time}, executing now (missed)")
        try:
            await task_func(user_id)
        except Exception as exec_error:
            logger.error(f"Error executing missed {label} persona job {job_id}: {exec_error}")
            return 'failed'
        return 'executed'

    logger.debug(f"{title} persona job {job_id} scheduled time {original_time} is too old, skipping")
    return 'skipped'


async def restore_persona_jobs(scheduler: 'TaskScheduler'):
    """
    Restore one-time persona generation jobs for users who completed onboarding
    but don't have personas yet. This ensures jobs persist across server restarts.

    IMPORTANT: Preserves original scheduled times from SchedulerEventLog to avoid
    rescheduling jobs with new times on server restarts.

    For every completed onboarding session, a research persona job is restored when
    no research persona is stored, and a Facebook persona job is restored when a core
    persona exists but no Facebook persona does. Failures for one user or persona kind
    are logged and do not stop processing of the remaining ones.

    Args:
        scheduler: TaskScheduler instance
    """
    try:
        db = get_db_session()
        if not db:
            logger.warning("Could not get database session to restore persona jobs")
            return
        try:
            from models.onboarding import OnboardingSession
            from services.research.research_persona_scheduler import (
                schedule_research_persona_generation,
                generate_research_persona_task
            )
            from services.persona.facebook.facebook_persona_scheduler import (
                schedule_facebook_persona_generation,
                generate_facebook_persona_task
            )
            from services.research.research_persona_service import ResearchPersonaService
            from services.persona_data_service import PersonaDataService

            # Get all users who completed onboarding
            completed_sessions = db.query(OnboardingSession).filter(
                OnboardingSession.progress == 100.0
            ).all()

            counts = {'restored': 0, 'skipped': 0, 'executed': 0, 'failed': 0}

            for session in completed_sessions:
                user_id = session.user_id

                # Restore research persona job
                try:
                    research_service = ResearchPersonaService(db_session=db)
                    persona_data_record = research_service._get_persona_data_record(user_id)
                    research_persona_exists = bool(
                        persona_data_record
                        and getattr(persona_data_record, 'research_persona', None)
                    )
                    if not research_persona_exists:
                        # Note: Clerk user_id already includes "user_" prefix
                        outcome = await _restore_persona_job(
                            scheduler,
                            db,
                            user_id=user_id,
                            job_id=f"research_persona_{user_id}",
                            label="research",
                            task_func=generate_research_persona_task,
                            schedule_func=schedule_research_persona_generation,
                        )
                        counts[outcome] += 1
                except Exception as e:
                    # WARNING rather than DEBUG so a failed restoration is visible in production logs.
                    logger.warning(f"Could not restore research persona for user {user_id}: {e}")

                # Restore Facebook persona job
                try:
                    persona_data_service = PersonaDataService(db_session=db)
                    persona_data = persona_data_service.get_user_persona_data(user_id) or {}
                    platform_personas = persona_data.get('platform_personas') or {}
                    facebook_persona_exists = bool(platform_personas.get('facebook'))
                    has_core_persona = bool(persona_data.get('core_persona'))
                    if not facebook_persona_exists and has_core_persona:
                        # Note: Clerk user_id already includes "user_" prefix
                        outcome = await _restore_persona_job(
                            scheduler,
                            db,
                            user_id=user_id,
                            job_id=f"facebook_persona_{user_id}",
                            label="Facebook",
                            task_func=generate_facebook_persona_task,
                            schedule_func=schedule_facebook_persona_generation,
                        )
                        counts[outcome] += 1
                except Exception as e:
                    logger.warning(f"Could not restore Facebook persona for user {user_id}: {e}")

            if counts['restored'] > 0:
                logger.warning(f"[Scheduler] ✅ Restored {counts['restored']} persona generation job(s) on startup (preserved original scheduled times)")
            if counts['executed'] > 0 or counts['failed'] > 0:
                logger.warning(
                    f"[Scheduler] Executed {counts['executed']} missed persona job(s) immediately on startup "
                    f"({counts['failed']} failed)"
                )
            if counts['skipped'] > 0:
                logger.debug(f"[Scheduler] Skipped {counts['skipped']} persona job(s) (already completed/failed or exist)")
        finally:
            db.close()
    except Exception as e:
        logger.warning(f"Error restoring persona jobs: {e}")

View File

@@ -0,0 +1,196 @@
"""
OAuth Token Monitoring Task Restoration
Automatically creates missing OAuth monitoring tasks for users who have connected platforms
but don't have monitoring tasks created yet.
"""
from datetime import datetime, timedelta
from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
from services.oauth_token_monitoring_service import get_connected_platforms, create_oauth_monitoring_tasks
# Use service logger for consistent logging (WARNING level visible in production)
logger = get_service_logger("oauth_task_restoration")
async def restore_oauth_monitoring_tasks(scheduler):
    """
    Restore/create missing OAuth token monitoring tasks for all users.

    This checks all users who have connected platforms and ensures they have
    monitoring tasks created. Tasks are created for platforms that are:
    - Connected (detected via get_connected_platforms)
    - Missing monitoring tasks (no OAuthTokenMonitoringTask exists)

    Users considered are those that already own at least one monitoring task plus,
    when the onboarding lookup succeeds, users whose onboarding is completed.
    A failure for one user is logged and does not stop processing of the others.

    Args:
        scheduler: TaskScheduler instance (currently not used by this function)
    """
    from collections import Counter, defaultdict

    def summarize_platforms(tasks):
        # "platform: count" pairs, sorted by platform name, for log output.
        per_platform = Counter(task.platform for task in tasks)
        return ", ".join(f"{p}: {c}" for p, c in sorted(per_platform.items()))

    try:
        logger.warning("[OAuth Task Restoration] Starting OAuth monitoring task restoration...")
        db = get_db_session()
        if not db:
            logger.warning("[OAuth Task Restoration] Could not get database session")
            return
        try:
            # Index existing tasks once: user_id -> set of platforms that already have a task.
            existing_tasks = db.query(OAuthTokenMonitoringTask).all()
            platforms_by_user = defaultdict(set)
            for task in existing_tasks:
                platforms_by_user[task.user_id].add(task.platform)
            user_ids_with_tasks = set(platforms_by_user)

            logger.warning(
                f"[OAuth Task Restoration] Found {len(existing_tasks)} existing OAuth tasks "
                f"for {len(user_ids_with_tasks)} users. Platforms: {summarize_platforms(existing_tasks)}"
            )

            # Check users who already have at least one OAuth task
            users_to_check = list(user_ids_with_tasks)

            # Also include users who completed onboarding, to catch users who connected
            # platforms but never had tasks created. Completion is tracked by:
            # current_step >= 6 OR progress >= 100.0
            try:
                from services.onboarding.progress_service import get_onboarding_progress_service
                from models.onboarding import OnboardingSession
                from sqlalchemy import or_

                progress_service = get_onboarding_progress_service()
                completed_sessions = db.query(OnboardingSession).filter(
                    or_(
                        OnboardingSession.current_step >= 6,
                        OnboardingSession.progress >= 100.0
                    )
                ).all()

                # Validate using the service method for consistency with the rest of the app
                onboarding_user_ids = {
                    session.user_id
                    for session in completed_sessions
                    if progress_service.get_onboarding_status(session.user_id).get('is_completed', False)
                }

                # Users known only from onboarding; a set difference gives the true
                # count even when the two groups overlap partially.
                new_user_ids = onboarding_user_ids - user_ids_with_tasks
                users_to_check.extend(new_user_ids)

                logger.warning(
                    f"[OAuth Task Restoration] Checking {len(users_to_check)} users "
                    f"({len(user_ids_with_tasks)} with existing tasks, "
                    f"{len(onboarding_user_ids)} from onboarding sessions, "
                    f"{len(new_user_ids)} new users to check)"
                )
            except Exception as e:
                # Fallback to users with existing tasks only
                logger.warning(f"[OAuth Task Restoration] Could not query onboarding users: {e}")

            total_created = 0
            failed_users = 0
            for user_id in users_to_check:
                try:
                    # Get connected platforms for this user
                    connected_platforms = get_connected_platforms(user_id)
                    logger.warning(
                        f"[OAuth Task Restoration] User {user_id}: "
                        f"Connected platforms: {connected_platforms}"
                    )
                    if not connected_platforms:
                        logger.debug(
                            f"[OAuth Task Restoration] No connected platforms for user {user_id}, skipping"
                        )
                        continue

                    # Check which platforms are missing tasks
                    existing_platforms = platforms_by_user.get(user_id, set())
                    missing_platforms = [
                        platform
                        for platform in connected_platforms
                        if platform not in existing_platforms
                    ]

                    if missing_platforms:
                        logger.warning(
                            f"[OAuth Task Restoration] ⚠️ User {user_id} has connected platforms "
                            f"{connected_platforms} but missing tasks for: {missing_platforms}"
                        )
                        # Create missing tasks
                        created = create_oauth_monitoring_tasks(
                            user_id=user_id,
                            db=db,
                            platforms=missing_platforms
                        )
                        total_created += len(created)
                        logger.warning(
                            f"[OAuth Task Restoration] ✅ Created {len(created)} missing OAuth tasks "
                            f"for user {user_id}, platforms: {missing_platforms}"
                        )
                    else:
                        logger.warning(
                            f"[OAuth Task Restoration] ✅ User {user_id} has all required tasks "
                            f"for connected platforms: {connected_platforms}"
                        )
                except Exception as e:
                    failed_users += 1
                    logger.warning(
                        f"[OAuth Task Restoration] Error checking/creating tasks for user {user_id}: {e}",
                        exc_info=True
                    )
                    # A failed statement leaves the session unusable until rolled back;
                    # reset it so the remaining users and the final summary still work.
                    try:
                        db.rollback()
                    except Exception as rollback_error:
                        logger.debug(
                            f"[OAuth Task Restoration] Rollback after failure for user {user_id} failed: {rollback_error}"
                        )
                    continue

            # Final summary log with platform breakdown
            final_existing_tasks = db.query(OAuthTokenMonitoringTask).all()
            final_platform_summary = summarize_platforms(final_existing_tasks)

            if total_created > 0:
                logger.warning(
                    f"[OAuth Task Restoration] ✅ Created {total_created} missing OAuth monitoring tasks. "
                    f"Final platform breakdown: {final_platform_summary}"
                )
            elif failed_users == 0:
                logger.warning(
                    f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
                    f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks. "
                    f"Platform breakdown: {final_platform_summary}"
                )
            if failed_users > 0:
                # Do not claim full coverage when some users could not be processed.
                logger.warning(
                    f"[OAuth Task Restoration] ⚠️ {failed_users} of {len(users_to_check)} user(s) could not be "
                    f"checked due to errors. Platform breakdown: {final_platform_summary}"
                )
        finally:
            db.close()
    except Exception as e:
        logger.error(
            f"[OAuth Task Restoration] Error restoring OAuth monitoring tasks: {e}",
            exc_info=True
        )

View File

@@ -10,6 +10,7 @@ from datetime import datetime
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.triggers.date import DateTrigger
from sqlalchemy.orm import Session
from .executor_interface import TaskExecutor, TaskExecutionResult
@@ -20,6 +21,13 @@ from .exception_handler import (
)
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from ..utils.user_job_store import get_user_job_store_name
from models.scheduler_models import SchedulerEventLog
from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
from .job_restoration import restore_persona_jobs
from .oauth_task_restoration import restore_oauth_monitoring_tasks
from .check_cycle_handler import check_and_execute_due_tasks
from .task_execution_handler import execute_task_async
logger = get_service_logger("task_scheduler")
@@ -34,6 +42,14 @@ class TaskScheduler:
- Database-backed task persistence
- Configurable check intervals
- Automatic retry logic
- User isolation: All tasks are filtered by user_id for isolation
- Per-user job store context: Logs show user's website root for debugging
User Isolation:
- Tasks are filtered by user_id in task loaders
- Execution logs include user_id for tracking
- Per-user statistics are maintained
- Job store names (based on website root) are logged for debugging
"""
def __init__(
@@ -63,7 +79,7 @@ class TaskScheduler:
job_defaults={
'coalesce': True,
'max_instances': 1,
'misfire_grace_time': 300 # 5 minutes grace period
'misfire_grace_time': 3600 # 1 hour grace period for missed jobs
}
)
@@ -89,6 +105,7 @@ class TaskScheduler:
'tasks_failed': 0,
'tasks_skipped': 0,
'last_check': None,
'last_update': datetime.utcnow().isoformat(), # Timestamp for frontend polling
'per_user_stats': {}, # Track metrics per user for user isolation
'active_strategies_count': 0, # Track active strategies with tasks
'last_interval_adjustment': None # Track when interval was last adjusted
@@ -141,7 +158,11 @@ class TaskScheduler:
try:
# Determine initial check interval based on active strategies
initial_interval = await self._determine_optimal_interval()
initial_interval = await determine_optimal_interval(
self,
self.min_check_interval_minutes,
self.max_check_interval_minutes
)
self.current_check_interval_minutes = initial_interval
# Add periodic job to check for due tasks
@@ -155,16 +176,228 @@ class TaskScheduler:
self.scheduler.start()
self._running = True
logger.info(
f"Task scheduler started | "
f"check_interval={initial_interval}min | "
f"registered_types={self.registry.get_registered_types()}"
)
# Check for and execute any missed jobs that are still within grace period
await self._execute_missed_jobs()
# Restore one-time persona generation jobs for users who completed onboarding
await restore_persona_jobs(self)
# Restore/create missing OAuth token monitoring tasks for connected platforms
await restore_oauth_monitoring_tasks(self)
# Get all scheduled APScheduler jobs (including one-time tasks)
all_jobs = self.scheduler.get_jobs()
registered_types = self.registry.get_registered_types()
active_strategies = self.stats.get('active_strategies_count', 0)
# Count OAuth token monitoring tasks from database (recurring weekly tasks)
oauth_tasks_count = 0
oauth_tasks_details = []
try:
db = get_db_session()
if db:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
# Count active tasks
oauth_tasks_count = db.query(OAuthTokenMonitoringTask).filter(
OAuthTokenMonitoringTask.status == 'active'
).count()
# Get all tasks (for detailed logging)
all_oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
total_oauth_tasks = len(all_oauth_tasks)
# Show platform breakdown for ALL tasks (active and inactive)
all_platforms = {}
active_platforms = {}
for task in all_oauth_tasks:
all_platforms[task.platform] = all_platforms.get(task.platform, 0) + 1
if task.status == 'active':
active_platforms[task.platform] = active_platforms.get(task.platform, 0) + 1
if total_oauth_tasks > 0:
# Log details about all tasks (not just active)
for task in all_oauth_tasks:
oauth_tasks_details.append(
f"user={task.user_id}, platform={task.platform}, status={task.status}"
)
if total_oauth_tasks > 0 and oauth_tasks_count == 0:
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
logger.warning(
f"[Scheduler] Found {total_oauth_tasks} OAuth monitoring tasks in database, "
f"but {oauth_tasks_count} are active. "
f"All platforms: {all_platform_summary}. "
f"Task details: {', '.join(oauth_tasks_details[:5])}" # Limit to first 5 for readability
)
elif oauth_tasks_count > 0:
# Show platform breakdown for active tasks
active_platform_summary = ", ".join([f"{platform}: {count}" for platform, count in sorted(active_platforms.items())])
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
# Check for missing platforms (expected: gsc, bing, wordpress, wix)
expected_platforms = ['gsc', 'bing', 'wordpress', 'wix']
missing_in_db = [p for p in expected_platforms if p not in all_platforms]
if missing_in_db:
logger.warning(
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
f"All platforms: {all_platform_summary}. "
f"⚠️ Missing platforms (not connected or no tasks): {', '.join(missing_in_db)}"
)
else:
logger.warning(
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
f"All platforms: {all_platform_summary}"
)
db.close()
except Exception as e:
logger.warning(
f"[Scheduler] Could not get OAuth token monitoring tasks count: {e}. "
f"This may indicate the oauth_token_monitoring_tasks table doesn't exist yet or "
f"tasks haven't been created. Error type: {type(e).__name__}"
)
# Calculate job counts
apscheduler_recurring = 1 # check_due_tasks
apscheduler_one_time = len(all_jobs) - 1
total_recurring = apscheduler_recurring + oauth_tasks_count
total_jobs = len(all_jobs) + oauth_tasks_count
# Build comprehensive startup log message
startup_lines = [
f"[Scheduler] ✅ Task Scheduler Started",
f" ├─ Check Interval: {initial_interval} minutes",
f" ├─ Registered Task Types: {len(registered_types)} ({', '.join(registered_types) if registered_types else 'none'})",
f" ├─ Active Strategies: {active_strategies}",
f" ├─ Total Scheduled Jobs: {total_jobs}",
f" ├─ Recurring Jobs: {total_recurring} (check_due_tasks: {apscheduler_recurring}, OAuth monitoring: {oauth_tasks_count})",
f" └─ One-Time Jobs: {apscheduler_one_time}"
]
# Add APScheduler job details
if all_jobs:
for idx, job in enumerate(all_jobs):
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0
prefix = " └─" if is_last else " ├─"
next_run = job.next_run_time
trigger_type = type(job.trigger).__name__
# Try to extract user_id from job ID or kwargs for context
user_context = ""
user_id_from_job = None
# First try to get from kwargs
if hasattr(job, 'kwargs') and job.kwargs and job.kwargs.get('user_id'):
user_id_from_job = job.kwargs.get('user_id')
# Otherwise, try to extract from job ID (e.g., "research_persona_user_123..." or "research_persona_user123")
elif job.id and ('research_persona_' in job.id or 'facebook_persona_' in job.id):
# Job ID format: research_persona_{user_id} or facebook_persona_{user_id}
# where user_id is Clerk format (e.g., "user_33Gz1FPI86VDXhRY8QN4ragRFGN")
if job.id.startswith('research_persona_'):
user_id_from_job = job.id.replace('research_persona_', '')
elif job.id.startswith('facebook_persona_'):
user_id_from_job = job.id.replace('facebook_persona_', '')
else:
# Fallback: try to extract from parts (old format with timestamp)
parts = job.id.split('_')
if len(parts) >= 3:
user_id_from_job = parts[2] # Extract user_id from job ID
if user_id_from_job:
try:
db = get_db_session()
if db:
user_job_store = get_user_job_store_name(user_id_from_job, db)
if user_job_store == 'default':
logger.debug(
f"[Scheduler] Job store extraction returned 'default' for user {user_id_from_job}. "
f"This may indicate no onboarding data or website URL not found."
)
user_context = f" | User: {user_id_from_job} | Store: {user_job_store}"
db.close()
except Exception as e:
logger.warning(
f"[Scheduler] Could not extract job store name for user {user_id_from_job}: {e}. "
f"Error type: {type(e).__name__}"
)
user_context = f" | User: {user_id_from_job}"
startup_lines.append(f"{prefix} Job: {job.id} | Trigger: {trigger_type} | Next Run: {next_run}{user_context}")
# Add OAuth token monitoring tasks details
# Show ALL OAuth tasks (active and inactive) for complete visibility
if total_oauth_tasks > 0:
try:
db = get_db_session()
if db:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
# Get ALL tasks, not just active ones
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
for idx, task in enumerate(oauth_tasks):
is_last = idx == len(oauth_tasks) - 1 and len(all_jobs) == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
if user_job_store == 'default':
logger.debug(
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
f"This may indicate no onboarding data or website URL not found."
)
except Exception as e:
logger.warning(
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
f"Using 'default'. Error type: {type(e).__name__}"
)
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
# Include status in the log line for visibility
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
)
db.close()
except Exception as e:
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
# Log comprehensive startup information in single message
logger.warning("\n".join(startup_lines))
# Save scheduler start event to database
try:
db = get_db_session()
if db:
event_log = SchedulerEventLog(
event_type='start',
event_date=datetime.utcnow(),
check_interval_minutes=initial_interval,
active_strategies_count=active_strategies,
event_data={
'registered_types': registered_types,
'total_jobs': total_jobs,
'recurring_jobs': total_recurring,
'one_time_jobs': apscheduler_one_time,
'oauth_monitoring_tasks': oauth_tasks_count
}
)
db.add(event_log)
db.commit()
db.close()
except Exception as e:
logger.warning(f"Failed to save scheduler start event log: {e}")
except Exception as e:
logger.error(f"Failed to start scheduler: {e}")
raise
async def stop(self):
"""Stop the scheduler gracefully."""
if not self._running:
@@ -182,11 +415,48 @@ class TaskScheduler:
timeout=30
)
# Get final job count before shutdown
all_jobs_before = self.scheduler.get_jobs()
# Shutdown scheduler
self.scheduler.shutdown(wait=True)
self._running = False
logger.info("Task scheduler stopped gracefully")
# Log comprehensive shutdown information (use WARNING level for visibility)
total_checks = self.stats.get('total_checks', 0)
total_executed = self.stats.get('tasks_executed', 0)
total_failed = self.stats.get('tasks_failed', 0)
shutdown_message = (
f"[Scheduler] 🛑 Task Scheduler Stopped\n"
f" ├─ Total Check Cycles: {total_checks}\n"
f" ├─ Total Tasks Executed: {total_executed}\n"
f" ├─ Total Tasks Failed: {total_failed}\n"
f" ├─ Jobs Cancelled: {len(all_jobs_before)}\n"
f" └─ Shutdown: Graceful"
)
logger.warning(shutdown_message)
# Save scheduler stop event to database
try:
db = get_db_session()
if db:
event_log = SchedulerEventLog(
event_type='stop',
event_date=datetime.utcnow(),
check_interval_minutes=self.current_check_interval_minutes,
event_data={
'total_checks': total_checks,
'total_executed': total_executed,
'total_failed': total_failed,
'jobs_cancelled': len(all_jobs_before)
}
)
db.add(event_log)
db.commit()
db.close()
except Exception as e:
logger.warning(f"Failed to save scheduler stop event log: {e}")
except Exception as e:
logger.error(f"Error stopping scheduler: {e}")
@@ -197,109 +467,50 @@ class TaskScheduler:
Main scheduler loop: check for due tasks and execute them.
This runs periodically with intelligent interval adjustment based on active strategies.
"""
self.stats['total_checks'] += 1
self.stats['last_check'] = datetime.utcnow().isoformat()
logger.debug("Checking for due tasks...")
db = None
try:
db = get_db_session()
if db is None:
logger.error("Failed to get database session")
return
# Check for active strategies and adjust interval intelligently
await self._adjust_check_interval_if_needed(db)
# Check each registered task type
for task_type in self.registry.get_registered_types():
await self._process_task_type(task_type, db)
except Exception as e:
error = DatabaseError(
message=f"Error checking for due tasks: {str(e)}",
original_error=e
)
self.exception_handler.handle_exception(error)
finally:
if db:
db.close()
async def _determine_optimal_interval(self) -> int:
"""
Determine optimal check interval based on active strategies.
Returns:
Optimal check interval in minutes
"""
db = None
try:
db = get_db_session()
if db:
from services.active_strategy_service import ActiveStrategyService
active_strategy_service = ActiveStrategyService(db_session=db)
active_count = active_strategy_service.count_active_strategies_with_tasks()
self.stats['active_strategies_count'] = active_count
if active_count > 0:
logger.info(f"Found {active_count} active strategies with tasks - using {self.min_check_interval_minutes}min interval")
return self.min_check_interval_minutes
else:
logger.info(f"No active strategies with tasks - using {self.max_check_interval_minutes}min interval")
return self.max_check_interval_minutes
except Exception as e:
logger.warning(f"Error determining optimal interval: {e}, using default {self.min_check_interval_minutes}min")
finally:
if db:
db.close()
# Default to shorter interval on error (safer)
return self.min_check_interval_minutes
await check_and_execute_due_tasks(self)
async def _adjust_check_interval_if_needed(self, db: Session) -> None:
    """
    Intelligently adjust check interval based on active strategies.

    If there are active strategies with tasks, check more frequently.
    If there are no active strategies, check less frequently.

    This method is a thin wrapper kept on the class: it forwards this scheduler
    instance and the session to the module-level
    ``adjust_check_interval_if_needed`` function imported from
    ``.interval_manager``, which contains the actual logic.

    Args:
        db: Database session
    """
    # The unprefixed name resolves to the imported module-level function,
    # not to this method (which carries a leading underscore).
    await adjust_check_interval_if_needed(self, db)
async def _execute_missed_jobs(self):
    """
    Check for and execute any missed DateTrigger jobs that are still within grace period.

    A job counts as missed when it uses a DateTrigger (one-time task), its
    ``next_run_time`` is in the past, and no more than one hour has elapsed
    since that time. Each missed job is removed from the scheduler before it is
    run here, so the scheduler cannot run the same one-time job a second time.

    Errors from an individual job are logged and do not prevent the remaining
    missed jobs from running; errors while collecting jobs are logged as well.
    """
    import inspect
    from apscheduler.jobstores.base import JobLookupError

    # Same window as the 'misfire_grace_time' configured in the scheduler's job_defaults.
    grace_seconds = 3600

    try:
        # Take an aware "now" directly in the scheduler's timezone. Stamping
        # utcnow() with that tzinfo would mislabel UTC wall-clock time as local
        # time whenever the scheduler timezone is not UTC.
        now = datetime.now(self.scheduler.timezone)

        missed_jobs = []
        for job in self.scheduler.get_jobs():
            # Only check DateTrigger jobs (one-time tasks)
            if not isinstance(getattr(job, 'trigger', None), DateTrigger):
                continue
            next_run_time = getattr(job, 'next_run_time', None)
            if not next_run_time or next_run_time >= now:
                continue
            # Job's scheduled time has passed; keep it only if still within grace period
            if (now - next_run_time).total_seconds() <= grace_seconds:
                missed_jobs.append(job)

        if not missed_jobs:
            return

        logger.warning(
            f"[Scheduler] Found {len(missed_jobs)} missed job(s) within grace period, executing now..."
        )

        for job in missed_jobs:
            # Claim the job first. NOTE(review): APScheduler is expected to process
            # overdue jobs itself within misfire_grace_time once started - removing
            # the job here avoids running it twice.
            try:
                self.scheduler.remove_job(job.id)
            except JobLookupError:
                logger.debug(f"[Scheduler] Missed job {job.id} is no longer in the job store, skipping")
                continue

            try:
                # Execute the job immediately
                logger.info(f"[Scheduler] Executing missed job: {job.id}")
                outcome = job.func(*job.args, **job.kwargs)
                # Job callables may be plain functions or coroutine functions.
                if inspect.isawaitable(outcome):
                    await outcome
            except Exception as e:
                logger.error(f"[Scheduler] Error executing missed job {job.id}: {e}")
    except Exception as e:
        logger.warning(f"[Scheduler] Error checking for missed jobs: {e}")
async def trigger_interval_adjustment(self):
"""
@@ -315,14 +526,22 @@ class TaskScheduler:
try:
db = get_db_session()
if db:
await self._adjust_check_interval_if_needed(db)
await adjust_check_interval_if_needed(self, db)
db.close()
else:
logger.warning("Could not get database session for interval adjustment")
except Exception as e:
logger.warning(f"Error triggering interval adjustment: {e}")
async def _process_task_type(self, task_type: str, db: Session):
"""Process due tasks for a specific task type."""
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
"""
Process due tasks for a specific task type.
Returns:
Summary dict with 'found', 'executed', 'failed' counts, or None if no tasks
"""
summary = {'found': 0, 'executed': 0, 'failed': 0}
try:
# Get task loader for this type
try:
@@ -334,7 +553,7 @@ class TaskScheduler:
original_error=e
)
self.exception_handler.handle_exception(error)
return
return None
# Load due tasks (with error handling)
try:
@@ -346,28 +565,30 @@ class TaskScheduler:
original_error=e
)
self.exception_handler.handle_exception(error)
return
return None
if not due_tasks:
return
return None
summary['found'] = len(due_tasks)
self.stats['tasks_found'] += len(due_tasks)
logger.info(f"Found {len(due_tasks)} due tasks for type: {task_type}")
# Execute tasks (with concurrency limit)
execution_tasks = []
skipped_count = 0
for task in due_tasks:
if len(self.active_executions) >= self.max_concurrent_executions:
skipped_count = len(due_tasks) - len(execution_tasks)
logger.warning(
f"Max concurrent executions reached ({self.max_concurrent_executions}), "
f"skipping {len(due_tasks) - len(execution_tasks)} tasks"
f"[Scheduler] ⚠️ Max concurrent executions reached ({self.max_concurrent_executions}), "
f"skipping {skipped_count} tasks for {task_type}"
)
break
# Execute task asynchronously
# Note: Each task gets its own database session to prevent concurrent access issues
execution_task = asyncio.create_task(
self._execute_task_async(task_type, task)
execute_task_async(self, task_type, task, summary)
)
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
@@ -379,6 +600,8 @@ class TaskScheduler:
if execution_tasks:
await asyncio.wait(execution_tasks, timeout=300)
return summary
except Exception as e:
error = TaskLoaderError(
message=f"Error processing task type {task_type}: {str(e)}",
@@ -386,169 +609,8 @@ class TaskScheduler:
original_error=e
)
self.exception_handler.handle_exception(error)
return summary
async def _execute_task_async(self, task_type: str, task: Any):
    """
    Run one due task in a dedicated database session and record the outcome.

    A fresh session is opened for the task (sessions are not shared between
    concurrently running tasks), the task object is merged into it when it is
    detached, the executor registered for ``task_type`` is invoked, and the
    scheduler-wide and per-user statistics are updated.

    Args:
        task_type: Type of task; used to look up the executor in the registry.
        task: Task instance loaded elsewhere (may be detached from its session).
    """
    task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
    db = None
    user_id = None

    def _count_failure():
        # Bookkeeping shared by every failure path; reads user_id at call time.
        self.stats['tasks_failed'] += 1
        self._update_user_stats(user_id, success=False)

    try:
        # Best-effort user lookup before a session exists.
        try:
            if hasattr(task, 'strategy') and task.strategy:
                user_id = getattr(task.strategy, 'user_id', None)
            elif hasattr(task, 'strategy_id') and task.strategy_id:
                # Resolved below, once a session is available.
                pass
        except Exception as e:
            logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")

        logger.info(f"Executing task: {task_id} | user_id: {user_id}")

        # Each task works on a session of its own.
        db = get_db_session()
        if db is None:
            no_session = DatabaseError(
                message=f"Failed to get database session for task {task_id}",
                user_id=user_id,
                task_id=getattr(task, 'id', None),
                task_type=task_type
            )
            self.exception_handler.handle_exception(no_session, log_level="error")
            _count_failure()
            return

        # The exception handler uses this session from here on.
        self.exception_handler.db = db

        # Attach the task to the new session if it is currently detached.
        from sqlalchemy.orm import object_session
        if object_session(task) is None:
            task = db.merge(task)

        # Second attempt at resolving the owning user, now with a session.
        if user_id is None and hasattr(task, 'strategy'):
            try:
                if task.strategy:
                    user_id = getattr(task.strategy, 'user_id', None)
                elif hasattr(task, 'strategy_id'):
                    from models.enhanced_strategy_models import EnhancedContentStrategy
                    owner_strategy = db.query(EnhancedContentStrategy).filter(
                        EnhancedContentStrategy.id == task.strategy_id
                    ).first()
                    if owner_strategy:
                        user_id = owner_strategy.user_id
            except Exception as e:
                logger.debug(f"Could not extract user_id after merge for task {task_id}: {e}")

        # Look up the executor registered for this task type.
        try:
            executor = self.registry.get_executor(task_type)
        except Exception as e:
            from .exception_handler import SchedulerConfigError
            config_error = SchedulerConfigError(
                message=f"Failed to get executor for task type {task_type}: {str(e)}",
                user_id=user_id,
                context={
                    "task_id": getattr(task, 'id', None),
                    "task_type": task_type
                },
                original_error=e
            )
            self.exception_handler.handle_exception(config_error)
            _count_failure()
            return

        # Run the task and translate its result into statistics.
        try:
            result = await executor.execute_task(task, db)

            if not result.success:
                _count_failure()

                failed_run = TaskExecutionError(
                    message=result.error_message or "Task execution failed",
                    user_id=user_id,
                    task_id=getattr(task, 'id', None),
                    task_type=task_type,
                    execution_time_ms=result.execution_time_ms,
                    context={"result_data": result.result_data}
                )
                self.exception_handler.handle_exception(failed_run, log_level="warning")

                # Only retry when retries are enabled and the result allows it.
                if self.enable_retries and result.retryable:
                    await self._schedule_retry(task, result.retry_delay)
            else:
                self.stats['tasks_executed'] += 1
                self._update_user_stats(user_id, success=True)
                logger.info(f"Task executed successfully: {task_id} | user_id: {user_id}")

        except SchedulerException as e:
            # Handled by the outer SchedulerException clause.
            raise
        except Exception as e:
            unexpected = TaskExecutionError(
                message=f"Unexpected error during task execution: {str(e)}",
                user_id=user_id,
                task_id=getattr(task, 'id', None),
                task_type=task_type,
                original_error=e
            )
            self.exception_handler.handle_exception(unexpected)
            _count_failure()

    except SchedulerException as e:
        self.exception_handler.handle_exception(e)
        _count_failure()
    except Exception as e:
        wrapper_error = TaskExecutionError(
            message=f"Unexpected error in task execution wrapper: {str(e)}",
            user_id=user_id,
            task_id=getattr(task, 'id', None),
            task_type=task_type,
            original_error=e
        )
        self.exception_handler.handle_exception(wrapper_error)
        _count_failure()
    finally:
        # Always release the session and the active-execution slot.
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session for task {task_id}: {e}")

        if task_id in self.active_executions:
            del self.active_executions[task_id]
def _update_user_stats(self, user_id: Optional[int], success: bool):
"""
@@ -622,6 +684,117 @@ class TaskScheduler:
return base_stats
def schedule_one_time_task(
    self,
    func: Callable,
    run_date: datetime,
    job_id: str,
    args: tuple = (),
    kwargs: Optional[Dict[str, Any]] = None,
    replace_existing: bool = True
) -> str:
    """
    Schedule a one-time task to run at a specific datetime.

    The job is registered with a ``DateTrigger`` and a one-hour misfire grace
    time. After registration the method logs a summary and, best-effort,
    writes a ``job_scheduled`` row to the scheduler event log. Failures while
    resolving the job-store name or writing the event log never prevent the
    job from being scheduled.

    Args:
        func: Function to execute.
        run_date: When the task should run. A naive datetime is treated as UTC.
        job_id: Unique identifier for this job.
        args: Positional arguments to pass to func.
        kwargs: Keyword arguments to pass to func. A ``user_id`` entry, when
            present, is used for logging and the event-log row.
        replace_existing: If True, replace an existing job with the same ID.

    Returns:
        The job ID that was scheduled.

    Raises:
        Exception: Any error raised while adding the job is logged and re-raised.
    """
    if not self._running:
        logger.warning(
            f"Scheduler not running, but scheduling job {job_id} anyway. "
            "APScheduler will start automatically when needed."
        )

    job_kwargs = kwargs or {}

    def _safe_close(session) -> None:
        # Closing is best-effort: a close failure must not undo a scheduled job.
        if session:
            try:
                session.close()
            except Exception as close_error:
                logger.debug(f"Failed to close database session: {close_error}")

    try:
        # Ensure run_date is timezone-aware (naive values are taken as UTC)
        if run_date.tzinfo is None:
            from datetime import timezone
            run_date = run_date.replace(tzinfo=timezone.utc)
            logger.debug(f"Added UTC timezone to run_date: {run_date}")

        self.scheduler.add_job(
            func,
            trigger=DateTrigger(run_date=run_date),
            args=args,
            kwargs=job_kwargs,
            id=job_id,
            replace_existing=replace_existing,
            misfire_grace_time=3600  # 1 hour grace period for missed jobs
        )

        # Job counts for the log summary; everything except the recurring
        # 'check_due_tasks' job is counted as a one-time job.
        all_jobs = self.scheduler.get_jobs()
        one_time_jobs = [j for j in all_jobs if j.id != 'check_due_tasks']

        user_id = job_kwargs.get('user_id')
        func_name = func.__name__ if hasattr(func, '__name__') else str(func)

        # Resolve the user's job store name. It is only logged; the job itself
        # was added without a job store argument above.
        job_store_name = 'default'
        if user_id:
            db = None
            try:
                db = get_db_session()
                if db:
                    job_store_name = get_user_job_store_name(user_id, db)
            except Exception as e:
                logger.warning(f"Could not determine job store for user {user_id}: {e}")
            finally:
                # Release the session even when the lookup raised.
                _safe_close(db)

        # Logged at WARNING level so the summary is visible in default log output.
        log_message = (
            f"[Scheduler] 📅 Scheduled One-Time Task\n"
            f"   ├─ Job ID: {job_id}\n"
            f"   ├─ Function: {func_name}\n"
            f"   ├─ User ID: {user_id or 'system'}\n"
            f"   ├─ Job Store: {job_store_name} (user context)\n"
            f"   ├─ Scheduled For: {run_date}\n"
            f"   ├─ Replace Existing: {replace_existing}\n"
            f"   ├─ Total One-Time Jobs: {len(one_time_jobs)}\n"
            f"   └─ Total Scheduled Jobs: {len(all_jobs)}"
        )
        logger.warning(log_message)

        # Record the scheduling event for the dashboard (best-effort).
        event_db = None
        try:
            event_db = get_db_session()
            if event_db:
                event_log = SchedulerEventLog(
                    event_type='job_scheduled',
                    event_date=datetime.utcnow(),
                    job_id=job_id,
                    job_type='one_time',
                    user_id=user_id,
                    event_data={
                        'function_name': func_name,
                        'job_store': job_store_name,
                        'scheduled_for': run_date.isoformat(),
                        'replace_existing': replace_existing
                    }
                )
                event_db.add(event_log)
                event_db.commit()
        except Exception as e:
            logger.debug(f"Failed to log job scheduling event: {e}")
        finally:
            # Release the session even when add/commit raised.
            _safe_close(event_db)

        return job_id
    except Exception as e:
        logger.error(f"Failed to schedule one-time task {job_id}: {e}")
        raise
def is_running(self) -> bool:
    """
    Report whether the scheduler is currently marked as running.

    Returns:
        The current value of the internal ``_running`` flag.
    """
    running = self._running
    return running

View File

@@ -0,0 +1,197 @@
"""
Task Execution Handler
Handles asynchronous execution of individual tasks with proper session isolation.
"""
from typing import TYPE_CHECKING, Any, Dict, Optional
from sqlalchemy.orm import object_session
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from .exception_handler import (
SchedulerException, TaskExecutionError, DatabaseError, SchedulerConfigError
)
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("task_execution_handler")
async def execute_task_async(
    scheduler: 'TaskScheduler',
    task_type: str,
    task: Any,
    summary: Optional[Dict[str, Any]] = None
):
    """
    Execute a single task asynchronously in its own database session.

    A new session is opened for the task, the task is merged into it when it
    is detached, the executor registered for ``task_type`` is run, and the
    outcome is recorded in ``scheduler.stats``, the per-user statistics and,
    when given, ``summary``.

    Every task contributes exactly one outcome: either ``executed`` or
    ``failed``. Failures raised as exceptions (no session, no executor,
    unexpected errors) are counted in ``summary['failed']`` just like failures
    reported through the executor's result.

    Args:
        scheduler: TaskScheduler instance providing stats, registry,
            exception handler and the active-execution table.
        task_type: Type of task
        task: Task instance from database (may be detached from its session)
        summary: Optional dict whose ``'executed'`` / ``'failed'`` counters
            are incremented with the result of this execution
    """
    task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
    db = None
    user_id = None
    outcome_recorded = False

    def _record_outcome(success: bool) -> None:
        """Count this task once in the global, per-user and cycle statistics."""
        nonlocal outcome_recorded
        if outcome_recorded:
            # A later error while handling an already-counted result must not
            # count the same task a second time.
            return
        outcome_recorded = True
        scheduler.stats['tasks_executed' if success else 'tasks_failed'] += 1
        scheduler._update_user_stats(user_id, success=success)
        if summary is not None:
            counter = 'executed' if success else 'failed'
            summary[counter] = summary.get(counter, 0) + 1

    try:
        # Extract user context if available (for user isolation tracking)
        try:
            if hasattr(task, 'strategy') and task.strategy:
                user_id = getattr(task.strategy, 'user_id', None)
            elif hasattr(task, 'strategy_id') and task.strategy_id:
                # Will query user_id after we have db session
                pass
        except Exception as e:
            logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")

        # Log task execution start (only for tasks that carry a database id)
        task_db_id = getattr(task, 'id', None)
        if task_db_id:
            logger.debug(f"[Scheduler] ▶️ Executing {task_type} task {task_db_id} | user_id: {user_id}")

        # Create a new database session for this async task; sessions are not
        # shared across concurrently running tasks.
        db = get_db_session()
        if db is None:
            error = DatabaseError(
                message=f"Failed to get database session for task {task_id}",
                user_id=user_id,
                task_id=getattr(task, 'id', None),
                task_type=task_type
            )
            scheduler.exception_handler.handle_exception(error, log_level="error")
            _record_outcome(False)
            return

        # Set database session for exception handler.
        # NOTE(review): the handler is shared by all tasks of this scheduler, so
        # concurrent tasks overwrite each other's session here, and the handler
        # keeps this reference after the session is closed below — confirm this
        # is intended.
        scheduler.exception_handler.db = db

        # Merge the detached task object into this session
        if object_session(task) is None:
            task = db.merge(task)

        # Extract user_id after merge if not already available
        if user_id is None and hasattr(task, 'strategy'):
            try:
                if task.strategy:
                    user_id = getattr(task.strategy, 'user_id', None)
                elif hasattr(task, 'strategy_id'):
                    # Query strategy if relationship not loaded
                    from models.enhanced_strategy_models import EnhancedContentStrategy
                    strategy = db.query(EnhancedContentStrategy).filter(
                        EnhancedContentStrategy.id == task.strategy_id
                    ).first()
                    if strategy:
                        user_id = strategy.user_id
            except Exception as e:
                logger.debug(f"Could not extract user_id after merge for task {task_id}: {e}")

        # Get executor for this task type
        try:
            executor = scheduler.registry.get_executor(task_type)
        except Exception as e:
            error = SchedulerConfigError(
                message=f"Failed to get executor for task type {task_type}: {str(e)}",
                user_id=user_id,
                context={
                    "task_id": getattr(task, 'id', None),
                    "task_type": task_type
                },
                original_error=e
            )
            scheduler.exception_handler.handle_exception(error)
            _record_outcome(False)
            return

        # Execute task with its own session (with error handling)
        try:
            result = await executor.execute_task(task, db)

            if result.success:
                _record_outcome(True)
                logger.debug(f"[Scheduler] ✅ Task {task_id} executed successfully | user_id: {user_id} | time: {result.execution_time_ms}ms")
            else:
                _record_outcome(False)

                # Create structured error for failed execution
                error = TaskExecutionError(
                    message=result.error_message or "Task execution failed",
                    user_id=user_id,
                    task_id=getattr(task, 'id', None),
                    task_type=task_type,
                    execution_time_ms=result.execution_time_ms,
                    context={"result_data": result.result_data}
                )
                scheduler.exception_handler.handle_exception(error, log_level="warning")

                logger.warning(f"[Scheduler] ❌ Task {task_id} failed | user_id: {user_id} | error: {result.error_message}")

                # Retry only when retries are enabled and the result allows it
                if scheduler.enable_retries and result.retryable:
                    await scheduler._schedule_retry(task, result.retry_delay)

        except SchedulerException as e:
            # Handled by the outer SchedulerException clause
            raise
        except Exception as e:
            # Wrap unexpected exceptions
            error = TaskExecutionError(
                message=f"Unexpected error during task execution: {str(e)}",
                user_id=user_id,
                task_id=getattr(task, 'id', None),
                task_type=task_type,
                original_error=e
            )
            scheduler.exception_handler.handle_exception(error)
            _record_outcome(False)

    except SchedulerException as e:
        # Handle scheduler exceptions
        scheduler.exception_handler.handle_exception(e)
        _record_outcome(False)
    except Exception as e:
        # Handle any other unexpected errors
        error = TaskExecutionError(
            message=f"Unexpected error in task execution wrapper: {str(e)}",
            user_id=user_id,
            task_id=getattr(task, 'id', None),
            task_type=task_type,
            original_error=e
        )
        scheduler.exception_handler.handle_exception(error)
        _record_outcome(False)
    finally:
        # Clean up database session
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session for task {task_id}: {e}")

        # Remove from active executions
        if task_id in scheduler.active_executions:
            del scheduler.active_executions[task_id]

View File

@@ -0,0 +1,756 @@
"""
OAuth Token Monitoring Task Executor
Handles execution of OAuth token monitoring tasks for connected platforms.
"""
import logging
import os
import time
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from sqlalchemy.orm import Session
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask, OAuthTokenExecutionLog
from models.subscription_models import UsageAlert
from utils.logger_utils import get_service_logger
# Import platform-specific services
from services.gsc_service import GSCService
from services.integrations.bing_oauth import BingOAuthService
from services.integrations.wordpress_oauth import WordPressOAuthService
from services.wix_service import WixService
logger = get_service_logger("oauth_token_monitoring_executor")
class OAuthTokenMonitoringExecutor(TaskExecutor):
"""
Executor for OAuth token monitoring tasks.
Handles:
- Checking token validity and expiration
- Attempting automatic token refresh
- Logging results and updating task status
- One-time refresh attempt (no automatic retries on failure)
"""
def __init__(self):
    """Set up the logger, the exception handler and the expiry warning window."""
    # Tokens expiring in fewer than this many days are treated as "expiring soon".
    self.expiration_warning_days = 7
    self.exception_handler = SchedulerExceptionHandler()
    self.logger = logger
async def execute_task(self, task: OAuthTokenMonitoringTask, db: Session) -> TaskExecutionResult:
    """
    Execute an OAuth token monitoring task.

    Checks the token for the task's platform (refreshing it where the platform
    check supports that), writes an execution log row, and updates the task:

    - success: status ``active``, failure reason cleared, ``next_check``
      rescheduled with frequency ``'Weekly'``;
    - failure: status ``failed``, ``next_check`` left untouched so that no
      automatic retry happens, and a failure alert is created for the user.

    If anything raises, the half-finished unit of work is rolled back first,
    so exactly one ``failed`` log row is written for the run and the session
    is usable again for the error bookkeeping.

    Args:
        task: OAuthTokenMonitoringTask instance
        db: Database session the task is attached to

    Returns:
        TaskExecutionResult of the token check, or a non-retryable failure
        result when an unexpected error occurred.
    """
    start_time = time.time()
    user_id = task.user_id
    platform = task.platform
    task_id = task.id

    try:
        self.logger.info(
            f"Executing OAuth token monitoring: task_id={task_id} | "
            f"user_id={user_id} | platform={platform}"
        )

        # Create execution log
        execution_log = OAuthTokenExecutionLog(
            task_id=task_id,
            execution_date=datetime.utcnow(),
            status='running'
        )
        db.add(execution_log)
        db.flush()

        # Check and refresh token
        result = await self._check_and_refresh_token(task, db)

        # Update execution log
        execution_time_ms = int((time.time() - start_time) * 1000)
        execution_log.status = 'success' if result.success else 'failed'
        execution_log.result_data = result.result_data
        execution_log.error_message = result.error_message
        execution_log.execution_time_ms = execution_time_ms

        # Update task based on result
        task.last_check = datetime.utcnow()

        if result.success:
            task.last_success = datetime.utcnow()
            task.status = 'active'
            task.failure_reason = None
            # Schedule next check
            task.next_check = self.calculate_next_execution(
                task=task,
                frequency='Weekly',
                last_execution=task.last_check
            )
        else:
            # Refresh failed - mark as failed and stop automatic retries
            task.last_failure = datetime.utcnow()
            task.failure_reason = result.error_message
            task.status = 'failed'
            # Do NOT update next_check - wait for manual trigger

            self.logger.warning(
                f"OAuth token refresh failed for user {user_id}, platform {platform}. "
                f"Task marked as failed. No automatic retry will be scheduled."
            )

            # Create UsageAlert notification for the user
            self._create_failure_alert(user_id, platform, result.error_message, result.result_data, db)

        task.updated_at = datetime.utcnow()
        db.commit()

        return result

    except Exception as e:
        execution_time_ms = int((time.time() - start_time) * 1000)

        # Discard the partial unit of work before touching the session again:
        # after a failed flush/commit the session cannot be used until it is
        # rolled back, and the pending 'running' log row must not be committed
        # next to the 'failed' row written below.
        try:
            db.rollback()
        except Exception as rollback_error:
            self.logger.warning(
                f"Rollback failed for OAuth token monitoring task {task_id}: {rollback_error}"
            )

        # Set database session for exception handler
        self.exception_handler.db = db

        # Create structured error
        error = TaskExecutionError(
            message=f"Error executing OAuth token monitoring task {task_id}: {str(e)}",
            user_id=user_id,
            task_id=task_id,
            task_type="oauth_token_monitoring",
            execution_time_ms=execution_time_ms,
            context={
                "platform": platform,
                "user_id": user_id
            },
            original_error=e
        )

        # Handle exception with structured logging
        self.exception_handler.handle_exception(error)

        # Persist the failure: one log row plus the failed task state
        try:
            execution_log = OAuthTokenExecutionLog(
                task_id=task_id,
                execution_date=datetime.utcnow(),
                status='failed',
                error_message=str(e),
                execution_time_ms=execution_time_ms,
                result_data={
                    "error_type": error.error_type.value,
                    "severity": error.severity.value,
                    "context": error.context
                }
            )
            db.add(execution_log)

            task.last_failure = datetime.utcnow()
            task.failure_reason = str(e)
            task.status = 'failed'
            task.last_check = datetime.utcnow()
            task.updated_at = datetime.utcnow()
            # Do NOT update next_check - wait for manual trigger

            # Create UsageAlert notification for the user
            self._create_failure_alert(user_id, platform, str(e), None, db)

            db.commit()
        except Exception as commit_error:
            db_error = DatabaseError(
                message=f"Error saving execution log: {str(commit_error)}",
                user_id=user_id,
                task_id=task_id,
                original_error=commit_error
            )
            self.exception_handler.handle_exception(db_error)
            db.rollback()

        return TaskExecutionResult(
            success=False,
            error_message=str(e),
            execution_time_ms=execution_time_ms,
            retryable=False,  # Do not retry automatically
            retry_delay=0
        )
async def _check_and_refresh_token(
    self,
    task: OAuthTokenMonitoringTask,
    db: Session
) -> TaskExecutionResult:
    """
    Dispatch the token check to the handler for the task's platform.

    Supported platforms are ``gsc``, ``bing``, ``wordpress`` and ``wix``; any
    other value yields a non-retryable failure result. Exceptions raised by a
    handler are logged and converted into a non-retryable failure result.

    Args:
        task: OAuthTokenMonitoringTask instance (platform and user_id are read)
        db: Database session (not used by this method)

    Returns:
        TaskExecutionResult with success status and details
    """
    platform = task.platform
    user_id = task.user_id

    try:
        self.logger.info(f"Checking token for platform: {platform}, user: {user_id}")

        # Platform identifier -> coroutine method performing the check
        handlers = {
            'gsc': self._check_gsc_token,
            'bing': self._check_bing_token,
            'wordpress': self._check_wordpress_token,
            'wix': self._check_wix_token,
        }
        handler = handlers.get(platform)
        if handler is not None:
            return await handler(user_id)

        unsupported_details = {
            'platform': platform,
            'user_id': user_id,
            'error': 'Unsupported platform'
        }
        return TaskExecutionResult(
            success=False,
            error_message=f"Unsupported platform: {platform}",
            result_data=unsupported_details,
            retryable=False
        )

    except Exception as e:
        self.logger.error(
            f"Error checking/refreshing token for platform {platform}, user {user_id}: {e}",
            exc_info=True
        )
        failure_details = {
            'platform': platform,
            'user_id': user_id,
            'error': str(e)
        }
        return TaskExecutionResult(
            success=False,
            error_message=f"Token check failed: {str(e)}",
            result_data=failure_details,
            retryable=False  # Do not retry automatically
        )
async def _check_gsc_token(self, user_id: str) -> TaskExecutionResult:
    """
    Check the Google Search Console token for a user.

    The check succeeds when ``GSCService.load_user_credentials`` returns
    credentials. Per the original author's note the service refreshes expired
    tokens while loading; that is not visible from this method.

    Args:
        user_id: User whose GSC credentials are loaded.

    Returns:
        Success result with status ``valid``, or a non-retryable failure
        result (status ``not_found`` or an error description).
    """
    try:
        # Absolute path to the database file, resolved from the working directory
        service = GSCService(db_path=os.path.abspath("alwrity.db"))

        if service.load_user_credentials(user_id):
            # Credentials could be loaded, so the token is reported as valid
            return TaskExecutionResult(
                success=True,
                result_data={
                    'platform': 'gsc',
                    'user_id': user_id,
                    'status': 'valid',
                    'check_time': datetime.utcnow().isoformat(),
                    'message': 'GSC token is valid (auto-refreshed if expired)'
                }
            )

        missing_details = {
            'platform': 'gsc',
            'user_id': user_id,
            'status': 'not_found',
            'check_time': datetime.utcnow().isoformat()
        }
        return TaskExecutionResult(
            success=False,
            error_message="GSC credentials not found or could not be loaded",
            result_data=missing_details,
            retryable=False
        )

    except Exception as e:
        self.logger.error(f"Error checking GSC token for user {user_id}: {e}", exc_info=True)
        error_details = {
            'platform': 'gsc',
            'user_id': user_id,
            'error': str(e)
        }
        return TaskExecutionResult(
            success=False,
            error_message=f"GSC token check failed: {str(e)}",
            result_data=error_details,
            retryable=False
        )
async def _check_bing_token(self, user_id: str) -> TaskExecutionResult:
    """
    Check and refresh the Bing Webmaster Tools token for a user.

    Decision order:
      1. No tokens at all -> failure (``not_found``).
      2. Active tokens: if one expires within ``expiration_warning_days`` and
         has a refresh token, refresh it (``refreshed`` / ``refresh_failed``);
         otherwise report ``valid``.
      3. Only expired tokens: try one refresh of the first expired token when
         it expired less than 24 hours ago; otherwise report ``expired``.
      4. Anything else -> failure (``invalid``).

    Expiry timestamps are normalized to naive UTC before being compared with
    ``datetime.utcnow()``. Values carrying a ``Z`` suffix or a UTC offset
    parse to timezone-aware datetimes, which cannot be subtracted from a
    naive one.

    Args:
        user_id: User whose Bing tokens are checked.

    Returns:
        TaskExecutionResult; all failures are non-retryable.
    """
    def _as_naive_utc(raw: Any) -> Optional[datetime]:
        """Parse an expiry value into a naive UTC datetime; None if absent or unparseable."""
        if not raw:
            return None
        try:
            parsed = raw if isinstance(raw, datetime) else datetime.fromisoformat(
                str(raw).replace('Z', '+00:00')
            )
        except (ValueError, TypeError):
            return None
        offset = parsed.utcoffset()
        if offset is not None:
            # Shift to UTC, then drop tzinfo so it compares with utcnow()
            parsed = (parsed - offset).replace(tzinfo=None)
        return parsed

    def _result(
        success: bool,
        status: str,
        error_message: Optional[str] = None,
        message: Optional[str] = None
    ) -> TaskExecutionResult:
        """Build the result payload shared by every outcome of this check."""
        data = {
            'platform': 'bing',
            'user_id': user_id,
            'status': status,
            'check_time': datetime.utcnow().isoformat()
        }
        if message is not None:
            data['message'] = message
        if success:
            return TaskExecutionResult(success=True, result_data=data)
        return TaskExecutionResult(
            success=False,
            error_message=error_message,
            result_data=data,
            retryable=False
        )

    try:
        # Absolute path to the database file, resolved from the working directory
        db_path = os.path.abspath("alwrity.db")
        bing_service = BingOAuthService(db_path=db_path)

        # Get token status (includes expired tokens)
        token_status = bing_service.get_user_token_status(user_id)

        if not token_status.get('has_tokens'):
            return _result(False, 'not_found', error_message="No Bing tokens found for user")

        active_tokens = token_status.get('active_tokens', [])
        expired_tokens = token_status.get('expired_tokens', [])

        if active_tokens:
            now = datetime.utcnow()
            token_to_refresh = None

            # Pick the first active token that expires inside the warning window
            for token in active_tokens:
                expires_at = _as_naive_utc(token.get('expires_at'))
                if expires_at is None:
                    # Unknown expiry: treated as valid
                    continue
                if (expires_at - now).days < self.expiration_warning_days:
                    token_to_refresh = token
                    break

            if token_to_refresh is not None:
                refresh_token = token_to_refresh.get('refresh_token')
                if refresh_token:
                    if bing_service.refresh_access_token(user_id, refresh_token):
                        return _result(
                            True, 'refreshed',
                            message='Bing token refreshed successfully'
                        )
                    return _result(
                        False, 'refresh_failed',
                        error_message="Failed to refresh Bing token"
                    )
                self.logger.warning(
                    f"Bing token for user {user_id} expires soon but has no refresh token"
                )

            # Token is still active (not expiring soon, or cannot be refreshed yet)
            return _result(True, 'valid', message='Bing token is valid')

        if expired_tokens:
            # Original author's note: the list is sorted by created_at DESC,
            # so the first entry is the most recent token (not verifiable here).
            latest_token = expired_tokens[0]
            refresh_token = latest_token.get('refresh_token')
            expires_at = _as_naive_utc(latest_token.get('expires_at'))

            if refresh_token and expires_at is not None:
                # Only refresh if expired within the last 24 hours (grace period)
                hours_since_expiry = (datetime.utcnow() - expires_at).total_seconds() / 3600
                if hours_since_expiry < 24:
                    refreshed = False
                    try:
                        refreshed = bool(
                            bing_service.refresh_access_token(user_id, refresh_token)
                        )
                    except Exception as refresh_error:
                        # Best-effort: a failed refresh is reported as 'expired' below
                        self.logger.warning(
                            f"Refreshing expired Bing token failed for user {user_id}: {refresh_error}"
                        )
                    if refreshed:
                        return _result(
                            True, 'refreshed',
                            message='Bing token refreshed from expired state'
                        )

            return _result(
                False, 'expired',
                error_message="Bing token expired and could not be refreshed",
                message='Bing token expired. User needs to reconnect.'
            )

        return _result(False, 'invalid', error_message="No valid Bing tokens found")

    except Exception as e:
        self.logger.error(f"Error checking Bing token for user {user_id}: {e}", exc_info=True)
        return TaskExecutionResult(
            success=False,
            error_message=f"Bing token check failed: {str(e)}",
            result_data={
                'platform': 'bing',
                'user_id': user_id,
                'error': str(e)
            },
            retryable=False
        )
async def _check_wordpress_token(self, user_id: str) -> TaskExecutionResult:
    """
    Check WordPress token validity for a user.

    Per the original author's note, WordPress tokens cannot be refreshed and
    require user re-authorization once expired, so this method only
    classifies tokens:

    - tokens with a parseable expiry are classified by date (expired,
      expiring within ``expiration_warning_days``, or valid);
    - tokens without a usable expiry are verified with
      ``WordPressOAuthService.test_token``.

    Expiry timestamps are normalized to naive UTC before being compared with
    ``datetime.utcnow()``; offset-aware values (``Z`` or ``+hh:mm``) cannot
    be subtracted from a naive datetime.

    Args:
        user_id: User whose WordPress tokens are checked.

    Returns:
        Success when at least one token is valid; otherwise a non-retryable
        failure with status ``expiring_soon``, ``expired`` or ``not_found``.
    """
    def _as_naive_utc(raw: Any) -> Optional[datetime]:
        """Parse an expiry value into a naive UTC datetime; None if absent or unparseable."""
        if not raw:
            return None
        try:
            parsed = raw if isinstance(raw, datetime) else datetime.fromisoformat(
                str(raw).replace('Z', '+00:00')
            )
        except (ValueError, TypeError):
            return None
        offset = parsed.utcoffset()
        if offset is not None:
            # Shift to UTC, then drop tzinfo so it compares with utcnow()
            parsed = (parsed - offset).replace(tzinfo=None)
        return parsed

    try:
        # Absolute path to the database file, resolved from the working directory
        db_path = os.path.abspath("alwrity.db")
        wordpress_service = WordPressOAuthService(db_path=db_path)
        tokens = wordpress_service.get_user_tokens(user_id)

        if not tokens:
            return TaskExecutionResult(
                success=False,
                error_message="No WordPress tokens found for user",
                result_data={
                    'platform': 'wordpress',
                    'user_id': user_id,
                    'status': 'not_found',
                    'check_time': datetime.utcnow().isoformat()
                },
                retryable=False
            )

        now = datetime.utcnow()
        valid_tokens = []
        expiring_soon = []
        expired_tokens = []

        for token in tokens:
            expires_at = _as_naive_utc(token.get('expires_at'))

            if expires_at is None:
                # No (or unparseable) expiration date - test token validity via API
                access_token = token.get('access_token')
                if access_token and wordpress_service.test_token(access_token):
                    valid_tokens.append(token)
                else:
                    expired_tokens.append(token)
                continue

            days_until_expiry = (expires_at - now).days
            if days_until_expiry < 0:
                expired_tokens.append(token)
            elif days_until_expiry < self.expiration_warning_days:
                expiring_soon.append(token)
            else:
                valid_tokens.append(token)

        if valid_tokens:
            return TaskExecutionResult(
                success=True,
                result_data={
                    'platform': 'wordpress',
                    'user_id': user_id,
                    'status': 'valid',
                    'check_time': datetime.utcnow().isoformat(),
                    'message': 'WordPress token is valid',
                    'valid_tokens_count': len(valid_tokens)
                }
            )

        if expiring_soon:
            # No refresh is attempted for WordPress - user needs to reconnect
            return TaskExecutionResult(
                success=False,
                error_message="WordPress token expiring soon and cannot be auto-refreshed",
                result_data={
                    'platform': 'wordpress',
                    'user_id': user_id,
                    'status': 'expiring_soon',
                    'check_time': datetime.utcnow().isoformat(),
                    'message': 'WordPress token expires soon. User needs to reconnect (WordPress tokens cannot be auto-refreshed).'
                },
                retryable=False
            )

        return TaskExecutionResult(
            success=False,
            error_message="WordPress token expired and cannot be refreshed",
            result_data={
                'platform': 'wordpress',
                'user_id': user_id,
                'status': 'expired',
                'check_time': datetime.utcnow().isoformat(),
                'message': 'WordPress token expired. User needs to reconnect (WordPress tokens cannot be auto-refreshed).'
            },
            retryable=False
        )

    except Exception as e:
        self.logger.error(f"Error checking WordPress token for user {user_id}: {e}", exc_info=True)
        return TaskExecutionResult(
            success=False,
            error_message=f"WordPress token check failed: {str(e)}",
            result_data={
                'platform': 'wordpress',
                'user_id': user_id,
                'error': str(e)
            },
            retryable=False
        )
async def _check_wix_token(self, user_id: str) -> TaskExecutionResult:
    """
    Report that automated Wix token checks are not available yet.

    Wix tokens are currently stored in frontend sessionStorage, so the backend
    has nothing to inspect. Until backend storage is implemented this check
    always yields a non-retryable failure whose status is 'not_supported'.

    Args:
        user_id: User ID the check was requested for

    Returns:
        TaskExecutionResult with success=False and retryable=False
    """
    try:
        # TODO: Wix tokens are stored in frontend sessionStorage, not backend database.
        # Once backend storage is implemented, the real token check belongs here.
        unsupported_payload = {
            'platform': 'wix',
            'user_id': user_id,
            'status': 'not_supported',
            'check_time': datetime.utcnow().isoformat(),
            'message': 'Wix token monitoring requires backend token storage implementation'
        }
        return TaskExecutionResult(
            success=False,
            error_message="Wix token monitoring not yet supported - tokens stored in frontend sessionStorage",
            result_data=unsupported_payload,
            retryable=False
        )
    except Exception as exc:
        # Defensive: keep the same failure shape as the other platform checks
        self.logger.error(f"Error checking Wix token for user {user_id}: {exc}", exc_info=True)
        failure_payload = {
            'platform': 'wix',
            'user_id': user_id,
            'error': str(exc)
        }
        return TaskExecutionResult(
            success=False,
            error_message=f"Wix token check failed: {str(exc)}",
            result_data=failure_payload,
            retryable=False
        )
def _create_failure_alert(
    self,
    user_id: str,
    platform: str,
    error_message: str,
    result_data: Optional[Dict[str, Any]],
    db: Session
):
    """
    Queue a UsageAlert describing a failed OAuth token check.

    The alert is only added to the session; committing is left to the caller
    so the alert is persisted together with the task update. Any exception
    raised while building the alert is logged and swallowed so that alert
    creation never fails the surrounding task execution.

    Args:
        user_id: User ID
        platform: Platform identifier (gsc, bing, wordpress, wix)
        error_message: Error message from token check
        result_data: Optional result data from token check; its 'status'
            entry selects the severity and the wording of the alert
        db: Database session
    """
    try:
        # A missing/empty result_data is treated like an unrecognised status
        status = result_data.get('status', 'unknown') if result_data else 'unknown'

        # Only the "soft" states are warnings; every other status
        # (expired, refresh_failed, unknown, ...) is reported as an error
        is_warning = status in ['expiring_soon', 'not_found']
        severity = 'warning' if is_warning else 'error'
        alert_type = 'oauth_token_warning' if is_warning else 'oauth_token_failure'

        # Human-readable platform name; unknown platforms fall back to upper case
        display_names = {
            'gsc': 'Google Search Console',
            'bing': 'Bing Webmaster Tools',
            'wordpress': 'WordPress',
            'wix': 'Wix'
        }
        display_name = display_names.get(platform, platform.upper())

        # Pick the title and message that match the reported status
        if status == 'expired':
            title, message = (
                f"{display_name} Token Expired",
                f"Your {display_name} access token has expired and could not be automatically renewed. "
                f"Please reconnect your {display_name} account to continue using this integration."
            )
        elif status == 'expiring_soon':
            title, message = (
                f"{display_name} Token Expiring Soon",
                f"Your {display_name} access token will expire soon. "
                f"Please reconnect your {display_name} account to avoid interruption."
            )
        elif status == 'refresh_failed':
            title, message = (
                f"{display_name} Token Renewal Failed",
                f"Failed to automatically renew your {display_name} access token. "
                f"Please reconnect your {display_name} account. "
                f"Error: {error_message}"
            )
        elif status == 'not_found':
            title, message = (
                f"{display_name} Token Not Found",
                f"No {display_name} access token found. "
                f"Please connect your {display_name} account in the onboarding settings."
            )
        else:
            title, message = (
                f"{display_name} Token Error",
                f"An error occurred while checking your {display_name} access token. "
                f"Please reconnect your {display_name} account. "
                f"Error: {error_message}"
            )

        # Alerts are bucketed by the current month (YYYY-MM format)
        from datetime import datetime
        billing_period = datetime.utcnow().strftime("%Y-%m")

        # Note: no commit here - the caller commits, so the alert is created
        # atomically with the task update
        db.add(
            UsageAlert(
                user_id=user_id,
                alert_type=alert_type,
                threshold_percentage=0,  # Not applicable for OAuth alerts
                provider=None,  # Not applicable for OAuth alerts
                title=title,
                message=message,
                severity=severity,
                is_sent=False,  # Will be marked as sent when frontend polls
                is_read=False,
                billing_period=billing_period
            )
        )

        self.logger.info(
            f"Created UsageAlert for OAuth token failure: user={user_id}, "
            f"platform={platform}, severity={severity}"
        )
    except Exception as exc:
        # Don't fail the entire task execution if alert creation fails
        self.logger.error(
            f"Failed to create UsageAlert for OAuth token failure: {exc}",
            exc_info=True
        )
def calculate_next_execution(
    self,
    task: OAuthTokenMonitoringTask,
    frequency: str,
    last_execution: Optional[datetime] = None
) -> datetime:
    """
    Work out when this monitoring task should run next.

    OAuth token monitoring always runs on a 7-day cadence. Any frequency
    other than 'Weekly' is logged as a warning and still scheduled weekly.

    Args:
        task: OAuthTokenMonitoringTask instance
        frequency: Frequency string (should be 'Weekly' for token monitoring)
        last_execution: Reference datetime; when omitted, task.last_check is
            used, or the current UTC time if the task has never been checked

    Returns:
        Next execution datetime (reference time plus 7 days)
    """
    reference_time = last_execution
    if reference_time is None:
        reference_time = task.last_check or datetime.utcnow()

    if frequency != 'Weekly':
        # Unrecognised frequency: warn, then fall through to the weekly default
        self.logger.warning(
            f"Unknown frequency '{frequency}' for OAuth token monitoring task {task.id}. "
            f"Defaulting to Weekly (7 days)."
        )

    one_week = timedelta(days=7)
    return reference_time + one_week

View File

@@ -1,4 +1,12 @@
"""
Scheduler utilities.
Scheduler Utilities Package
"""
from .task_loader import load_due_monitoring_tasks
from .user_job_store import extract_domain_root, get_user_job_store_name
__all__ = [
'load_due_monitoring_tasks',
'extract_domain_root',
'get_user_job_store_name'
]

View File

@@ -0,0 +1,54 @@
"""
OAuth Token Monitoring Task Loader
Functions to load due OAuth token monitoring tasks from database.
"""
from datetime import datetime
from typing import List, Optional, Union
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
def load_due_oauth_token_monitoring_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None
) -> List[OAuthTokenMonitoringTask]:
    """
    Fetch the OAuth token monitoring tasks that should be executed now.

    A task is due when:
    - its status is 'active', and
    - its next_check is at or before the current UTC time, or is NULL
      (task has never been scheduled)

    When user_id is given, only that user's tasks are returned (user
    isolation); otherwise due tasks for all users are returned.

    Args:
        db: Database session
        user_id: Optional user ID (Clerk string) to filter tasks; it is
            converted with str() before comparison

    Returns:
        List of due OAuthTokenMonitoringTask instances
    """
    cutoff = datetime.utcnow()

    is_active = OAuthTokenMonitoringTask.status == 'active'
    is_due = or_(
        OAuthTokenMonitoringTask.next_check <= cutoff,
        OAuthTokenMonitoringTask.next_check.is_(None)
    )
    due_tasks = db.query(OAuthTokenMonitoringTask).filter(and_(is_active, is_due))

    if user_id is None:
        # System-wide load: no per-user restriction
        return due_tasks.all()

    # Restrict to a single user (IDs are compared as strings)
    return due_tasks.filter(OAuthTokenMonitoringTask.user_id == str(user_id)).all()

View File

@@ -4,7 +4,7 @@ Functions to load due tasks from database.
"""
from datetime import datetime
from typing import List, Optional
from typing import List, Optional, Union
from sqlalchemy.orm import Session, joinedload
from sqlalchemy import and_, or_
@@ -14,7 +14,7 @@ from models.enhanced_strategy_models import EnhancedContentStrategy
def load_due_monitoring_tasks(
db: Session,
user_id: Optional[int] = None
user_id: Optional[Union[str, int]] = None
) -> List[MonitoringTask]:
"""
Load all monitoring tasks that are due for execution.
@@ -22,14 +22,17 @@ def load_due_monitoring_tasks(
Criteria:
- status == 'active'
- next_execution <= now (or is None for first execution)
- Optional: user_id filter for specific user (for future admin features)
- Optional: user_id filter for specific user (for user isolation)
Note: Strategy relationship is eagerly loaded to ensure user_id is accessible
during task execution for user isolation.
User isolation is enforced through filtering by user_id when provided.
If no user_id is provided, loads tasks for all users (for system-wide monitoring).
Args:
db: Database session
user_id: Optional user ID to filter tasks (if None, loads all users' tasks)
user_id: Optional user ID (Clerk string or int) to filter tasks (if None, loads all users' tasks)
Returns:
List of due MonitoringTask instances with strategy relationship loaded

View File

@@ -0,0 +1,129 @@
"""
User Job Store Utilities
Utilities for managing per-user job stores based on website root.
"""
from typing import Optional
from urllib.parse import urlparse
from loguru import logger
from sqlalchemy.orm import Session as SQLSession
from services.database import get_db_session
from models.onboarding import OnboardingSession, WebsiteAnalysis
def extract_domain_root(url: str) -> str:
    """
    Extract domain root from a website URL for use as job store identifier.

    Credentials and port numbers in the URL are ignored, and matching is
    case-insensitive.

    Examples:
        https://www.example.com -> example
        https://blog.example.com -> example
        https://example.co.uk -> example
        http://subdomain.example.com/path -> example
        https://user:pw@Example.CO.UK:8443 -> example
        http://localhost:8000 -> localhost

    Args:
        url: Website URL (a scheme-less value such as 'example.com/path' is
            accepted; its first path segment is treated as the host)

    Returns:
        Domain root (e.g., 'example') or 'default' if extraction fails or the
        result is shorter than two characters
    """
    # Labels that, in second-to-last position of a host with 3+ labels, are
    # treated as part of a compound suffix (e.g. co.uk, com.au)
    compound_suffix_labels = ('co', 'com', 'net', 'org')
    try:
        parsed = urlparse(url.strip())
        if parsed.netloc:
            # .hostname (unlike .netloc) excludes userinfo and port, so
            # 'user:pw@host:8080' cannot leak into the identifier
            hostname = parsed.hostname or ''
        else:
            # Scheme-less input: the host is the first path segment
            hostname = parsed.path.split('/')[0]

        # Normalise before the structural checks so that 'WWW.' and 'CO.UK'
        # are recognised; also drop a trailing root dot ('example.com.')
        hostname = hostname.lower().rstrip('.')

        # Remove www. prefix if present
        if hostname.startswith('www.'):
            hostname = hostname[4:]

        # For example.com -> example, for example.co.uk -> example
        parts = hostname.split('.')
        if len(parts) >= 3 and parts[-2] in compound_suffix_labels:
            root = parts[-3]
        elif len(parts) >= 2:
            root = parts[-2]
        else:
            root = parts[0]

        # Keep only characters that are valid in a job store name
        root = ''.join(c for c in root.strip() if c.isalnum() or c in ('-', '_'))

        # Empty or single-character roots are not usable identifiers
        if len(root) < 2:
            return 'default'
        return root
    except Exception as e:
        logger.warning(f"Failed to extract domain root from URL '{url}': {e}")
        return 'default'
def get_user_job_store_name(user_id: str, db: SQLSession = None) -> str:
    """
    Resolve the job store name for a user from their onboarding website URL.

    The most recently updated onboarding session for the user is looked up,
    then the most recently updated website analysis for that session; the
    analysis' website URL is reduced to its domain root. Whenever any of
    these is missing, or an error occurs, 'default' is returned.

    Args:
        user_id: User ID (Clerk string)
        db: Optional database session; when omitted a session is obtained via
            get_db_session() and closed again before returning

    Returns:
        Job store name (e.g., 'example' or 'default')
    """
    session = db
    owns_session = False  # True only for a session opened by this call
    try:
        if not session:
            session = get_db_session()
            owns_session = True
        if not session:
            logger.warning(f"Could not get database session for user {user_id}, using default job store")
            return 'default'

        # Query directly since user_id is a string (Clerk ID)
        latest_onboarding = (
            session.query(OnboardingSession)
            .filter(OnboardingSession.user_id == user_id)
            .order_by(OnboardingSession.updated_at.desc())
            .first()
        )
        if not latest_onboarding:
            logger.debug(
                f"[Job Store] No onboarding session found for user {user_id}, using default job store. "
                f"This is normal if user hasn't completed onboarding."
            )
            return 'default'

        # Latest website analysis recorded for that onboarding session
        latest_analysis = (
            session.query(WebsiteAnalysis)
            .filter(WebsiteAnalysis.session_id == latest_onboarding.id)
            .order_by(WebsiteAnalysis.updated_at.desc())
            .first()
        )
        if not latest_analysis or not latest_analysis.website_url:
            logger.debug(
                f"[Job Store] No website URL found for user {user_id} (session_id: {latest_onboarding.id}), "
                f"using default job store. This is normal if website analysis wasn't completed."
            )
            return 'default'

        site_url = latest_analysis.website_url
        store_name = extract_domain_root(site_url)
        logger.debug(f"Job store for user {user_id}: {store_name} (from {site_url})")
        return store_name
    except Exception as exc:
        logger.error(f"Error getting job store name for user {user_id}: {exc}")
        return 'default'
    finally:
        # Only close sessions this call opened; closing is best-effort
        if owns_session and session:
            try:
                session.close()
            except Exception:
                pass

View File

@@ -494,10 +494,8 @@ class LimitValidator:
display_provider_name = actual_provider_name or provider_name
logger.error(f"[Pre-flight Check] ✅ Operation {op_idx + 1}/{len(operations)}: {operation_type}")
logger.error(f" ├─ Provider: {display_provider_name} (enum: {provider_name})")
logger.error(f" ├─ Operation Index: {op_idx}")
logger.error(f" └─ Estimated Tokens Requested: {tokens_requested}")
# Log operation details at debug level (only when needed)
logger.debug(f"[Pre-flight] Operation {op_idx + 1}/{len(operations)}: {operation_type} ({display_provider_name}, {tokens_requested} tokens)")
# Check if this is an LLM provider
llm_providers = ['gemini', 'openai', 'anthropic', 'mistral']
@@ -563,13 +561,11 @@ class LimitValidator:
if result:
base_current_tokens = result[0] if result[0] is not None else 0
logger.error(f"[Pre-flight Check] ✅ Raw SQL query returned result: {result[0]} -> {base_current_tokens}")
else:
base_current_tokens = 0
logger.error(f"[Pre-flight Check] ⚠️ Raw SQL query returned None (no rows found)")
query_succeeded = True
logger.error(f"[Pre-flight Check] ✅ Raw SQL query succeeded for {provider_tokens_key}: {base_current_tokens}")
logger.debug(f"[Pre-flight] Raw SQL query for {provider_tokens_key}: {base_current_tokens}")
except Exception as sql_error:
logger.error(f" └─ Raw SQL query failed for {provider_tokens_key}: {type(sql_error).__name__}: {sql_error}", exc_info=True)
@@ -606,14 +602,8 @@ class LimitValidator:
if not query_succeeded:
logger.warning(f" └─ Both query methods failed, using 0 as fallback")
# CRITICAL LOG: Always log what we got from DB - this helps debug renewal issues
# Use ERROR level to ensure it shows even if INFO is filtered
logger.error(f"[Pre-flight Check] 🔍 Fresh DB Query for {display_provider_name}:")
logger.error(f" ├─ Column: {provider_tokens_key}")
logger.error(f" ├─ Billing Period: {current_period}")
logger.error(f" ├─ User ID: {user_id}")
logger.error(f" ├─ Method: {'Raw SQL' if query_succeeded and base_current_tokens >= 0 else 'ORM' if query_succeeded else 'Failed - using 0'}")
logger.error(f" └─ Value from DB: {base_current_tokens}")
# Log DB query result at debug level (only when needed for troubleshooting)
logger.debug(f"[Pre-flight] DB query for {display_provider_name} ({provider_tokens_key}): {base_current_tokens} (period: {current_period})")
# Add any projected tokens from previous operations in this validation run
# Note: total_llm_tokens tracks ONLY projected tokens from this run, not base DB value
@@ -622,16 +612,8 @@ class LimitValidator:
# Current tokens = base from DB + projected from previous operations in this run
current_provider_tokens = base_current_tokens + projected_from_previous
# Use ERROR level to ensure visibility
logger.error(f"[Pre-flight Check] 📊 Token Calculation for {display_provider_name}:")
logger.error(f" ├─ Base from DB (fresh query): {base_current_tokens}")
logger.error(f" ├─ Projected from previous ops in this run: {projected_from_previous}")
logger.error(f" └─ Total current tokens (base + projected): {current_provider_tokens}")
# Also check the initial usage object to see if it's being used incorrectly
if usage and hasattr(usage, provider_tokens_key):
initial_usage_value = getattr(usage, provider_tokens_key, 0) or 0
logger.error(f" ⚠️ Initial usage object value: {initial_usage_value} (this should NOT be used for fresh query)")
# Log token calculation at debug level
logger.debug(f"[Pre-flight] Token calc for {display_provider_name}: base={base_current_tokens}, projected={projected_from_previous}, total={current_provider_tokens}")
token_limit = limits.get(provider_tokens_key, 0) or 0
@@ -687,15 +669,10 @@ class LimitValidator:
if tokens_requested > 0:
# Add this operation's tokens to cumulative projected tokens
total_llm_tokens[provider_tokens_key] = projected_from_previous + tokens_requested
logger.error(f"[Pre-flight Check] 📝 Updated cumulative projected tokens for {display_provider_name}:")
logger.error(f" ├─ Previous projected: {projected_from_previous}")
logger.error(f" ├─ This operation requested: {tokens_requested}")
logger.error(f" ├─ New cumulative projected: {total_llm_tokens[provider_tokens_key]}")
logger.error(f" └─ Old value in dict was: {old_projected}")
logger.debug(f"[Pre-flight] Updated projected tokens for {display_provider_name}: {projected_from_previous} + {tokens_requested} = {total_llm_tokens[provider_tokens_key]}")
else:
# No tokens requested, keep existing projected tokens (or 0 if first operation)
total_llm_tokens[provider_tokens_key] = projected_from_previous
logger.error(f"[Pre-flight Check] 📝 No tokens requested, keeping projected at: {projected_from_previous}")
# Check image generation limits
elif provider == APIProvider.STABILITY:

View File

@@ -237,9 +237,10 @@ async def monitoring_middleware(request: Request, call_next):
# Check for authorization header with user info
elif 'authorization' in request.headers:
# Auth middleware should have set request.state.user_id
# If not, this indicates an authentication failure that should be logged
# If not, this indicates an authentication failure (likely expired token)
# Log at debug level to reduce noise - expired tokens are expected
user_id = None
logger.warning("Monitoring: Auth header present but no user_id in state - authentication may have failed")
logger.debug("Monitoring: Auth header present but no user_id in state - token likely expired")
# Final fallback: None (skip usage limits for truly anonymous/unauthenticated)
else:

View File

@@ -93,11 +93,7 @@ def validate_research_operations(
provider = usage_info.get('provider', llm_provider_name) if usage_info else llm_provider_name
operation_type = usage_info.get('operation_type', 'unknown')
logger.error(f"[Pre-flight Validator] ❌ RESEARCH WORKFLOW BLOCKED")
logger.error(f" ├─ User: {user_id}")
logger.error(f" ├─ Blocked at: {operation_type}")
logger.error(f" ├─ Provider: {provider}")
logger.error(f" └─ Reason: {message}")
logger.warning(f"[Pre-flight] Research blocked for user {user_id}: {operation_type} ({provider}) - {message}")
# Raise HTTPException immediately - frontend gets immediate response, no API calls made
raise HTTPException(