Scheduled research persona generation
This commit is contained in:
@@ -389,10 +389,19 @@ class ResearchService:
|
||||
exa_provider.track_exa_usage(user_id, cost)
|
||||
|
||||
# Extract content for downstream analysis
|
||||
# Handle None result case
|
||||
if raw_result is None:
|
||||
logger.error("raw_result is None after Exa search - this should not happen if HTTPException was raised")
|
||||
raise ValueError("Exa research result is None - search operation failed unexpectedly")
|
||||
|
||||
if not isinstance(raw_result, dict):
|
||||
logger.warning(f"raw_result is not a dict (type: {type(raw_result)}), using defaults")
|
||||
raw_result = {}
|
||||
|
||||
content = raw_result.get('content', '')
|
||||
sources = raw_result.get('sources', [])
|
||||
sources = raw_result.get('sources', []) or []
|
||||
search_widget = "" # Exa doesn't provide search widgets
|
||||
search_queries = raw_result.get('search_queries', [])
|
||||
search_queries = raw_result.get('search_queries', []) or []
|
||||
grounding_metadata = None # Exa doesn't provide grounding metadata
|
||||
|
||||
except RuntimeError as e:
|
||||
@@ -423,10 +432,15 @@ class ResearchService:
|
||||
|
||||
await task_manager.update_progress(task_id, "📊 Processing research results and extracting insights...")
|
||||
# Extract sources and content
|
||||
# Handle None result case
|
||||
if gemini_result is None:
|
||||
logger.error("gemini_result is None after search - this should not happen if HTTPException was raised")
|
||||
raise ValueError("Research result is None - search operation failed unexpectedly")
|
||||
|
||||
sources = self._extract_sources_from_grounding(gemini_result)
|
||||
content = gemini_result.get("content", "")
|
||||
search_widget = gemini_result.get("search_widget", "") or ""
|
||||
search_queries = gemini_result.get("search_queries", []) or []
|
||||
content = gemini_result.get("content", "") if isinstance(gemini_result, dict) else ""
|
||||
search_widget = gemini_result.get("search_widget", "") or "" if isinstance(gemini_result, dict) else ""
|
||||
search_queries = gemini_result.get("search_queries", []) or [] if isinstance(gemini_result, dict) else []
|
||||
grounding_metadata = self._extract_grounding_metadata(gemini_result)
|
||||
|
||||
# Continue with common analysis (same for both providers)
|
||||
@@ -548,8 +562,17 @@ class ResearchService:
|
||||
"""Extract sources from Gemini grounding metadata."""
|
||||
sources = []
|
||||
|
||||
# Handle None or invalid gemini_result
|
||||
if not gemini_result or not isinstance(gemini_result, dict):
|
||||
logger.warning("gemini_result is None or not a dict, returning empty sources")
|
||||
return sources
|
||||
|
||||
# The Gemini grounded provider already extracts sources and puts them in the 'sources' field
|
||||
raw_sources = gemini_result.get("sources", [])
|
||||
# Ensure raw_sources is a list (handle None case)
|
||||
if raw_sources is None:
|
||||
raw_sources = []
|
||||
|
||||
for src in raw_sources:
|
||||
source = ResearchSource(
|
||||
title=src.get("title", "Untitled"),
|
||||
@@ -570,6 +593,15 @@ class ResearchService:
|
||||
grounding_supports = []
|
||||
citations = []
|
||||
|
||||
# Handle None or invalid gemini_result
|
||||
if not gemini_result or not isinstance(gemini_result, dict):
|
||||
logger.warning("gemini_result is None or not a dict, returning empty grounding metadata")
|
||||
return GroundingMetadata(
|
||||
grounding_chunks=grounding_chunks,
|
||||
grounding_supports=grounding_supports,
|
||||
citations=citations
|
||||
)
|
||||
|
||||
# Extract grounding chunks from the raw grounding metadata
|
||||
raw_grounding = gemini_result.get("grounding_metadata", {})
|
||||
|
||||
@@ -577,7 +609,11 @@ class ResearchService:
|
||||
if hasattr(raw_grounding, 'grounding_chunks'):
|
||||
raw_chunks = raw_grounding.grounding_chunks
|
||||
else:
|
||||
raw_chunks = raw_grounding.get("grounding_chunks", [])
|
||||
raw_chunks = raw_grounding.get("grounding_chunks", []) if isinstance(raw_grounding, dict) else []
|
||||
|
||||
# Ensure raw_chunks is a list (handle None case)
|
||||
if raw_chunks is None:
|
||||
raw_chunks = []
|
||||
|
||||
for chunk in raw_chunks:
|
||||
if "web" in chunk:
|
||||
|
||||
179
backend/services/oauth_token_monitoring_service.py
Normal file
179
backend/services/oauth_token_monitoring_service.py
Normal file
@@ -0,0 +1,179 @@
|
||||
"""
|
||||
OAuth Token Monitoring Service
|
||||
Service for creating and managing OAuth token monitoring tasks.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
from utils.logger_utils import get_service_logger
|
||||
import os
|
||||
|
||||
# Use service logger for consistent logging (WARNING level visible in production)
|
||||
logger = get_service_logger("oauth_token_monitoring")
|
||||
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
from services.gsc_service import GSCService
|
||||
from services.integrations.bing_oauth import BingOAuthService
|
||||
from services.integrations.wordpress_oauth import WordPressOAuthService
|
||||
|
||||
# Note: Wix tokens are stored in frontend sessionStorage, not backend database
|
||||
# So we cannot check for Wix connections from the backend yet
|
||||
|
||||
|
||||
def get_connected_platforms(user_id: str) -> List[str]:
    """
    Report which OAuth platforms currently have stored credentials for a user.

    Each platform is probed independently through its own service object,
    pointed at the ``alwrity.db`` file resolved against the current working
    directory. A probe that raises is logged and treated as "not connected",
    so the remaining probes still run.

    Probes:
    - GSC: ``GSCService.load_user_credentials``
    - Bing: ``BingOAuthService.get_user_token_status`` (``has_active_tokens``)
    - WordPress: ``WordPressOAuthService.get_user_tokens``
    - Wix: not probed (tokens live in frontend sessionStorage), so 'wix'
      never appears in the result.

    Args:
        user_id: User ID (Clerk string)

    Returns:
        Identifiers of the connected platforms, in probe order, drawn from
        'gsc', 'bing' and 'wordpress'.
    """
    platforms_found: List[str] = []

    logger.warning(f"[OAuth Monitoring] Checking connected platforms for user: {user_id}")

    # --- Google Search Console ---
    try:
        gsc_db = os.path.abspath("alwrity.db")
        logger.warning(f"[OAuth Monitoring] Checking GSC with db_path: {gsc_db}")
        credentials = GSCService(db_path=gsc_db).load_user_credentials(user_id)
        if not credentials:
            logger.warning(f"[OAuth Monitoring] ❌ GSC not connected for user {user_id} (no credentials found)")
        else:
            platforms_found.append('gsc')
            logger.warning(f"[OAuth Monitoring] ✅ GSC connected for user {user_id}")
    except Exception as gsc_err:
        logger.warning(f"[OAuth Monitoring] ⚠️ GSC check failed for user {user_id}: {gsc_err}", exc_info=True)

    # --- Bing Webmaster ---
    try:
        bing_db = os.path.abspath("alwrity.db")
        logger.warning(f"[OAuth Monitoring] Checking Bing with db_path: {bing_db}")
        status = BingOAuthService(db_path=bing_db).get_user_token_status(user_id)
        active = status.get('has_active_tokens', False)
        logger.warning(f"[OAuth Monitoring] Bing token_status keys: {list(status.keys())}, has_active_tokens: {active}")
        if not active:
            logger.warning(f"[OAuth Monitoring] ❌ Bing not connected for user {user_id} (no active tokens)")
        else:
            platforms_found.append('bing')
            logger.warning(f"[OAuth Monitoring] ✅ Bing connected for user {user_id}")
    except Exception as bing_err:
        logger.warning(f"[OAuth Monitoring] ⚠️ Bing check failed for user {user_id}: {bing_err}", exc_info=True)

    # --- WordPress ---
    try:
        wp_db = os.path.abspath("alwrity.db")
        logger.warning(f"[OAuth Monitoring] Checking WordPress with db_path: {wp_db}")
        wp_tokens = WordPressOAuthService(db_path=wp_db).get_user_tokens(user_id)
        # A falsy result (None or empty) counts as zero tokens.
        token_count = len(wp_tokens) if wp_tokens else 0
        logger.warning(f"[OAuth Monitoring] WordPress tokens found: {token_count}")
        if token_count > 0:
            platforms_found.append('wordpress')
            logger.warning(f"[OAuth Monitoring] ✅ WordPress connected for user {user_id} ({token_count} token(s))")
        else:
            logger.warning(f"[OAuth Monitoring] ❌ WordPress not connected for user {user_id} (no tokens found)")
    except Exception as wp_err:
        logger.warning(f"[OAuth Monitoring] ⚠️ WordPress check failed for user {user_id}: {wp_err}", exc_info=True)

    # Wix: Not checked (tokens in frontend sessionStorage)
    # TODO: Once backend storage is implemented, check wix_tokens table

    logger.warning(f"[OAuth Monitoring] Connected platforms for user {user_id}: {platforms_found}")
    return platforms_found
|
||||
|
||||
|
||||
def create_oauth_monitoring_tasks(
    user_id: str,
    db: Session,
    platforms: Optional[List[str]] = None
) -> List[OAuthTokenMonitoringTask]:
    """
    Create OAuth token monitoring tasks for a user.

    If platforms are not provided, connected platforms are auto-detected via
    ``get_connected_platforms``. One 'active' task is created per platform
    with ``next_check`` set to 7 days from now; platforms that already have a
    task for this user are skipped. All new tasks are committed together.

    Args:
        user_id: User ID (Clerk string)
        db: Database session
        platforms: Optional list of platforms to create tasks for.
                   If None, auto-detects connected platforms.
                   Valid values: 'gsc', 'bing', 'wordpress', 'wix'
                   Repeated entries are ignored after their first occurrence.

    Returns:
        List of created OAuthTokenMonitoringTask instances. Returns an empty
        list when there is nothing to create, or when any error occurred
        (in which case the session is rolled back and the error is logged).
    """
    try:
        # Auto-detect platforms if not provided
        if platforms is None:
            platforms = get_connected_platforms(user_id)
            logger.warning(f"[OAuth Monitoring] Auto-detected {len(platforms)} connected platforms for user {user_id}: {platforms}")
        else:
            logger.warning(f"[OAuth Monitoring] Creating monitoring tasks for specified platforms: {platforms}")

        if not platforms:
            logger.warning(f"[OAuth Monitoring] No connected platforms found for user {user_id}. No monitoring tasks created.")
            return []

        # Drop repeated platform names (keeping first-seen order). The
        # existence check below only sees rows known to the database, so a
        # duplicate in the input could otherwise stage two tasks for the same
        # user/platform inside this one transaction.
        unique_platforms = list(dict.fromkeys(platforms))

        created_tasks = []
        now = datetime.utcnow()
        next_check = now + timedelta(days=7)  # 7 days from now

        for platform in unique_platforms:
            # Check if task already exists for this user/platform combination
            existing_task = db.query(OAuthTokenMonitoringTask).filter(
                OAuthTokenMonitoringTask.user_id == user_id,
                OAuthTokenMonitoringTask.platform == platform
            ).first()

            if existing_task:
                logger.warning(
                    f"[OAuth Monitoring] Monitoring task already exists for user {user_id}, platform {platform}. "
                    f"Skipping creation."
                )
                continue

            # Create new monitoring task
            task = OAuthTokenMonitoringTask(
                user_id=user_id,
                platform=platform,
                status='active',
                next_check=next_check,
                created_at=now,
                updated_at=now
            )

            db.add(task)
            created_tasks.append(task)
            logger.warning(
                f"[OAuth Monitoring] Created OAuth token monitoring task for user {user_id}, "
                f"platform {platform}, next_check: {next_check.isoformat()}"
            )

        # Single commit so the tasks for one call are persisted together.
        db.commit()
        logger.warning(
            f"[OAuth Monitoring] Successfully created {len(created_tasks)} OAuth token monitoring tasks "
            f"for user {user_id}"
        )

        return created_tasks

    except Exception as e:
        # Best-effort API: callers get an empty list rather than an exception.
        logger.error(
            f"Error creating OAuth token monitoring tasks for user {user_id}: {e}",
            exc_info=True
        )
        db.rollback()
        return []
|
||||
|
||||
@@ -26,12 +26,63 @@ class OnboardingDatabaseService:
|
||||
# Cache for schema feature detection
|
||||
self._brand_cols_checked: bool = False
|
||||
self._brand_cols_available: bool = False
|
||||
self._research_persona_cols_checked: bool = False
|
||||
self._research_persona_cols_available: bool = False
|
||||
|
||||
# --- Feature flags and schema detection helpers ---
|
||||
def _brand_feature_enabled(self) -> bool:
|
||||
"""Check if writing brand-related columns is enabled via env flag."""
|
||||
return os.getenv('ENABLE_WEBSITE_BRAND_COLUMNS', 'true').lower() in {'1', 'true', 'yes', 'on'}
|
||||
|
||||
def _ensure_research_persona_columns(self, session_db: Session) -> None:
    """Ensure research_persona columns exist in persona_data table (runtime migration).

    Runs at most once per service instance: the "checked" flag is set in the
    ``finally`` clause, so a failed attempt is not retried by later calls on
    the same instance (the failure itself is re-raised to the caller).

    Args:
        session_db: Active SQLAlchemy session bound to the target database.

    Raises:
        Exception: Whatever the database driver raised while inspecting or
            altering the table; the session is rolled back before re-raising.
    """
    if self._research_persona_cols_checked:
        return

    try:
        # Check if columns exist using PRAGMA (SQLite) or a probe query (otherwise)
        db_url = str(session_db.bind.url) if session_db.bind else ""

        if 'sqlite' in db_url.lower():
            # SQLite: Use PRAGMA to check columns
            result = session_db.execute(text("PRAGMA table_info(persona_data)"))
            cols = {row[1] for row in result}  # Column name is at index 1

            if 'research_persona' not in cols:
                logger.info("Adding missing column research_persona to persona_data table")
                session_db.execute(text("ALTER TABLE persona_data ADD COLUMN research_persona JSON"))
                session_db.commit()

            if 'research_persona_generated_at' not in cols:
                logger.info("Adding missing column research_persona_generated_at to persona_data table")
                session_db.execute(text("ALTER TABLE persona_data ADD COLUMN research_persona_generated_at TIMESTAMP"))
                session_db.commit()

            self._research_persona_cols_available = True
        else:
            # PostgreSQL: Try to query the columns (will fail if they don't exist)
            try:
                session_db.execute(text("SELECT research_persona, research_persona_generated_at FROM persona_data LIMIT 0"))
                self._research_persona_cols_available = True
            except Exception:
                # The failed probe leaves the PostgreSQL transaction in an
                # aborted state; every further statement would be rejected
                # until it is rolled back, so reset before altering.
                session_db.rollback()

                # At least one column is missing, add whichever is absent
                logger.info("Adding missing columns research_persona and research_persona_generated_at to persona_data table")
                try:
                    # IF NOT EXISTS keeps this working when only one of the
                    # two columns is missing.
                    session_db.execute(text("ALTER TABLE persona_data ADD COLUMN IF NOT EXISTS research_persona JSONB"))
                    session_db.execute(text("ALTER TABLE persona_data ADD COLUMN IF NOT EXISTS research_persona_generated_at TIMESTAMP"))
                    session_db.commit()
                    self._research_persona_cols_available = True
                except Exception as alter_err:
                    logger.error(f"Failed to add research_persona columns: {alter_err}")
                    session_db.rollback()
                    raise
    except Exception as e:
        logger.error(f"Error ensuring research_persona columns: {e}")
        session_db.rollback()
        raise
    finally:
        # Mark as checked even on failure so the migration is attempted once.
        self._research_persona_cols_checked = True
|
||||
|
||||
def _ensure_brand_column_detection(self, session_db: Session) -> None:
|
||||
"""Detect at runtime whether brand columns exist and cache the result."""
|
||||
if self._brand_cols_checked:
|
||||
@@ -477,6 +528,9 @@ class OnboardingDatabaseService:
|
||||
if not session_db:
|
||||
raise ValueError("Database session required")
|
||||
|
||||
# Ensure research_persona columns exist before querying
|
||||
self._ensure_research_persona_columns(session_db)
|
||||
|
||||
try:
|
||||
session = self.get_session_by_user(user_id, session_db)
|
||||
if not session:
|
||||
|
||||
239
backend/services/persona/facebook/facebook_persona_scheduler.py
Normal file
239
backend/services/persona/facebook/facebook_persona_scheduler.py
Normal file
@@ -0,0 +1,239 @@
|
||||
"""
|
||||
Facebook Persona Scheduler
|
||||
Handles scheduled generation of Facebook personas after onboarding.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Dict, Any
|
||||
from loguru import logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.persona_data_service import PersonaDataService
|
||||
from services.persona.facebook.facebook_persona_service import FacebookPersonaService
|
||||
from services.onboarding.database_service import OnboardingDatabaseService
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
|
||||
|
||||
def _log_facebook_persona_event(
    db,
    user_id: str,
    event_type: str,
    start_time: datetime,
    execution_time: float,
    details: Dict[str, Any],
    failure_context: str,
    error_message=None,
) -> None:
    """Best-effort write of one SchedulerEventLog row for the Facebook persona job.

    Args:
        db: Open database session used to add and commit the row.
        user_id: User ID (Clerk string); also used to build the job id.
        event_type: 'job_completed' or 'job_failed'.
        start_time: When generation started; stored as the event date.
        execution_time: Elapsed seconds, stored in the event data.
        details: Extra event-data entries (status, failure reason, ...),
            appended after the common keys in the given order.
        failure_context: Short phrase used in the warning emitted when the
            log row itself cannot be written.
        error_message: Optional message stored on the row; omitted when None.
    """
    try:
        log_fields = {
            'event_type': event_type,
            'event_date': start_time,
            'job_id': f"facebook_persona_{user_id}",  # Match scheduled job ID format
            'job_type': 'one_time',
            'user_id': user_id,
            'event_data': {
                'job_function': 'generate_facebook_persona_task',
                'execution_time_seconds': execution_time,
                **details,
            },
        }
        if error_message is not None:
            log_fields['error_message'] = error_message

        db.add(SchedulerEventLog(**log_fields))
        db.commit()
    except Exception as log_error:
        # Event logging must never break the job itself: warn and reset the session.
        logger.warning(f"Failed to log Facebook persona {failure_context} to scheduler event log: {log_error}")
        if db:
            db.rollback()


async def generate_facebook_persona_task(user_id: str):
    """
    Async task function to generate Facebook persona for a user.

    This function is called by the scheduler 20 minutes after onboarding completion.
    It requires an existing core persona, skips users that already have a
    Facebook persona, and records the outcome (success, save failure,
    generation error, or exception) in the scheduler event log. Errors are
    logged rather than raised.

    Args:
        user_id: User ID (Clerk string)
    """
    db = None
    try:
        logger.info(f"Scheduled Facebook persona generation started for user {user_id}")

        db = get_db_session()
        if not db:
            logger.error(f"Failed to get database session for Facebook persona generation (user: {user_id})")
            return

        # Get persona data service
        persona_data_service = PersonaDataService(db_session=db)
        onboarding_service = OnboardingDatabaseService(db=db)

        # Get core persona (required for Facebook persona)
        persona_data = persona_data_service.get_user_persona_data(user_id)
        if not persona_data or not persona_data.get('core_persona'):
            logger.warning(f"No core persona found for user {user_id}, cannot generate Facebook persona")
            return

        core_persona = persona_data.get('core_persona', {})

        # Get onboarding data for context
        website_analysis = onboarding_service.get_website_analysis(user_id, db)
        research_prefs = onboarding_service.get_research_preferences(user_id, db)

        onboarding_data = {
            "website_url": website_analysis.get('website_url', '') if website_analysis else '',
            "writing_style": website_analysis.get('writing_style', {}) if website_analysis else {},
            "content_characteristics": website_analysis.get('content_characteristics', {}) if website_analysis else {},
            "target_audience": website_analysis.get('target_audience', '') if website_analysis else '',
            "research_preferences": research_prefs or {}
        }

        # Check if persona already exists to avoid unnecessary API calls.
        # `or {}` also covers a stored value of None for 'platform_personas'.
        platform_personas = persona_data.get('platform_personas') or {}
        if platform_personas.get('facebook'):
            logger.info(f"Facebook persona already exists for user {user_id}, skipping generation")
            return

        start_time = datetime.utcnow()
        # Generate Facebook persona
        facebook_service = FacebookPersonaService()
        try:
            generated_persona = facebook_service.generate_facebook_persona(
                core_persona,
                onboarding_data
            )
            execution_time = (datetime.utcnow() - start_time).total_seconds()

            if generated_persona and "error" not in generated_persona:
                # Save to database
                success = persona_data_service.save_platform_persona(user_id, 'facebook', generated_persona)
                if success:
                    logger.info(f"✅ Scheduled Facebook persona generation completed for user {user_id}")

                    # Log success to scheduler event log for dashboard
                    _log_facebook_persona_event(
                        db, user_id, 'job_completed', start_time, execution_time,
                        details={'status': 'success'},
                        failure_context="generation success",
                    )
                else:
                    error_msg = f"Failed to save Facebook persona for user {user_id}"
                    logger.warning(f"⚠️ {error_msg}")

                    # Log failure to scheduler event log
                    _log_facebook_persona_event(
                        db, user_id, 'job_failed', start_time, execution_time,
                        details={
                            'status': 'failed',
                            'failure_reason': 'save_failed',
                            'expensive_api_call': True
                        },
                        failure_context="save failure",
                        error_message=error_msg,
                    )
            else:
                error_msg = f"Scheduled Facebook persona generation failed for user {user_id}: {generated_persona}"
                logger.error(f"❌ {error_msg}")

                # Log failure to scheduler event log for dashboard visibility
                _log_facebook_persona_event(
                    db, user_id, 'job_failed', start_time, execution_time,
                    details={
                        'status': 'failed',
                        'failure_reason': 'generation_returned_error',
                        'expensive_api_call': True
                    },
                    failure_context="generation failure",
                    error_message=error_msg,
                )
        except Exception as gen_error:
            execution_time = (datetime.utcnow() - start_time).total_seconds()
            error_msg = f"Exception during scheduled Facebook persona generation for user {user_id}: {str(gen_error)}. Expensive API call may have been made."
            logger.error(f"❌ {error_msg}")

            # Log exception to scheduler event log for dashboard visibility
            _log_facebook_persona_event(
                db, user_id, 'job_failed', start_time, execution_time,
                details={
                    'status': 'failed',
                    'failure_reason': 'exception',
                    'exception_type': type(gen_error).__name__,
                    'exception_message': str(gen_error),
                    'expensive_api_call': True
                },
                failure_context="generation exception",
                error_message=error_msg,
            )

    except Exception as e:
        logger.error(f"Error in scheduled Facebook persona generation for user {user_id}: {e}")
    finally:
        # Always release the session, even after an early return.
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session: {e}")
|
||||
|
||||
|
||||
def schedule_facebook_persona_generation(user_id: str, delay_minutes: int = 20) -> str:
    """
    Queue a one-time Facebook persona generation job for a user.

    The job runs ``generate_facebook_persona_task`` at "now + delay" (UTC,
    timezone-aware). Its id is derived only from the user id, and it is
    scheduled with ``replace_existing=True``.

    Args:
        user_id: User ID (Clerk string)
        delay_minutes: Delay in minutes before generating persona (default: 20)

    Returns:
        Job ID as reported by the scheduler

    Raises:
        Exception: Any scheduling failure is logged and re-raised unchanged.
    """
    try:
        from services.scheduler import get_scheduler

        task_scheduler = get_scheduler()

        # UTC-aware timestamp at which the job should fire.
        fire_at = datetime.now(timezone.utc) + timedelta(minutes=delay_minutes)

        # Stable id (no timestamp) so a restore can find the job again.
        # Clerk user ids already carry the "user_" prefix, so none is added.
        persona_job_id = f"facebook_persona_{user_id}"

        confirmed_job_id = task_scheduler.schedule_one_time_task(
            func=generate_facebook_persona_task,
            run_date=fire_at,
            job_id=persona_job_id,
            kwargs={"user_id": user_id},
            replace_existing=True,
        )

        summary = f"Scheduled Facebook persona generation for user {user_id} at {fire_at} (job_id: {confirmed_job_id})"
        logger.info(summary)

        return confirmed_job_id

    except Exception as schedule_err:
        logger.error(f"Failed to schedule Facebook persona generation for user {user_id}: {schedule_err}")
        raise
|
||||
|
||||
171
backend/services/research/research_persona_prompt_builder.py
Normal file
171
backend/services/research/research_persona_prompt_builder.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""
|
||||
Research Persona Prompt Builder
|
||||
|
||||
Handles building comprehensive prompts for research persona generation.
|
||||
Generates personalized research defaults, suggestions, and configurations.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any
|
||||
import json
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class ResearchPersonaPromptBuilder:
|
||||
"""Builds comprehensive prompts for research persona generation."""
|
||||
|
||||
def build_research_persona_prompt(self, onboarding_data: Dict[str, Any]) -> str:
|
||||
"""Build the research persona generation prompt with comprehensive data."""
|
||||
|
||||
# Extract data from onboarding_data
|
||||
website_analysis = onboarding_data.get("website_analysis", {}) or {}
|
||||
persona_data = onboarding_data.get("persona_data", {}) or {}
|
||||
research_prefs = onboarding_data.get("research_preferences", {}) or {}
|
||||
business_info = onboarding_data.get("business_info", {}) or {}
|
||||
|
||||
# Extract core persona
|
||||
core_persona = persona_data.get("core_persona", {}) or {}
|
||||
|
||||
prompt = f"""
|
||||
COMPREHENSIVE RESEARCH PERSONA GENERATION TASK: Create a highly detailed, personalized research persona based on the user's business, writing style, and content strategy. This persona will provide intelligent defaults and suggestions for research inputs.
|
||||
|
||||
=== USER CONTEXT ===
|
||||
|
||||
BUSINESS INFORMATION:
|
||||
{json.dumps(business_info, indent=2)}
|
||||
|
||||
WEBSITE ANALYSIS:
|
||||
{json.dumps(website_analysis, indent=2)}
|
||||
|
||||
CORE PERSONA:
|
||||
{json.dumps(core_persona, indent=2)}
|
||||
|
||||
RESEARCH PREFERENCES:
|
||||
{json.dumps(research_prefs, indent=2)}
|
||||
|
||||
=== RESEARCH PERSONA GENERATION REQUIREMENTS ===
|
||||
|
||||
Generate a comprehensive research persona in JSON format with the following structure:
|
||||
|
||||
1. DEFAULT VALUES:
|
||||
- "default_industry": Extract from core_persona.industry, business_info.industry, or website_analysis target_audience. Use "General" only if none available.
|
||||
- "default_target_audience": Extract from core_persona.target_audience, website_analysis.target_audience, or business_info.target_audience. Be specific and descriptive.
|
||||
- "default_research_mode": Suggest "basic", "comprehensive", or "targeted" based on research_preferences.research_depth and content_type preferences.
|
||||
- "default_provider": Suggest "google" for news/trends, "exa" for academic/technical deep-dives, or "google" as default.
|
||||
|
||||
2. KEYWORD INTELLIGENCE:
|
||||
- "suggested_keywords": Generate 8-12 keywords relevant to the user's industry, interests (from core_persona), and content goals.
|
||||
- "keyword_expansion_patterns": Create a dictionary mapping common keywords to expanded, industry-specific terms. Include 10-15 patterns like:
|
||||
{{"AI": ["healthcare AI", "medical AI", "clinical AI", "diagnostic AI"], "tools": ["medical devices", "clinical tools"], ...}}
|
||||
Focus on industry-specific terminology from the user's domain.
|
||||
|
||||
3. DOMAIN EXPERTISE:
|
||||
- "suggested_exa_domains": List 4-6 authoritative domains for the user's industry (e.g., Healthcare: ["pubmed.gov", "nejm.org", "thelancet.com"]).
|
||||
- "suggested_exa_category": Suggest appropriate Exa category based on industry:
|
||||
- Healthcare/Science: "research paper"
|
||||
- Finance: "financial report"
|
||||
- Technology/Business: "company" or "news"
|
||||
- Default: null (empty string for all categories)
|
||||
|
||||
4. RESEARCH ANGLES:
|
||||
- "research_angles": Generate 5-8 alternative research angles/focuses based on:
|
||||
- User's pain points and challenges (from core_persona)
|
||||
- Industry trends and opportunities
|
||||
- Content goals (from research_preferences)
|
||||
- Audience interests (from core_persona.interests)
|
||||
Examples: "Compare {{topic}} tools", "{{topic}} ROI analysis", "Latest {{topic}} trends", etc.
|
||||
|
||||
5. QUERY ENHANCEMENT:
|
||||
- "query_enhancement_rules": Create templates for improving vague user queries:
|
||||
{{"vague_ai": "Research: AI applications in {{industry}} for {{audience}}", "vague_tools": "Compare top {{industry}} tools", ...}}
|
||||
Include 5-8 enhancement patterns.
|
||||
|
||||
6. RECOMMENDED PRESETS:
|
||||
- "recommended_presets": Generate 3-5 personalized research preset templates. Each preset should include:
|
||||
- name: Descriptive name (e.g., "{{Industry}} Trends", "{{Audience}} Insights")
|
||||
- keywords: Research query string
|
||||
- industry: User's industry
|
||||
- target_audience: User's target audience
|
||||
- research_mode: "basic", "comprehensive", or "targeted"
|
||||
- config: Complete ResearchConfig object with appropriate settings
|
||||
- description: Brief explanation of what this preset researches
|
||||
Make presets relevant to the user's specific industry, audience, and content goals.
|
||||
|
||||
7. RESEARCH PREFERENCES:
|
||||
- "research_preferences": Extract and structure research preferences from onboarding:
|
||||
- research_depth: From research_preferences.research_depth
|
||||
- content_types: From research_preferences.content_types
|
||||
- auto_research: From research_preferences.auto_research
|
||||
- factual_content: From research_preferences.factual_content
|
||||
|
||||
=== OUTPUT REQUIREMENTS ===
|
||||
|
||||
Return a valid JSON object matching this exact structure:
|
||||
{{
|
||||
"default_industry": "string",
|
||||
"default_target_audience": "string",
|
||||
"default_research_mode": "basic" | "comprehensive" | "targeted",
|
||||
"default_provider": "google" | "exa",
|
||||
"suggested_keywords": ["keyword1", "keyword2", ...],
|
||||
"keyword_expansion_patterns": {{
|
||||
"keyword": ["expansion1", "expansion2", ...]
|
||||
}},
|
||||
"suggested_exa_domains": ["domain1.com", "domain2.com", ...],
|
||||
"suggested_exa_category": "string or null",
|
||||
"research_angles": ["angle1", "angle2", ...],
|
||||
"query_enhancement_rules": {{
|
||||
"pattern": "template"
|
||||
}},
|
||||
"recommended_presets": [
|
||||
{{
|
||||
"name": "string",
|
||||
"keywords": "string",
|
||||
"industry": "string",
|
||||
"target_audience": "string",
|
||||
"research_mode": "basic" | "comprehensive" | "targeted",
|
||||
"config": {{
|
||||
"mode": "basic" | "comprehensive" | "targeted",
|
||||
"provider": "google" | "exa",
|
||||
"max_sources": 10 | 15 | 12,
|
||||
"include_statistics": true | false,
|
||||
"include_expert_quotes": true | false,
|
||||
"include_competitors": true | false,
|
||||
"include_trends": true | false,
|
||||
"exa_category": "string or null",
|
||||
"exa_include_domains": ["domain1.com", ...],
|
||||
"exa_search_type": "auto" | "keyword" | "neural"
|
||||
}},
|
||||
"description": "string"
|
||||
}}
|
||||
],
|
||||
"research_preferences": {{
|
||||
"research_depth": "string",
|
||||
"content_types": ["type1", "type2", ...],
|
||||
"auto_research": true | false,
|
||||
"factual_content": true | false
|
||||
}},
|
||||
"version": "1.0",
|
||||
"confidence_score": 85.0
|
||||
}}
|
||||
|
||||
=== IMPORTANT INSTRUCTIONS ===
|
||||
|
||||
1. Be highly specific and personalized - use actual data from the user's business, persona, and preferences.
|
||||
2. Avoid generic suggestions - every field should reflect the user's unique context.
|
||||
3. For industries not clearly identified, infer from website_analysis.content_characteristics or writing_style.
|
||||
4. Ensure all suggested keywords, domains, and angles are relevant to the user's industry and audience.
|
||||
5. Generate realistic, actionable presets that the user would actually want to use.
|
||||
6. Confidence score should reflect data richness (0-100): higher if rich onboarding data, lower if minimal data.
|
||||
7. Return ONLY valid JSON - no markdown formatting, no explanatory text.
|
||||
|
||||
Generate the research persona now:
|
||||
"""
|
||||
|
||||
return prompt
|
||||
|
||||
def get_json_schema(self) -> Dict[str, Any]:
    """Return the JSON schema used for structured LLM responses.

    The schema is derived from the ``ResearchPersona`` Pydantic model and is
    meant to be handed to ``llm_text_gen(json_struct=...)``.

    Returns:
        The dictionary produced by ``ResearchPersona.schema()``.
    """
    # Imported at call time, as before; only the model that is actually
    # used is imported (the previously imported ResearchPreset was unused).
    from models.research_persona_models import ResearchPersona

    # Convert the Pydantic model to its JSON schema representation.
    return ResearchPersona.schema()
|
||||
194
backend/services/research/research_persona_scheduler.py
Normal file
194
backend/services/research/research_persona_scheduler.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
Research Persona Scheduler
|
||||
Handles scheduled generation of research personas after onboarding.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Dict, Any
|
||||
from loguru import logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.research.research_persona_service import ResearchPersonaService
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
|
||||
|
||||
def _record_persona_job_event(
    db,
    user_id,
    *,
    event_type,
    started_at,
    execution_time,
    status,
    outcome_label,
    error_message=None,
    extra_data=None,
):
    """Write one scheduler event-log row for the persona job (best effort).

    A failure to persist the row is logged as a warning and rolled back; it
    is not re-raised, so dashboard logging cannot mask the job's own outcome.

    Args:
        db: Open database session used to add and commit the row.
        user_id: User the job ran for; also used to build the job ID.
        event_type: Value stored in ``event_type`` (e.g. ``'job_completed'``).
        started_at: Job start time, stored as ``event_date``.
        execution_time: Elapsed seconds, stored in ``event_data``.
        status: Status string stored in ``event_data``.
        outcome_label: Word inserted into the warning emitted when the row
            cannot be saved (``success`` / ``failure`` / ``exception``).
        error_message: Optional message stored on the row; omitted when None.
        extra_data: Optional additional ``event_data`` entries.
    """
    try:
        event_data = {
            'job_function': 'generate_research_persona_task',
            'execution_time_seconds': execution_time,
            'status': status,
        }
        if extra_data:
            event_data.update(extra_data)

        row_fields = {
            'event_type': event_type,
            'event_date': started_at,
            # Must match the job ID format used when the job is scheduled.
            'job_id': f"research_persona_{user_id}",
            'job_type': 'one_time',
            'user_id': user_id,
            'event_data': event_data,
        }
        if error_message is not None:
            row_fields['error_message'] = error_message

        db.add(SchedulerEventLog(**row_fields))
        db.commit()
    except Exception as log_error:
        logger.warning(
            f"Failed to log persona generation {outcome_label} to scheduler event log: {log_error}"
        )
        if db:
            db.rollback()


async def generate_research_persona_task(user_id: str):
    """
    Async task function to generate research persona for a user.

    This function is called by the scheduler 20 minutes after onboarding completion.
    It skips generation when a research persona is already stored, and records
    the outcome (success, empty result, or exception) in the scheduler event
    log. A failed run is never rescheduled from here, to avoid retry loops
    around an expensive API call.

    Args:
        user_id: User ID (Clerk string)
    """
    db = None
    try:
        logger.info(f"Scheduled research persona generation started for user {user_id}")

        db = get_db_session()
        if not db:
            logger.error(f"Failed to get database session for research persona generation (user: {user_id})")
            return

        persona_service = ResearchPersonaService(db_session=db)

        # Check if persona already exists to avoid unnecessary API calls
        persona_data = persona_service._get_persona_data_record(user_id)
        if persona_data and persona_data.research_persona:
            logger.info(f"Research persona already exists for user {user_id}, skipping generation")
            return

        start_time = datetime.utcnow()
        try:
            # NOTE(review): get_or_generate is a synchronous call made from a
            # coroutine - confirm the scheduler tolerates the blocking work.
            research_persona = persona_service.get_or_generate(user_id, force_refresh=False)
            execution_time = (datetime.utcnow() - start_time).total_seconds()

            if research_persona:
                logger.info(f"✅ Scheduled research persona generation completed for user {user_id}")
                _record_persona_job_event(
                    db,
                    user_id,
                    event_type='job_completed',
                    started_at=start_time,
                    execution_time=execution_time,
                    status='success',
                    outcome_label='success',
                )
            else:
                error_msg = (
                    f"Scheduled research persona generation FAILED for user {user_id}. "
                    "Expensive API call was made but generation failed. "
                    "Will NOT automatically retry to prevent wasteful API calls."
                )
                logger.error(f"❌ {error_msg}")
                _record_persona_job_event(
                    db,
                    user_id,
                    event_type='job_failed',
                    started_at=start_time,
                    execution_time=execution_time,
                    status='failed',
                    outcome_label='failure',
                    error_message=error_msg,
                    extra_data={
                        'failure_reason': 'generation_returned_none',
                        'expensive_api_call': True,
                    },
                )
                # DO NOT reschedule - this prevents infinite retry loops.
                # User can manually trigger generation from frontend if needed.
        except Exception as gen_error:
            execution_time = (datetime.utcnow() - start_time).total_seconds()
            error_msg = (
                f"Exception during scheduled research persona generation for user {user_id}: {str(gen_error)}. "
                "Expensive API call may have been made. Will NOT automatically retry."
            )
            logger.error(f"❌ {error_msg}")
            _record_persona_job_event(
                db,
                user_id,
                event_type='job_failed',
                started_at=start_time,
                execution_time=execution_time,
                status='failed',
                outcome_label='exception',
                error_message=error_msg,
                extra_data={
                    'failure_reason': 'exception',
                    'exception_type': type(gen_error).__name__,
                    'exception_message': str(gen_error),
                    'expensive_api_call': True,
                },
            )
            # DO NOT reschedule - prevent infinite retry loops.

    except Exception as e:
        logger.error(f"Error in scheduled research persona generation for user {user_id}: {e}")
    finally:
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session: {e}")
|
||||
|
||||
|
||||
def schedule_research_persona_generation(user_id: str, delay_minutes: int = 20) -> str:
    """
    Queue a one-time research persona generation job for a user.

    Args:
        user_id: User ID (Clerk string)
        delay_minutes: Minutes to wait before the job runs (default: 20)

    Returns:
        The job ID returned by the scheduler.

    Raises:
        Exception: Any error raised while scheduling is logged and re-raised.
    """
    try:
        from services.scheduler import get_scheduler

        task_scheduler = get_scheduler()

        # The ID carries no timestamp so that job restoration can find the
        # job again and keep its original run time. Clerk user IDs already
        # include the "user_" prefix, so none is added here.
        persona_job_id = f"research_persona_{user_id}"

        # Timezone-aware UTC "now" plus the requested delay.
        fire_at = datetime.now(timezone.utc) + timedelta(minutes=delay_minutes)

        confirmed_job_id = task_scheduler.schedule_one_time_task(
            func=generate_research_persona_task,
            run_date=fire_at,
            job_id=persona_job_id,
            kwargs={"user_id": user_id},
            replace_existing=True,
        )

        logger.info(
            f"Scheduled research persona generation for user {user_id} "
            f"at {fire_at} (job_id: {confirmed_job_id})"
        )
        return confirmed_job_id
    except Exception as schedule_error:
        logger.error(f"Failed to schedule research persona generation for user {user_id}: {schedule_error}")
        raise
|
||||
|
||||
384
backend/services/research/research_persona_service.py
Normal file
384
backend/services/research/research_persona_service.py
Normal file
@@ -0,0 +1,384 @@
|
||||
"""
|
||||
Research Persona Service
|
||||
|
||||
Handles generation, caching, and retrieval of AI-powered research personas.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
from loguru import logger
|
||||
from fastapi import HTTPException
|
||||
|
||||
from services.database import get_db_session
|
||||
from models.onboarding import PersonaData, OnboardingSession
|
||||
from models.research_persona_models import ResearchPersona
|
||||
from .research_persona_prompt_builder import ResearchPersonaPromptBuilder
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from services.onboarding.database_service import OnboardingDatabaseService
|
||||
from services.persona_data_service import PersonaDataService
|
||||
|
||||
|
||||
class ResearchPersonaService:
|
||||
"""Service for generating and managing research personas."""
|
||||
|
||||
CACHE_TTL_DAYS = 7 # 7-day cache TTL
|
||||
|
||||
def __init__(self, db_session=None):
    """Set up the database session and the collaborating services.

    Args:
        db_session: Session to reuse. When it is falsy, a session is
            obtained from ``get_db_session()`` instead.
    """
    session = db_session if db_session else get_db_session()

    self.db = session
    self.prompt_builder = ResearchPersonaPromptBuilder()
    # Both helper services share the same session as this service.
    self.onboarding_service = OnboardingDatabaseService(db=session)
    self.persona_data_service = PersonaDataService(db_session=session)
|
||||
|
||||
def get_cached_only(self, user_id: str) -> Optional[ResearchPersona]:
    """
    Return the user's research persona only when a valid cached copy exists.

    This method never triggers generation, which makes it suitable for
    config endpoints that must not run into rate limit checks.

    Args:
        user_id: User ID (Clerk string)

    Returns:
        ResearchPersona if cached and valid, None otherwise (including on
        any error, which is logged).
    """
    try:
        record = self._get_persona_data_record(user_id)
        if not record:
            logger.debug(f"No persona data found for user {user_id}")
            return None

        # Both conditions must hold: the timestamp is within the TTL and a
        # persona payload is actually stored.
        if not (self.is_cache_valid(record) and record.research_persona):
            logger.debug(f"No valid cached research persona for user {user_id}")
            return None

        try:
            logger.debug(f"Returning cached research persona for user {user_id}")
            return ResearchPersona(**record.research_persona)
        except Exception as parse_error:
            # A stored payload that no longer validates counts as "no cache".
            logger.warning(f"Failed to parse cached research persona: {parse_error}")
            return None

    except Exception as lookup_error:
        logger.error(f"Error getting cached research persona for user {user_id}: {lookup_error}")
        return None
|
||||
|
||||
def get_or_generate(
    self,
    user_id: str,
    force_refresh: bool = False
) -> Optional[ResearchPersona]:
    """
    Return the user's research persona, generating it when it is missing,
    expired, unparsable, or when a refresh is forced.

    Args:
        user_id: User ID (Clerk string)
        force_refresh: If True, regenerate even if cache is valid

    Returns:
        ResearchPersona if successful, None otherwise

    Raises:
        HTTPException: Propagated unchanged (e.g. 429 subscription limit)
            so that it reaches the API layer.
    """
    try:
        record = self._get_persona_data_record(user_id)
        if not record:
            logger.warning(f"No persona data found for user {user_id}, cannot generate research persona")
            return None

        # Decide whether the cached copy can be served; every branch that
        # does not return falls through to generation below.
        if force_refresh:
            logger.info(f"Forcing refresh of research persona for user {user_id}")
        elif not self.is_cache_valid(record):
            logger.info(f"Cache expired for user {user_id}, regenerating...")
        elif not record.research_persona:
            logger.info(f"Research persona missing for user {user_id}, generating...")
        else:
            logger.info(f"Using cached research persona for user {user_id}")
            try:
                return ResearchPersona(**record.research_persona)
            except Exception as parse_error:
                logger.warning(f"Failed to parse cached research persona: {parse_error}, regenerating...")

        # HTTPExceptions raised here are re-raised by the handler below.
        generated = self.generate_research_persona(user_id)

        if not generated:
            # Log detailed error for debugging expensive failures
            logger.error(
                f"❌ Failed to generate research persona for user {user_id} - "
                f"This is an expensive failure (API call consumed). Check logs above for details."
            )
            return None

        # A failed save is only logged; the freshly generated persona is
        # still returned to the caller.
        if self.save_research_persona(user_id, generated):
            logger.info(f"✅ Research persona generated and saved for user {user_id}")
        else:
            logger.warning(f"Failed to save research persona for user {user_id}")
        return generated

    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
        raise
    except Exception as unexpected:
        logger.error(f"Error getting/generating research persona for user {user_id}: {unexpected}")
        return None
|
||||
|
||||
def generate_research_persona(self, user_id: str) -> Optional[ResearchPersona]:
    """
    Build a fresh research persona for the user via the LLM.

    Collects onboarding data, builds the prompt and JSON schema, calls
    ``llm_text_gen`` and validates the answer as a ``ResearchPersona``.

    Args:
        user_id: User ID (Clerk string)

    Returns:
        ResearchPersona if successful, None otherwise

    Raises:
        HTTPException: Re-raised from the LLM call, or raised with status
            429 when the LLM call fails with a RuntimeError.
    """
    import json

    try:
        logger.info(f"Generating research persona for user {user_id}")

        onboarding = self._collect_onboarding_data(user_id)
        if not onboarding:
            logger.warning(f"Insufficient onboarding data for user {user_id}")
            return None

        llm_prompt = self.prompt_builder.build_research_persona_prompt(onboarding)
        schema = self.prompt_builder.get_json_schema()

        logger.info(f"Calling LLM for research persona generation (user: {user_id})")
        try:
            llm_output = llm_text_gen(
                prompt=llm_prompt,
                json_struct=schema,
                user_id=user_id
            )
        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
            logger.warning(f"HTTPException during LLM call for user {user_id} - re-raising")
            raise
        except RuntimeError as limit_error:
            # RuntimeError (subscription limits) is surfaced as HTTP 429
            logger.warning(f"RuntimeError during LLM call for user {user_id}: {limit_error}")
            raise HTTPException(status_code=429, detail=str(limit_error))

        if not llm_output:
            logger.error("Empty response from LLM")
            return None

        try:
            if isinstance(llm_output, dict):
                # With json_struct set, llm_text_gen may already hand back a dict.
                persona_fields = llm_output
            elif isinstance(llm_output, str):
                # Strip an optional markdown code fence around the JSON text.
                llm_output = llm_output.strip()
                for opening_fence in ("```json", "```"):
                    if llm_output.startswith(opening_fence):
                        llm_output = llm_output[len(opening_fence):]
                if llm_output.endswith("```"):
                    llm_output = llm_output[:-3]
                llm_output = llm_output.strip()

                persona_fields = json.loads(llm_output)
            else:
                logger.error(f"Unexpected response type from LLM: {type(llm_output)}")
                return None

            # Stamp the payload with the generation time.
            persona_fields["generated_at"] = datetime.utcnow().isoformat()

            try:
                persona = ResearchPersona(**persona_fields)
                logger.info(f"✅ Research persona generated successfully for user {user_id}")
                return persona
            except Exception as validation_error:
                logger.error(f"Failed to validate ResearchPersona from dict: {validation_error}")
                logger.debug(f"Persona dict keys: {list(persona_fields.keys()) if isinstance(persona_fields, dict) else 'Not a dict'}")
                logger.debug(f"Persona dict sample: {str(persona_fields)[:500]}")
                # Handled by the generic handler just below.
                raise

        except json.JSONDecodeError as decode_error:
            logger.error(f"Failed to parse LLM response as JSON: {decode_error}")
            logger.debug(f"Response text: {llm_output[:500] if isinstance(llm_output, str) else str(llm_output)[:500]}")
            return None
        except Exception as build_error:
            logger.error(f"Failed to create ResearchPersona from response: {build_error}")
            return None

    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
        raise
    except Exception as unexpected:
        logger.error(f"Error generating research persona for user {user_id}: {unexpected}")
        return None
|
||||
|
||||
def is_cache_valid(self, persona_data: PersonaData) -> bool:
    """
    Check if cached research persona is still valid (within TTL).

    Args:
        persona_data: PersonaData database record

    Returns:
        True if ``research_persona_generated_at`` is set and is less than
        ``CACHE_TTL_DAYS`` days old, False otherwise.
    """
    generated_at = persona_data.research_persona_generated_at
    if not generated_at:
        return False

    # datetime.utcnow() is naive. Subtracting a timezone-aware timestamp from
    # it raises TypeError, so aware values are first converted to naive UTC.
    utc_offset = generated_at.utcoffset()
    if utc_offset is not None:
        generated_at = (generated_at - utc_offset).replace(tzinfo=None)

    cache_age = datetime.utcnow() - generated_at
    is_valid = cache_age < timedelta(days=self.CACHE_TTL_DAYS)

    if not is_valid:
        logger.debug(f"Cache expired (age: {cache_age.days} days, TTL: {self.CACHE_TTL_DAYS} days)")

    return is_valid
|
||||
|
||||
def save_research_persona(
    self,
    user_id: str,
    research_persona: ResearchPersona
) -> bool:
    """
    Persist a research persona on the user's PersonaData record.

    Args:
        user_id: User ID (Clerk string)
        research_persona: ResearchPersona to save

    Returns:
        True if the record was updated and committed, False when no record
        exists or an error occurred (the session is rolled back on error).
    """
    try:
        record = self._get_persona_data_record(user_id)
        if not record:
            logger.error(f"No persona data record found for user {user_id}")
            return False

        # Store the persona as a plain dict and refresh the cache timestamp.
        record.research_persona = research_persona.dict()
        record.research_persona_generated_at = datetime.utcnow()
        self.db.commit()

        logger.info(f"✅ Research persona saved for user {user_id}")
        return True

    except Exception as save_error:
        logger.error(f"Error saving research persona for user {user_id}: {save_error}")
        self.db.rollback()
        return False
|
||||
|
||||
def _get_persona_data_record(self, user_id: str) -> Optional[PersonaData]:
    """Look up the PersonaData row that belongs to the user's onboarding session.

    Returns:
        The first matching PersonaData row, or None when the user has no
        onboarding session, no persona row, or the lookup fails (logged).
    """
    try:
        # Ensure research_persona columns exist before querying
        self.onboarding_service._ensure_research_persona_columns(self.db)

        onboarding_session = (
            self.db.query(OnboardingSession)
            .filter(OnboardingSession.user_id == user_id)
            .first()
        )
        if not onboarding_session:
            return None

        return (
            self.db.query(PersonaData)
            .filter(PersonaData.session_id == onboarding_session.id)
            .first()
        )

    except Exception as lookup_error:
        logger.error(f"Error getting persona data record for user {user_id}: {lookup_error}")
        return None
|
||||
|
||||
def _collect_onboarding_data(self, user_id: str) -> Optional[Dict[str, Any]]:
    """
    Collect all onboarding data needed for research persona generation.

    ``business_info`` is derived field by field: a value from the core
    persona wins, and the website analysis is only used to fill a field the
    persona did not provide.

    Args:
        user_id: User ID (Clerk string)

    Returns:
        Dictionary with website_analysis, persona_data, research_preferences
        and business_info, or None when neither website analysis nor persona
        data is available or an error occurs (logged).
    """
    try:
        website_analysis = self.onboarding_service.get_website_analysis(user_id, self.db) or {}
        persona_data_dict = self.onboarding_service.get_persona_data(user_id, self.db) or {}
        research_prefs = self.onboarding_service.get_research_preferences(user_id, self.db) or {}

        # Business info is constructed from persona data and website analysis.
        business_info = {}

        # Preferred source: the core persona (either key spelling).
        if persona_data_dict:
            core_persona = persona_data_dict.get('corePersona') or persona_data_dict.get('core_persona')
            # Guard against a non-dict payload so that one malformed field
            # does not discard all onboarding data via the except below.
            if isinstance(core_persona, dict):
                for field in ('industry', 'target_audience'):
                    if core_persona.get(field):
                        business_info[field] = core_persona[field]

        # Fallback: fill only the fields still missing from the website
        # analysis, never overwriting what the persona already supplied.
        needs_industry = not business_info.get('industry')
        needs_audience = not business_info.get('target_audience')
        if website_analysis and (needs_industry or needs_audience):
            target_audience_data = website_analysis.get('target_audience', {})
            if isinstance(target_audience_data, dict):
                industry_focus = target_audience_data.get('industry_focus')
                if needs_industry and industry_focus:
                    business_info['industry'] = industry_focus

                demographics = target_audience_data.get('demographics')
                if needs_audience and demographics:
                    business_info['target_audience'] = (
                        demographics if isinstance(demographics, str) else str(demographics)
                    )

        # Without website analysis and persona data there is nothing to build on.
        if not website_analysis and not persona_data_dict:
            logger.warning(f"Insufficient onboarding data for user {user_id}")
            return None

        return {
            "website_analysis": website_analysis,
            "persona_data": persona_data_dict,
            "research_preferences": research_prefs,
            "business_info": business_info
        }

    except Exception as e:
        logger.error(f"Error collecting onboarding data for user {user_id}: {e}")
        return None
|
||||
@@ -10,7 +10,9 @@ from .core.exception_handler import (
|
||||
TaskExecutionError, DatabaseError, TaskLoaderError, SchedulerConfigError
|
||||
)
|
||||
from .executors.monitoring_task_executor import MonitoringTaskExecutor
|
||||
from .executors.oauth_token_monitoring_executor import OAuthTokenMonitoringExecutor
|
||||
from .utils.task_loader import load_due_monitoring_tasks
|
||||
from .utils.oauth_token_task_loader import load_due_oauth_token_monitoring_tasks
|
||||
|
||||
# Global scheduler instance (initialized on first access)
|
||||
_scheduler_instance: TaskScheduler = None
|
||||
@@ -37,6 +39,14 @@ def get_scheduler() -> TaskScheduler:
|
||||
monitoring_executor,
|
||||
load_due_monitoring_tasks
|
||||
)
|
||||
|
||||
# Register OAuth token monitoring executor
|
||||
oauth_token_executor = OAuthTokenMonitoringExecutor()
|
||||
_scheduler_instance.register_executor(
|
||||
'oauth_token_monitoring',
|
||||
oauth_token_executor,
|
||||
load_due_oauth_token_monitoring_tasks
|
||||
)
|
||||
|
||||
return _scheduler_instance
|
||||
|
||||
@@ -46,6 +56,7 @@ __all__ = [
|
||||
'TaskExecutor',
|
||||
'TaskExecutionResult',
|
||||
'MonitoringTaskExecutor',
|
||||
'OAuthTokenMonitoringExecutor',
|
||||
'get_scheduler',
|
||||
# Exception handling
|
||||
'SchedulerExceptionHandler',
|
||||
|
||||
141
backend/services/scheduler/core/check_cycle_handler.py
Normal file
141
backend/services/scheduler/core/check_cycle_handler.py
Normal file
@@ -0,0 +1,141 @@
|
||||
"""
|
||||
Check Cycle Handler
|
||||
Handles the main scheduler check cycle that finds and executes due tasks.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Dict, Any
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from services.database import get_db_session
|
||||
from utils.logger_utils import get_service_logger
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
from .exception_handler import DatabaseError
|
||||
from .interval_manager import adjust_check_interval_if_needed
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .scheduler import TaskScheduler
|
||||
|
||||
logger = get_service_logger("check_cycle_handler")
|
||||
|
||||
|
||||
async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
    """
    Run one scheduler check cycle: find due tasks and execute them.

    For every registered task type the scheduler's ``_process_task_type`` is
    awaited, the per-type counts are aggregated, a summary is logged in a
    single message and a ``check_cycle`` event row is saved. The check
    interval is adjusted first, based on active strategies.

    Args:
        scheduler: TaskScheduler instance
    """
    scheduler.stats['total_checks'] += 1
    check_start_time = datetime.utcnow()
    scheduler.stats['last_check'] = check_start_time.isoformat()

    # Execution summary for this check cycle; also handed to the scheduler
    # while it processes each task type.
    cycle_summary = {
        'tasks_found_by_type': {},
        'tasks_executed_by_type': {},
        'tasks_failed_by_type': {},
        'total_found': 0,
        'total_executed': 0,
        'total_failed': 0
    }
    # (per-type bucket, key in the per-type result, total key)
    buckets = (
        ('tasks_found_by_type', 'found', 'total_found'),
        ('tasks_executed_by_type', 'executed', 'total_executed'),
        ('tasks_failed_by_type', 'failed', 'total_failed'),
    )

    db = None
    try:
        db = get_db_session()
        if db is None:
            logger.error("[Scheduler Check] ❌ Failed to get database session")
            return

        # Check for active strategies and adjust interval intelligently
        await adjust_check_interval_if_needed(scheduler, db)

        for task_type in scheduler.registry.get_registered_types():
            type_summary = await scheduler._process_task_type(task_type, db, cycle_summary)
            if not type_summary:
                continue
            for bucket_key, result_key, _ in buckets:
                cycle_summary[bucket_key][task_type] = type_summary.get(result_key, 0)

        for bucket_key, _, total_key in buckets:
            cycle_summary[total_key] = sum(cycle_summary[bucket_key].values())

        check_duration = (datetime.utcnow() - check_start_time).total_seconds()
        active_strategies = scheduler.stats.get('active_strategies_count', 0)
        active_executions = len(scheduler.active_executions)

        # Assemble the multi-line summary that is logged as one message.
        check_lines = [
            f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
            f" ├─ Duration: {check_duration:.2f}s",
            f" ├─ Active Strategies: {active_strategies}",
            f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
            f" ├─ User Isolation: Enabled (tasks filtered by user_id)",
            f" ├─ Tasks Found: {cycle_summary['total_found']} total"
        ]

        found_items = list(cycle_summary['tasks_found_by_type'].items())
        final_index = len(found_items) - 1
        for position, (found_type, count) in enumerate(found_items):
            executed = cycle_summary['tasks_executed_by_type'].get(found_type, 0)
            failed = cycle_summary['tasks_failed_by_type'].get(found_type, 0)
            closes_tree = (
                position == final_index
                and cycle_summary['total_executed'] == 0
                and cycle_summary['total_failed'] == 0
            )
            prefix = " └─" if closes_tree else " ├─"
            check_lines.append(f"{prefix} {found_type}: {count} found, {executed} executed, {failed} failed")

        if cycle_summary['total_found'] > 0:
            check_lines.extend([
                f" ├─ Total Executed: {cycle_summary['total_executed']}",
                f" ├─ Total Failed: {cycle_summary['total_failed']}",
                f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}",
            ])
        else:
            check_lines.append(f" └─ No tasks found - scheduler idle")

        # The summary is emitted at WARNING level, as a single message.
        logger.warning("\n".join(check_lines))

        # Save check cycle event to database for historical tracking;
        # a failure here is logged and rolled back, not raised.
        try:
            db.add(SchedulerEventLog(
                event_type='check_cycle',
                event_date=check_start_time,
                check_cycle_number=scheduler.stats['total_checks'],
                check_interval_minutes=scheduler.current_check_interval_minutes,
                tasks_found=cycle_summary.get('total_found', 0),
                tasks_executed=cycle_summary.get('total_executed', 0),
                tasks_failed=cycle_summary.get('total_failed', 0),
                tasks_by_type=cycle_summary.get('tasks_found_by_type', {}),
                check_duration_seconds=check_duration,
                active_strategies_count=active_strategies,
                active_executions=active_executions,
                event_data={
                    'executed_by_type': cycle_summary.get('tasks_executed_by_type', {}),
                    'failed_by_type': cycle_summary.get('tasks_failed_by_type', {})
                }
            ))
            db.commit()
        except Exception as save_error:
            logger.warning(f"Failed to save check cycle event log: {save_error}")
            if db:
                db.rollback()

        # Update last_update timestamp for frontend polling
        scheduler.stats['last_update'] = datetime.utcnow().isoformat()

    except Exception as cycle_error:
        # Any failure in the cycle is reported through the scheduler's
        # exception handler, wrapped as a DatabaseError.
        wrapped = DatabaseError(
            message=f"Error checking for due tasks: {str(cycle_error)}",
            original_error=cycle_error
        )
        scheduler.exception_handler.handle_exception(wrapped)
        logger.error(f"[Scheduler Check] ❌ Error in check cycle: {str(cycle_error)}")
    finally:
        if db:
            db.close()
|
||||
|
||||
139
backend/services/scheduler/core/interval_manager.py
Normal file
139
backend/services/scheduler/core/interval_manager.py
Normal file
@@ -0,0 +1,139 @@
|
||||
"""
|
||||
Interval Manager
|
||||
Handles intelligent scheduling interval adjustment based on active strategies.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from services.database import get_db_session
|
||||
from utils.logger_utils import get_service_logger
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .scheduler import TaskScheduler
|
||||
|
||||
logger = get_service_logger("interval_manager")
|
||||
|
||||
|
||||
async def determine_optimal_interval(
    scheduler: 'TaskScheduler',
    min_interval: int,
    max_interval: int
) -> int:
    """
    Pick the check interval from the number of active strategies with tasks.

    The count is also stored in ``scheduler.stats['active_strategies_count']``.

    Args:
        scheduler: TaskScheduler instance
        min_interval: Minimum check interval in minutes
        max_interval: Maximum check interval in minutes

    Returns:
        ``min_interval`` when at least one active strategy has tasks,
        ``max_interval`` when none does, and ``min_interval`` when no
        database session is available or the lookup fails.
    """
    db = None
    try:
        db = get_db_session()
        if db:
            from services.active_strategy_service import ActiveStrategyService

            strategy_service = ActiveStrategyService(db_session=db)
            active_count = strategy_service.count_active_strategies_with_tasks()
            scheduler.stats['active_strategies_count'] = active_count

            if active_count <= 0:
                logger.info(f"No active strategies with tasks - using {max_interval}min interval")
                return max_interval

            logger.info(f"Found {active_count} active strategies with tasks - using {min_interval}min interval")
            return min_interval
    except Exception as lookup_error:
        logger.warning(f"Error determining optimal interval: {lookup_error}, using default {min_interval}min")
    finally:
        if db:
            db.close()

    # Default to shorter interval on error (safer)
    return min_interval
|
||||
|
||||
|
||||
async def adjust_check_interval_if_needed(
    scheduler: 'TaskScheduler',
    db: Session
):
    """
    Intelligently adjust check interval based on active strategies.

    If there are active strategies with tasks, the scheduler's minimum
    interval is used (check more frequently); otherwise the maximum interval
    is used (check less frequently). The 'check_due_tasks' job is only
    rescheduled when the chosen interval differs from the current one.

    Side effects when the interval changes:
        - ``scheduler.scheduler.modify_job('check_due_tasks', ...)`` is called
        - ``scheduler.current_check_interval_minutes`` and
          ``scheduler.stats['last_interval_adjustment']`` are updated
        - an 'interval_adjustment' SchedulerEventLog row is written using a
          separate session (best effort; failures are only logged)

    All exceptions are caught and logged; nothing is raised to the caller.

    Args:
        scheduler: TaskScheduler instance
        db: Database session used to count active strategies
    """
    try:
        from services.active_strategy_service import ActiveStrategyService

        active_strategy_service = ActiveStrategyService(db_session=db)
        active_count = active_strategy_service.count_active_strategies_with_tasks()
        scheduler.stats['active_strategies_count'] = active_count

        # Determine optimal interval
        if active_count > 0:
            optimal_interval = scheduler.min_check_interval_minutes
        else:
            optimal_interval = scheduler.max_check_interval_minutes

        # Only reschedule if interval needs to change
        previous_interval = scheduler.current_check_interval_minutes
        if optimal_interval == previous_interval:
            return

        interval_message = (
            f"[Scheduler] ⚙️ Adjusting Check Interval\n"
            f" ├─ Current: {previous_interval}min\n"
            f" ├─ Optimal: {optimal_interval}min\n"
            f" ├─ Active Strategies: {active_count}\n"
            f" └─ Reason: {'Active strategies detected' if active_count > 0 else 'No active strategies'}"
        )
        logger.warning(interval_message)

        # Reschedule the job with new interval
        scheduler.scheduler.modify_job(
            'check_due_tasks',
            trigger=scheduler._get_trigger_for_interval(optimal_interval)
        )

        # Update current interval (only reached if modify_job succeeded)
        scheduler.current_check_interval_minutes = optimal_interval
        # NOTE: naive UTC timestamp kept as-is so the stored format does not change.
        scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()

        # Save interval adjustment event to database (best effort).
        event_db = None
        try:
            event_db = get_db_session()
            if event_db:
                event_log = SchedulerEventLog(
                    event_type='interval_adjustment',
                    event_date=datetime.utcnow(),
                    previous_interval_minutes=previous_interval,
                    new_interval_minutes=optimal_interval,
                    check_interval_minutes=optimal_interval,
                    active_strategies_count=active_count,
                    event_data={
                        'reason': 'intelligent_scheduling',
                        'min_interval': scheduler.min_check_interval_minutes,
                        'max_interval': scheduler.max_check_interval_minutes
                    }
                )
                event_db.add(event_log)
                event_db.commit()
        except Exception as e:
            logger.warning(f"Failed to save interval adjustment event log: {e}")
        finally:
            # Close on every path so a failed add/commit does not leak the session.
            if event_db:
                event_db.close()

        logger.warning(f"[Scheduler] ✅ Interval adjusted to {optimal_interval}min")

    except Exception as e:
        logger.warning(f"Error adjusting check interval: {e}")
|
||||
|
||||
269
backend/services/scheduler/core/job_restoration.py
Normal file
269
backend/services/scheduler/core/job_restoration.py
Normal file
@@ -0,0 +1,269 @@
|
||||
"""
|
||||
Job Restoration
|
||||
Handles restoration of one-time jobs (e.g., persona generation) on scheduler startup.
|
||||
Preserves original scheduled times from database to avoid rescheduling on server restarts.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from utils.logger_utils import get_service_logger
|
||||
from services.database import get_db_session
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .scheduler import TaskScheduler
|
||||
|
||||
logger = get_service_logger("job_restoration")
|
||||
|
||||
|
||||
# A missed job is still executed if its original time is at most this far in the past.
_PERSONA_JOB_GRACE_SECONDS = 3600  # 1 hour
# Delay used when a job has to be scheduled fresh (no usable original time).
_PERSONA_JOB_FALLBACK_DELAY_MINUTES = 20


async def _restore_one_time_persona_job(
    scheduler: 'TaskScheduler',
    db,
    user_id,
    job_id,
    label,
    task_func,
    schedule_func,
    now,
):
    """
    Restore a single one-time persona generation job for one user.

    Decision order:
        1. Job already present in the scheduler -> skip.
        2. A 'job_completed' / 'job_failed' event exists for the job -> skip.
        3. No 'job_scheduled' event (or it has no event_data / no
           'scheduled_for') -> schedule a fresh job via ``schedule_func``.
        4. Original time is in the future -> reschedule at the original time.
        5. Original time is now or at most the grace period in the past ->
           run ``task_func`` immediately (errors are logged, not raised).
        6. Otherwise the job is too old -> skip.
        Any exception raised while handling steps 4-6 (e.g. an unparsable
        timestamp) falls back to scheduling a fresh job.

    Args:
        scheduler: TaskScheduler instance
        db: Database session used to query SchedulerEventLog
        user_id: Owner of the job
        job_id: Scheduler job id to restore
        label: Human-readable job kind used in log messages
            (e.g. "research persona")
        task_func: Coroutine function executed/scheduled as the job; called
            with ``user_id``
        schedule_func: Callable ``(user_id, delay_minutes=...)`` that
            schedules a fresh job
        now: Timezone-aware current time used for all comparisons

    Returns:
        ``(restored, skipped)`` increments for the caller's counters. A missed
        job that is executed immediately contributes to neither counter.
    """
    # Capitalised variant for messages that start with the label.
    title = label[0].upper() + label[1:]

    # Check if job already exists in scheduler (just started, so unlikely)
    if any(job.id == job_id for job in scheduler.scheduler.get_jobs()):
        logger.debug(f"{title} job {job_id} already exists in scheduler, skipping restoration")
        return 0, 1

    # Check if job was already completed or failed
    completed_event = db.query(SchedulerEventLog).filter(
        SchedulerEventLog.event_type.in_(['job_completed', 'job_failed']),
        SchedulerEventLog.job_id == job_id,
        SchedulerEventLog.user_id == user_id
    ).order_by(SchedulerEventLog.event_date.desc()).first()

    if completed_event:
        logger.debug(f"{title} job {job_id} already completed/failed, skipping restoration")
        return 0, 1

    # Check SchedulerEventLog for original scheduled time
    original_scheduled_event = db.query(SchedulerEventLog).filter(
        SchedulerEventLog.event_type == 'job_scheduled',
        SchedulerEventLog.job_id == job_id,
        SchedulerEventLog.user_id == user_id
    ).order_by(SchedulerEventLog.event_date.desc()).first()

    if not (original_scheduled_event and original_scheduled_event.event_data):
        # No previous scheduled event, schedule new job
        logger.warning(
            f"[Restoration] No previous scheduled event found for {label} job {job_id}, "
            f"scheduling NEW job with current time + {_PERSONA_JOB_FALLBACK_DELAY_MINUTES} minutes"
        )
        schedule_func(user_id, delay_minutes=_PERSONA_JOB_FALLBACK_DELAY_MINUTES)
        return 1, 0

    scheduled_for_str = original_scheduled_event.event_data.get('scheduled_for')
    if not scheduled_for_str:
        # No original time in event data, schedule new job
        logger.warning(
            f"[Restoration] No original scheduled time found for {label} job {job_id}, "
            f"scheduling NEW job with current time + {_PERSONA_JOB_FALLBACK_DELAY_MINUTES} minutes"
        )
        schedule_func(user_id, delay_minutes=_PERSONA_JOB_FALLBACK_DELAY_MINUTES)
        return 1, 0

    try:
        original_time = datetime.fromisoformat(scheduled_for_str.replace('Z', '+00:00'))
        if original_time.tzinfo is None:
            # Stored value carried no offset; treat it as UTC.
            original_time = original_time.replace(tzinfo=timezone.utc)

        if original_time > now:
            # Restore with original future time
            time_until_run = (original_time - now).total_seconds() / 60  # minutes
            logger.warning(
                f"[Restoration] Restoring {label} job {job_id} with ORIGINAL scheduled time: "
                f"{original_time} (UTC) = {original_time.astimezone().strftime('%H:%M:%S %Z')} (local), "
                f"will run in {time_until_run:.1f} minutes"
            )
            scheduler.schedule_one_time_task(
                func=task_func,
                run_date=original_time,
                job_id=job_id,
                kwargs={'user_id': user_id},
                replace_existing=True
            )
            return 1, 0

        # original_time <= now from here on. Testing the future case first
        # means a job due exactly "now" is executed rather than being
        # classified as too old.
        time_since_scheduled = (now - original_time).total_seconds()
        if time_since_scheduled <= _PERSONA_JOB_GRACE_SECONDS:
            # Execute immediately (missed job, within grace period)
            logger.warning(f"Restoring {label} job {job_id} - original time was {original_time}, executing now (missed)")
            try:
                await task_func(user_id)
            except Exception as exec_error:
                logger.error(f"Error executing missed {label} job {job_id}: {exec_error}")
            return 0, 0

        # Too old (beyond grace period), skip
        logger.debug(f"{title} job {job_id} scheduled time {original_time} is too old, skipping")
        return 0, 1
    except Exception as time_error:
        logger.warning(f"Error parsing original scheduled time for {job_id}: {time_error}, scheduling new job")
        # Fall back to scheduling a new job
        schedule_func(user_id, delay_minutes=_PERSONA_JOB_FALLBACK_DELAY_MINUTES)
        return 1, 0


async def restore_persona_jobs(scheduler: 'TaskScheduler'):
    """
    Restore one-time persona generation jobs for users who completed onboarding
    but don't have personas yet. This ensures jobs persist across server restarts.

    For every OnboardingSession with ``progress == 100.0``:
        - a research persona job is restored when the user's persona data
          record has no ``research_persona``;
        - a Facebook persona job is restored when the user has a core persona
          but no Facebook platform persona.

    IMPORTANT: Preserves original scheduled times from SchedulerEventLog to avoid
    rescheduling jobs with new times on server restarts.

    Failures for one user/job kind are logged and do not stop the remaining
    users; the function never raises.

    Args:
        scheduler: TaskScheduler instance
    """
    try:
        db = get_db_session()
        if not db:
            logger.warning("Could not get database session to restore persona jobs")
            return

        try:
            # Imported lazily inside the function, as in the original code.
            from models.onboarding import OnboardingSession
            from services.research.research_persona_scheduler import (
                schedule_research_persona_generation,
                generate_research_persona_task
            )
            from services.persona.facebook.facebook_persona_scheduler import (
                schedule_facebook_persona_generation,
                generate_facebook_persona_task
            )
            from services.research.research_persona_service import ResearchPersonaService
            from services.persona_data_service import PersonaDataService

            # Get all users who completed onboarding
            completed_sessions = db.query(OnboardingSession).filter(
                OnboardingSession.progress == 100.0
            ).all()

            restored_count = 0
            skipped_count = 0
            now = datetime.now(timezone.utc)

            for session in completed_sessions:
                user_id = session.user_id

                # Restore research persona job
                try:
                    research_service = ResearchPersonaService(db_session=db)
                    persona_data_record = research_service._get_persona_data_record(user_id)
                    research_persona_exists = False

                    if persona_data_record:
                        research_persona_data = getattr(persona_data_record, 'research_persona', None)
                        research_persona_exists = bool(research_persona_data)

                    if not research_persona_exists:
                        # Note: Clerk user_id already includes "user_" prefix
                        restored, skipped = await _restore_one_time_persona_job(
                            scheduler,
                            db,
                            user_id,
                            f"research_persona_{user_id}",
                            "research persona",
                            generate_research_persona_task,
                            schedule_research_persona_generation,
                            now,
                        )
                        restored_count += restored
                        skipped_count += skipped
                except Exception as e:
                    logger.debug(f"Could not restore research persona for user {user_id}: {e}")

                # Restore Facebook persona job
                try:
                    persona_data_service = PersonaDataService(db_session=db)
                    persona_data = persona_data_service.get_user_persona_data(user_id)
                    platform_personas = persona_data.get('platform_personas', {}) if persona_data else {}
                    facebook_persona_exists = bool(platform_personas.get('facebook') if platform_personas else None)
                    has_core_persona = bool(persona_data.get('core_persona') if persona_data else False)

                    if not facebook_persona_exists and has_core_persona:
                        # Note: Clerk user_id already includes "user_" prefix
                        restored, skipped = await _restore_one_time_persona_job(
                            scheduler,
                            db,
                            user_id,
                            f"facebook_persona_{user_id}",
                            "Facebook persona",
                            generate_facebook_persona_task,
                            schedule_facebook_persona_generation,
                            now,
                        )
                        restored_count += restored
                        skipped_count += skipped
                except Exception as e:
                    logger.debug(f"Could not restore Facebook persona for user {user_id}: {e}")

            if restored_count > 0:
                logger.warning(f"[Scheduler] ✅ Restored {restored_count} persona generation job(s) on startup (preserved original scheduled times)")
            if skipped_count > 0:
                logger.debug(f"[Scheduler] Skipped {skipped_count} persona job(s) (already completed/failed or exist)")

        finally:
            db.close()

    except Exception as e:
        logger.warning(f"Error restoring persona jobs: {e}")
|
||||
|
||||
196
backend/services/scheduler/core/oauth_task_restoration.py
Normal file
196
backend/services/scheduler/core/oauth_task_restoration.py
Normal file
@@ -0,0 +1,196 @@
|
||||
"""
|
||||
OAuth Token Monitoring Task Restoration
|
||||
Automatically creates missing OAuth monitoring tasks for users who have connected platforms
|
||||
but don't have monitoring tasks created yet.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List
|
||||
from sqlalchemy.orm import Session
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
from services.oauth_token_monitoring_service import get_connected_platforms, create_oauth_monitoring_tasks
|
||||
|
||||
# Use service logger for consistent logging (WARNING level visible in production)
|
||||
logger = get_service_logger("oauth_task_restoration")
|
||||
|
||||
|
||||
def _summarize_platforms(tasks) -> str:
    """Return a "platform: count" summary, sorted by platform, for the given tasks."""
    counts = {}
    for task in tasks:
        counts[task.platform] = counts.get(task.platform, 0) + 1
    return ", ".join(f"{platform}: {count}" for platform, count in sorted(counts.items()))


async def restore_oauth_monitoring_tasks(scheduler):
    """
    Restore/create missing OAuth token monitoring tasks for all users.

    Users checked are those that already own at least one
    OAuthTokenMonitoringTask plus, when the onboarding lookup succeeds, users
    whose onboarding is completed (``current_step >= 6`` or
    ``progress >= 100.0``, confirmed via the onboarding progress service).
    For each user, tasks are created for platforms that are:
    - Connected (detected via get_connected_platforms)
    - Missing monitoring tasks (no OAuthTokenMonitoringTask exists)

    Failures for a single user are logged and do not stop the remaining
    users; the function never raises.

    Args:
        scheduler: TaskScheduler instance (not used by this function)
    """
    try:
        logger.warning("[OAuth Task Restoration] Starting OAuth monitoring task restoration...")
        db = get_db_session()
        if not db:
            logger.warning("[OAuth Task Restoration] Could not get database session")
            return

        try:
            # Get all existing OAuth tasks and index their platforms by user so
            # the per-user check below is a lookup instead of a full rescan.
            existing_tasks = db.query(OAuthTokenMonitoringTask).all()
            platforms_by_user = {}
            for task in existing_tasks:
                platforms_by_user.setdefault(task.user_id, set()).add(task.platform)
            user_ids_with_tasks = set(platforms_by_user)

            # Log existing tasks breakdown by platform
            platform_summary = _summarize_platforms(existing_tasks)
            logger.warning(
                f"[OAuth Task Restoration] Found {len(existing_tasks)} existing OAuth tasks "
                f"for {len(user_ids_with_tasks)} users. Platforms: {platform_summary}"
            )

            # Check users who already have at least one OAuth task
            users_to_check = list(user_ids_with_tasks)

            # Also query all users from onboarding who completed step 5 (integrations)
            # to catch users who connected platforms but tasks weren't created.
            # Completion is tracked by: current_step >= 6 OR progress >= 100.0
            try:
                from services.onboarding.progress_service import get_onboarding_progress_service
                from models.onboarding import OnboardingSession
                from sqlalchemy import or_

                progress_service = get_onboarding_progress_service()

                completed_sessions = db.query(OnboardingSession).filter(
                    or_(
                        OnboardingSession.current_step >= 6,
                        OnboardingSession.progress >= 100.0
                    )
                ).all()

                # Validate each candidate with the service method as well
                onboarding_user_ids = set()
                for session in completed_sessions:
                    status = progress_service.get_onboarding_status(session.user_id)
                    if status.get('is_completed', False):
                        onboarding_user_ids.add(session.user_id)

                # Add users from onboarding who do not have any task yet
                new_user_ids = onboarding_user_ids - user_ids_with_tasks
                users_to_check.extend(new_user_ids)

                logger.warning(
                    f"[OAuth Task Restoration] Checking {len(users_to_check)} users "
                    f"({len(user_ids_with_tasks)} with existing tasks, "
                    f"{len(onboarding_user_ids)} from onboarding sessions, "
                    f"{len(new_user_ids)} new users to check)"
                )
            except Exception as e:
                # Fallback to users with existing tasks only
                logger.warning(f"[OAuth Task Restoration] Could not query onboarding users: {e}")

            total_created = 0
            for user_id in users_to_check:
                try:
                    # Get connected platforms for this user
                    connected_platforms = get_connected_platforms(user_id)

                    logger.warning(
                        f"[OAuth Task Restoration] User {user_id}: "
                        f"Connected platforms: {connected_platforms}"
                    )

                    if not connected_platforms:
                        logger.debug(
                            f"[OAuth Task Restoration] No connected platforms for user {user_id}, skipping"
                        )
                        continue

                    # Check which platforms are missing tasks
                    existing_platforms = platforms_by_user.get(user_id, set())
                    missing_platforms = [
                        platform
                        for platform in connected_platforms
                        if platform not in existing_platforms
                    ]

                    if missing_platforms:
                        logger.warning(
                            f"[OAuth Task Restoration] ⚠️ User {user_id} has connected platforms "
                            f"{connected_platforms} but missing tasks for: {missing_platforms}"
                        )

                        # Create missing tasks
                        created = create_oauth_monitoring_tasks(
                            user_id=user_id,
                            db=db,
                            platforms=missing_platforms
                        )

                        total_created += len(created)

                        logger.warning(
                            f"[OAuth Task Restoration] ✅ Created {len(created)} missing OAuth tasks "
                            f"for user {user_id}, platforms: {missing_platforms}"
                        )
                    else:
                        logger.warning(
                            f"[OAuth Task Restoration] ✅ User {user_id} has all required tasks "
                            f"for connected platforms: {connected_platforms}"
                        )

                except Exception as e:
                    logger.warning(
                        f"[OAuth Task Restoration] Error checking/creating tasks for user {user_id}: {e}",
                        exc_info=True
                    )
                    continue

            # Final summary log with platform breakdown (re-queried so newly
            # created tasks are included)
            final_existing_tasks = db.query(OAuthTokenMonitoringTask).all()
            final_platform_summary = _summarize_platforms(final_existing_tasks)

            if total_created > 0:
                logger.warning(
                    f"[OAuth Task Restoration] ✅ Created {total_created} missing OAuth monitoring tasks. "
                    f"Final platform breakdown: {final_platform_summary}"
                )
            else:
                logger.warning(
                    f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
                    f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks. "
                    f"Platform breakdown: {final_platform_summary}"
                )

        finally:
            db.close()

    except Exception as e:
        logger.error(
            f"[OAuth Task Restoration] Error restoring OAuth monitoring tasks: {e}",
            exc_info=True
        )
|
||||
|
||||
@@ -10,6 +10,7 @@ from datetime import datetime
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
from apscheduler.triggers.date import DateTrigger
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from .executor_interface import TaskExecutor, TaskExecutionResult
|
||||
@@ -20,6 +21,13 @@ from .exception_handler import (
|
||||
)
|
||||
from services.database import get_db_session
|
||||
from utils.logger_utils import get_service_logger
|
||||
from ..utils.user_job_store import get_user_job_store_name
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
|
||||
from .job_restoration import restore_persona_jobs
|
||||
from .oauth_task_restoration import restore_oauth_monitoring_tasks
|
||||
from .check_cycle_handler import check_and_execute_due_tasks
|
||||
from .task_execution_handler import execute_task_async
|
||||
|
||||
logger = get_service_logger("task_scheduler")
|
||||
|
||||
@@ -34,6 +42,14 @@ class TaskScheduler:
|
||||
- Database-backed task persistence
|
||||
- Configurable check intervals
|
||||
- Automatic retry logic
|
||||
- User isolation: All tasks are filtered by user_id for isolation
|
||||
- Per-user job store context: Logs show user's website root for debugging
|
||||
|
||||
User Isolation:
|
||||
- Tasks are filtered by user_id in task loaders
|
||||
- Execution logs include user_id for tracking
|
||||
- Per-user statistics are maintained
|
||||
- Job store names (based on website root) are logged for debugging
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -63,7 +79,7 @@ class TaskScheduler:
|
||||
job_defaults={
|
||||
'coalesce': True,
|
||||
'max_instances': 1,
|
||||
'misfire_grace_time': 300 # 5 minutes grace period
|
||||
'misfire_grace_time': 3600 # 1 hour grace period for missed jobs
|
||||
}
|
||||
)
|
||||
|
||||
@@ -89,6 +105,7 @@ class TaskScheduler:
|
||||
'tasks_failed': 0,
|
||||
'tasks_skipped': 0,
|
||||
'last_check': None,
|
||||
'last_update': datetime.utcnow().isoformat(), # Timestamp for frontend polling
|
||||
'per_user_stats': {}, # Track metrics per user for user isolation
|
||||
'active_strategies_count': 0, # Track active strategies with tasks
|
||||
'last_interval_adjustment': None # Track when interval was last adjusted
|
||||
@@ -141,7 +158,11 @@ class TaskScheduler:
|
||||
|
||||
try:
|
||||
# Determine initial check interval based on active strategies
|
||||
initial_interval = await self._determine_optimal_interval()
|
||||
initial_interval = await determine_optimal_interval(
|
||||
self,
|
||||
self.min_check_interval_minutes,
|
||||
self.max_check_interval_minutes
|
||||
)
|
||||
self.current_check_interval_minutes = initial_interval
|
||||
|
||||
# Add periodic job to check for due tasks
|
||||
@@ -155,16 +176,228 @@ class TaskScheduler:
|
||||
self.scheduler.start()
|
||||
self._running = True
|
||||
|
||||
logger.info(
|
||||
f"Task scheduler started | "
|
||||
f"check_interval={initial_interval}min | "
|
||||
f"registered_types={self.registry.get_registered_types()}"
|
||||
)
|
||||
# Check for and execute any missed jobs that are still within grace period
|
||||
await self._execute_missed_jobs()
|
||||
|
||||
# Restore one-time persona generation jobs for users who completed onboarding
|
||||
await restore_persona_jobs(self)
|
||||
|
||||
# Restore/create missing OAuth token monitoring tasks for connected platforms
|
||||
await restore_oauth_monitoring_tasks(self)
|
||||
|
||||
# Get all scheduled APScheduler jobs (including one-time tasks)
|
||||
all_jobs = self.scheduler.get_jobs()
|
||||
registered_types = self.registry.get_registered_types()
|
||||
active_strategies = self.stats.get('active_strategies_count', 0)
|
||||
|
||||
# Count OAuth token monitoring tasks from database (recurring weekly tasks)
|
||||
oauth_tasks_count = 0
|
||||
oauth_tasks_details = []
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
# Count active tasks
|
||||
oauth_tasks_count = db.query(OAuthTokenMonitoringTask).filter(
|
||||
OAuthTokenMonitoringTask.status == 'active'
|
||||
).count()
|
||||
|
||||
# Get all tasks (for detailed logging)
|
||||
all_oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
total_oauth_tasks = len(all_oauth_tasks)
|
||||
|
||||
# Show platform breakdown for ALL tasks (active and inactive)
|
||||
all_platforms = {}
|
||||
active_platforms = {}
|
||||
for task in all_oauth_tasks:
|
||||
all_platforms[task.platform] = all_platforms.get(task.platform, 0) + 1
|
||||
if task.status == 'active':
|
||||
active_platforms[task.platform] = active_platforms.get(task.platform, 0) + 1
|
||||
|
||||
if total_oauth_tasks > 0:
|
||||
# Log details about all tasks (not just active)
|
||||
for task in all_oauth_tasks:
|
||||
oauth_tasks_details.append(
|
||||
f"user={task.user_id}, platform={task.platform}, status={task.status}"
|
||||
)
|
||||
|
||||
if total_oauth_tasks > 0 and oauth_tasks_count == 0:
|
||||
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
|
||||
logger.warning(
|
||||
f"[Scheduler] Found {total_oauth_tasks} OAuth monitoring tasks in database, "
|
||||
f"but {oauth_tasks_count} are active. "
|
||||
f"All platforms: {all_platform_summary}. "
|
||||
f"Task details: {', '.join(oauth_tasks_details[:5])}" # Limit to first 5 for readability
|
||||
)
|
||||
elif oauth_tasks_count > 0:
|
||||
# Show platform breakdown for active tasks
|
||||
active_platform_summary = ", ".join([f"{platform}: {count}" for platform, count in sorted(active_platforms.items())])
|
||||
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
|
||||
|
||||
# Check for missing platforms (expected: gsc, bing, wordpress, wix)
|
||||
expected_platforms = ['gsc', 'bing', 'wordpress', 'wix']
|
||||
missing_in_db = [p for p in expected_platforms if p not in all_platforms]
|
||||
|
||||
if missing_in_db:
|
||||
logger.warning(
|
||||
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
|
||||
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
|
||||
f"All platforms: {all_platform_summary}. "
|
||||
f"⚠️ Missing platforms (not connected or no tasks): {', '.join(missing_in_db)}"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
|
||||
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
|
||||
f"All platforms: {all_platform_summary}"
|
||||
)
|
||||
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Scheduler] Could not get OAuth token monitoring tasks count: {e}. "
|
||||
f"This may indicate the oauth_token_monitoring_tasks table doesn't exist yet or "
|
||||
f"tasks haven't been created. Error type: {type(e).__name__}"
|
||||
)
|
||||
|
||||
# Calculate job counts
|
||||
apscheduler_recurring = 1 # check_due_tasks
|
||||
apscheduler_one_time = len(all_jobs) - 1
|
||||
total_recurring = apscheduler_recurring + oauth_tasks_count
|
||||
total_jobs = len(all_jobs) + oauth_tasks_count
|
||||
|
||||
# Build comprehensive startup log message
|
||||
startup_lines = [
|
||||
f"[Scheduler] ✅ Task Scheduler Started",
|
||||
f" ├─ Check Interval: {initial_interval} minutes",
|
||||
f" ├─ Registered Task Types: {len(registered_types)} ({', '.join(registered_types) if registered_types else 'none'})",
|
||||
f" ├─ Active Strategies: {active_strategies}",
|
||||
f" ├─ Total Scheduled Jobs: {total_jobs}",
|
||||
f" ├─ Recurring Jobs: {total_recurring} (check_due_tasks: {apscheduler_recurring}, OAuth monitoring: {oauth_tasks_count})",
|
||||
f" └─ One-Time Jobs: {apscheduler_one_time}"
|
||||
]
|
||||
|
||||
# Add APScheduler job details
|
||||
if all_jobs:
|
||||
for idx, job in enumerate(all_jobs):
|
||||
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
next_run = job.next_run_time
|
||||
trigger_type = type(job.trigger).__name__
|
||||
|
||||
# Try to extract user_id from job ID or kwargs for context
|
||||
user_context = ""
|
||||
user_id_from_job = None
|
||||
|
||||
# First try to get from kwargs
|
||||
if hasattr(job, 'kwargs') and job.kwargs and job.kwargs.get('user_id'):
|
||||
user_id_from_job = job.kwargs.get('user_id')
|
||||
# Otherwise, try to extract from job ID (e.g., "research_persona_user_123..." or "research_persona_user123")
|
||||
elif job.id and ('research_persona_' in job.id or 'facebook_persona_' in job.id):
|
||||
# Job ID format: research_persona_{user_id} or facebook_persona_{user_id}
|
||||
# where user_id is Clerk format (e.g., "user_33Gz1FPI86VDXhRY8QN4ragRFGN")
|
||||
if job.id.startswith('research_persona_'):
|
||||
user_id_from_job = job.id.replace('research_persona_', '')
|
||||
elif job.id.startswith('facebook_persona_'):
|
||||
user_id_from_job = job.id.replace('facebook_persona_', '')
|
||||
else:
|
||||
# Fallback: try to extract from parts (old format with timestamp)
|
||||
parts = job.id.split('_')
|
||||
if len(parts) >= 3:
|
||||
user_id_from_job = parts[2] # Extract user_id from job ID
|
||||
|
||||
if user_id_from_job:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
user_job_store = get_user_job_store_name(user_id_from_job, db)
|
||||
if user_job_store == 'default':
|
||||
logger.debug(
|
||||
f"[Scheduler] Job store extraction returned 'default' for user {user_id_from_job}. "
|
||||
f"This may indicate no onboarding data or website URL not found."
|
||||
)
|
||||
user_context = f" | User: {user_id_from_job} | Store: {user_job_store}"
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Scheduler] Could not extract job store name for user {user_id_from_job}: {e}. "
|
||||
f"Error type: {type(e).__name__}"
|
||||
)
|
||||
user_context = f" | User: {user_id_from_job}"
|
||||
|
||||
startup_lines.append(f"{prefix} Job: {job.id} | Trigger: {trigger_type} | Next Run: {next_run}{user_context}")
|
||||
|
||||
# Add OAuth token monitoring tasks details
|
||||
# Show ALL OAuth tasks (active and inactive) for complete visibility
|
||||
if total_oauth_tasks > 0:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
# Get ALL tasks, not just active ones
|
||||
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
|
||||
for idx, task in enumerate(oauth_tasks):
|
||||
is_last = idx == len(oauth_tasks) - 1 and len(all_jobs) == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
if user_job_store == 'default':
|
||||
logger.debug(
|
||||
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
|
||||
f"This may indicate no onboarding data or website URL not found."
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
|
||||
f"Using 'default'. Error type: {type(e).__name__}"
|
||||
)
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
# Include status in the log line for visibility
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
|
||||
|
||||
# Log comprehensive startup information in single message
|
||||
logger.warning("\n".join(startup_lines))
|
||||
|
||||
# Save scheduler start event to database
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='start',
|
||||
event_date=datetime.utcnow(),
|
||||
check_interval_minutes=initial_interval,
|
||||
active_strategies_count=active_strategies,
|
||||
event_data={
|
||||
'registered_types': registered_types,
|
||||
'total_jobs': total_jobs,
|
||||
'recurring_jobs': total_recurring,
|
||||
'one_time_jobs': apscheduler_one_time,
|
||||
'oauth_monitoring_tasks': oauth_tasks_count
|
||||
}
|
||||
)
|
||||
db.add(event_log)
|
||||
db.commit()
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save scheduler start event log: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to start scheduler: {e}")
|
||||
raise
|
||||
|
||||
|
||||
async def stop(self):
|
||||
"""Stop the scheduler gracefully."""
|
||||
if not self._running:
|
||||
@@ -182,11 +415,48 @@ class TaskScheduler:
|
||||
timeout=30
|
||||
)
|
||||
|
||||
# Get final job count before shutdown
|
||||
all_jobs_before = self.scheduler.get_jobs()
|
||||
|
||||
# Shutdown scheduler
|
||||
self.scheduler.shutdown(wait=True)
|
||||
self._running = False
|
||||
|
||||
logger.info("Task scheduler stopped gracefully")
|
||||
# Log comprehensive shutdown information (use WARNING level for visibility)
|
||||
total_checks = self.stats.get('total_checks', 0)
|
||||
total_executed = self.stats.get('tasks_executed', 0)
|
||||
total_failed = self.stats.get('tasks_failed', 0)
|
||||
|
||||
shutdown_message = (
|
||||
f"[Scheduler] 🛑 Task Scheduler Stopped\n"
|
||||
f" ├─ Total Check Cycles: {total_checks}\n"
|
||||
f" ├─ Total Tasks Executed: {total_executed}\n"
|
||||
f" ├─ Total Tasks Failed: {total_failed}\n"
|
||||
f" ├─ Jobs Cancelled: {len(all_jobs_before)}\n"
|
||||
f" └─ Shutdown: Graceful"
|
||||
)
|
||||
logger.warning(shutdown_message)
|
||||
|
||||
# Save scheduler stop event to database
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='stop',
|
||||
event_date=datetime.utcnow(),
|
||||
check_interval_minutes=self.current_check_interval_minutes,
|
||||
event_data={
|
||||
'total_checks': total_checks,
|
||||
'total_executed': total_executed,
|
||||
'total_failed': total_failed,
|
||||
'jobs_cancelled': len(all_jobs_before)
|
||||
}
|
||||
)
|
||||
db.add(event_log)
|
||||
db.commit()
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save scheduler stop event log: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping scheduler: {e}")
|
||||
@@ -197,109 +467,50 @@ class TaskScheduler:
|
||||
Main scheduler loop: check for due tasks and execute them.
|
||||
This runs periodically with intelligent interval adjustment based on active strategies.
|
||||
"""
|
||||
self.stats['total_checks'] += 1
|
||||
self.stats['last_check'] = datetime.utcnow().isoformat()
|
||||
|
||||
logger.debug("Checking for due tasks...")
|
||||
|
||||
db = None
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db is None:
|
||||
logger.error("Failed to get database session")
|
||||
return
|
||||
|
||||
# Check for active strategies and adjust interval intelligently
|
||||
await self._adjust_check_interval_if_needed(db)
|
||||
|
||||
# Check each registered task type
|
||||
for task_type in self.registry.get_registered_types():
|
||||
await self._process_task_type(task_type, db)
|
||||
|
||||
except Exception as e:
|
||||
error = DatabaseError(
|
||||
message=f"Error checking for due tasks: {str(e)}",
|
||||
original_error=e
|
||||
)
|
||||
self.exception_handler.handle_exception(error)
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
|
||||
async def _determine_optimal_interval(self) -> int:
    """
    Pick the check interval from the number of active strategies with tasks.

    The count is obtained from ``ActiveStrategyService`` and stored in
    ``self.stats['active_strategies_count']``.

    Returns:
        ``self.min_check_interval_minutes`` when at least one active strategy
        with tasks exists, when no database session could be obtained, or when
        the lookup raised; ``self.max_check_interval_minutes`` when the count
        is zero.
    """
    session = None
    try:
        session = get_db_session()
        if session:
            from services.active_strategy_service import ActiveStrategyService

            strategy_service = ActiveStrategyService(db_session=session)
            strategy_count = strategy_service.count_active_strategies_with_tasks()
            self.stats['active_strategies_count'] = strategy_count

            if strategy_count > 0:
                logger.info(f"Found {strategy_count} active strategies with tasks - using {self.min_check_interval_minutes}min interval")
                return self.min_check_interval_minutes

            logger.info(f"No active strategies with tasks - using {self.max_check_interval_minutes}min interval")
            return self.max_check_interval_minutes
    except Exception as e:
        logger.warning(f"Error determining optimal interval: {e}, using default {self.min_check_interval_minutes}min")
    finally:
        # Runs on every exit path, including the early returns above.
        if session:
            session.close()

    # Reached on error or when no session was available: prefer the shorter
    # interval, which the original code treats as the safer default.
    return self.min_check_interval_minutes
|
||||
await check_and_execute_due_tasks(self)
|
||||
|
||||
async def _adjust_check_interval_if_needed(self, db: Session):
    """
    Adjust the scheduler's check interval according to active strategies.

    This method is a thin wrapper: the work is done by the module-level
    ``adjust_check_interval_if_needed`` helper, which receives this scheduler
    and the given session. The intent is to check more often while active
    strategies with tasks exist and less often when there are none.

    Args:
        db: Database session passed through to the helper.
    """
    await adjust_check_interval_if_needed(self, db)
|
||||
|
||||
async def _execute_missed_jobs(self):
|
||||
"""
|
||||
Check for and execute any missed DateTrigger jobs that are still within grace period.
|
||||
APScheduler marks jobs as 'missed' if they were scheduled to run while the scheduler wasn't running.
|
||||
"""
|
||||
try:
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
all_jobs = self.scheduler.get_jobs()
|
||||
now = datetime.utcnow().replace(tzinfo=self.scheduler.timezone)
|
||||
|
||||
active_strategy_service = ActiveStrategyService(db_session=db)
|
||||
active_count = active_strategy_service.count_active_strategies_with_tasks()
|
||||
self.stats['active_strategies_count'] = active_count
|
||||
missed_jobs = []
|
||||
for job in all_jobs:
|
||||
# Only check DateTrigger jobs (one-time tasks)
|
||||
if hasattr(job, 'trigger') and isinstance(job.trigger, DateTrigger):
|
||||
if job.next_run_time and job.next_run_time < now:
|
||||
# Job's scheduled time has passed
|
||||
time_since_scheduled = (now - job.next_run_time).total_seconds()
|
||||
# Check if still within grace period (1 hour = 3600 seconds)
|
||||
if time_since_scheduled <= 3600:
|
||||
missed_jobs.append(job)
|
||||
|
||||
# Determine optimal interval
|
||||
if active_count > 0:
|
||||
optimal_interval = self.min_check_interval_minutes
|
||||
else:
|
||||
optimal_interval = self.max_check_interval_minutes
|
||||
|
||||
# Only reschedule if interval needs to change
|
||||
if optimal_interval != self.current_check_interval_minutes:
|
||||
logger.info(
|
||||
f"Adjusting scheduler interval: {self.current_check_interval_minutes}min → {optimal_interval}min | "
|
||||
f"active_strategies={active_count}"
|
||||
if missed_jobs:
|
||||
logger.warning(
|
||||
f"[Scheduler] Found {len(missed_jobs)} missed job(s) within grace period, executing now..."
|
||||
)
|
||||
|
||||
# Reschedule the job with new interval
|
||||
self.scheduler.modify_job(
|
||||
'check_due_tasks',
|
||||
trigger=self._get_trigger_for_interval(optimal_interval)
|
||||
)
|
||||
|
||||
self.current_check_interval_minutes = optimal_interval
|
||||
self.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()
|
||||
|
||||
logger.info(f"Scheduler interval adjusted to {optimal_interval}min")
|
||||
|
||||
for job in missed_jobs:
|
||||
try:
|
||||
# Execute the job immediately
|
||||
logger.info(f"[Scheduler] Executing missed job: {job.id}")
|
||||
await job.func(*job.args, **job.kwargs)
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error executing missed job {job.id}: {e}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error adjusting check interval: {e}")
|
||||
logger.warning(f"[Scheduler] Error checking for missed jobs: {e}")
|
||||
|
||||
async def trigger_interval_adjustment(self):
|
||||
"""
|
||||
@@ -315,14 +526,22 @@ class TaskScheduler:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
await self._adjust_check_interval_if_needed(db)
|
||||
await adjust_check_interval_if_needed(self, db)
|
||||
db.close()
|
||||
else:
|
||||
logger.warning("Could not get database session for interval adjustment")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error triggering interval adjustment: {e}")
|
||||
|
||||
async def _process_task_type(self, task_type: str, db: Session):
|
||||
"""Process due tasks for a specific task type."""
|
||||
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Process due tasks for a specific task type.
|
||||
|
||||
Returns:
|
||||
Summary dict with 'found', 'executed', 'failed' counts, or None if no tasks
|
||||
"""
|
||||
summary = {'found': 0, 'executed': 0, 'failed': 0}
|
||||
|
||||
try:
|
||||
# Get task loader for this type
|
||||
try:
|
||||
@@ -334,7 +553,7 @@ class TaskScheduler:
|
||||
original_error=e
|
||||
)
|
||||
self.exception_handler.handle_exception(error)
|
||||
return
|
||||
return None
|
||||
|
||||
# Load due tasks (with error handling)
|
||||
try:
|
||||
@@ -346,28 +565,30 @@ class TaskScheduler:
|
||||
original_error=e
|
||||
)
|
||||
self.exception_handler.handle_exception(error)
|
||||
return
|
||||
return None
|
||||
|
||||
if not due_tasks:
|
||||
return
|
||||
return None
|
||||
|
||||
summary['found'] = len(due_tasks)
|
||||
self.stats['tasks_found'] += len(due_tasks)
|
||||
logger.info(f"Found {len(due_tasks)} due tasks for type: {task_type}")
|
||||
|
||||
# Execute tasks (with concurrency limit)
|
||||
execution_tasks = []
|
||||
skipped_count = 0
|
||||
for task in due_tasks:
|
||||
if len(self.active_executions) >= self.max_concurrent_executions:
|
||||
skipped_count = len(due_tasks) - len(execution_tasks)
|
||||
logger.warning(
|
||||
f"Max concurrent executions reached ({self.max_concurrent_executions}), "
|
||||
f"skipping {len(due_tasks) - len(execution_tasks)} tasks"
|
||||
f"[Scheduler] ⚠️ Max concurrent executions reached ({self.max_concurrent_executions}), "
|
||||
f"skipping {skipped_count} tasks for {task_type}"
|
||||
)
|
||||
break
|
||||
|
||||
# Execute task asynchronously
|
||||
# Note: Each task gets its own database session to prevent concurrent access issues
|
||||
execution_task = asyncio.create_task(
|
||||
self._execute_task_async(task_type, task)
|
||||
execute_task_async(self, task_type, task, summary)
|
||||
)
|
||||
|
||||
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
|
||||
@@ -379,6 +600,8 @@ class TaskScheduler:
|
||||
if execution_tasks:
|
||||
await asyncio.wait(execution_tasks, timeout=300)
|
||||
|
||||
return summary
|
||||
|
||||
except Exception as e:
|
||||
error = TaskLoaderError(
|
||||
message=f"Error processing task type {task_type}: {str(e)}",
|
||||
@@ -386,169 +609,8 @@ class TaskScheduler:
|
||||
original_error=e
|
||||
)
|
||||
self.exception_handler.handle_exception(error)
|
||||
return summary
|
||||
|
||||
async def _execute_task_async(self, task_type: str, task: Any):
    """
    Execute a single task asynchronously with user isolation.

    A dedicated database session is opened for the task (sessions are not
    shared between concurrently running tasks), the detached task object is
    merged into it, and the executor registered for ``task_type`` is run.
    Outcomes are recorded in ``self.stats`` and the per-user stats; a task is
    counted as failed at most once, even if a later step (for example retry
    scheduling) raises after the failure was already recorded.

    Args:
        task_type: Type of task; used to look up the executor in the registry.
        task: Task instance from the database (detached from its original
            session).
    """
    task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
    db = None
    user_id = None
    failure_recorded = False

    def record_failure() -> None:
        """Count this task as failed exactly once."""
        nonlocal failure_recorded
        if failure_recorded:
            return
        failure_recorded = True
        self.stats['tasks_failed'] += 1
        self._update_user_stats(user_id, success=False)

    try:
        # Extract user context if available (for user isolation tracking).
        # When only ``strategy_id`` is present, the user is resolved after a
        # database session exists (see below).
        try:
            if hasattr(task, 'strategy') and task.strategy:
                user_id = getattr(task.strategy, 'user_id', None)
        except Exception as e:
            logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")

        logger.info(f"Executing task: {task_id} | user_id: {user_id}")

        # Create a new database session for this async task.
        db = get_db_session()
        if db is None:
            error = DatabaseError(
                message=f"Failed to get database session for task {task_id}",
                user_id=user_id,
                task_id=getattr(task, 'id', None),
                task_type=task_type
            )
            self.exception_handler.handle_exception(error, log_level="error")
            record_failure()
            return

        # Set database session for exception handler
        self.exception_handler.db = db

        # The task was loaded in a different session; merge it into this one
        # if it is currently detached.
        from sqlalchemy.orm import object_session
        if object_session(task) is None:
            task = db.merge(task)

        # Extract user_id after merge if not already available
        if user_id is None and hasattr(task, 'strategy'):
            try:
                if task.strategy:
                    user_id = getattr(task.strategy, 'user_id', None)
                elif hasattr(task, 'strategy_id'):
                    # Query strategy if relationship not loaded
                    from models.enhanced_strategy_models import EnhancedContentStrategy
                    strategy = db.query(EnhancedContentStrategy).filter(
                        EnhancedContentStrategy.id == task.strategy_id
                    ).first()
                    if strategy:
                        user_id = strategy.user_id
            except Exception as e:
                logger.debug(f"Could not extract user_id after merge for task {task_id}: {e}")

        # Get executor for this task type
        try:
            executor = self.registry.get_executor(task_type)
        except Exception as e:
            from .exception_handler import SchedulerConfigError
            error = SchedulerConfigError(
                message=f"Failed to get executor for task type {task_type}: {str(e)}",
                user_id=user_id,
                context={
                    "task_id": getattr(task, 'id', None),
                    "task_type": task_type
                },
                original_error=e
            )
            self.exception_handler.handle_exception(error)
            record_failure()
            return

        # Execute task with its own session (with error handling)
        try:
            result = await executor.execute_task(task, db)

            if result.success:
                self.stats['tasks_executed'] += 1
                self._update_user_stats(user_id, success=True)
                logger.info(f"Task executed successfully: {task_id} | user_id: {user_id}")
            else:
                record_failure()

                # Create structured error for failed execution
                error = TaskExecutionError(
                    message=result.error_message or "Task execution failed",
                    user_id=user_id,
                    task_id=getattr(task, 'id', None),
                    task_type=task_type,
                    execution_time_ms=result.execution_time_ms,
                    context={"result_data": result.result_data}
                )
                self.exception_handler.handle_exception(error, log_level="warning")

                # Retry logic if enabled
                if self.enable_retries and result.retryable:
                    await self._schedule_retry(task, result.retry_delay)

        except SchedulerException:
            # Handled (and counted) by the outer handler below.
            raise
        except Exception as e:
            # Wrap unexpected exceptions
            error = TaskExecutionError(
                message=f"Unexpected error during task execution: {str(e)}",
                user_id=user_id,
                task_id=getattr(task, 'id', None),
                task_type=task_type,
                original_error=e
            )
            self.exception_handler.handle_exception(error)
            record_failure()

    except SchedulerException as e:
        # Handle scheduler exceptions
        self.exception_handler.handle_exception(e)
        record_failure()
    except Exception as e:
        # Handle any other unexpected errors
        error = TaskExecutionError(
            message=f"Unexpected error in task execution wrapper: {str(e)}",
            user_id=user_id,
            task_id=getattr(task, 'id', None),
            task_type=task_type,
            original_error=e
        )
        self.exception_handler.handle_exception(error)
        record_failure()
    finally:
        # Clean up database session
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session for task {task_id}: {e}")

        # Remove from active executions
        if task_id in self.active_executions:
            del self.active_executions[task_id]
|
||||
|
||||
def _update_user_stats(self, user_id: Optional[int], success: bool):
|
||||
"""
|
||||
@@ -622,6 +684,117 @@ class TaskScheduler:
|
||||
|
||||
return base_stats
|
||||
|
||||
def schedule_one_time_task(
    self,
    func: Callable,
    run_date: datetime,
    job_id: str,
    args: tuple = (),
    kwargs: Optional[Dict[str, Any]] = None,
    replace_existing: bool = True
) -> str:
    """
    Schedule a one-time task to run at a specific datetime.

    The job is added with a ``DateTrigger`` and a one-hour misfire grace
    period. After scheduling, a summary is logged and a ``job_scheduled``
    event is written to the scheduler event log; both of these are
    best-effort and never cause the call to fail.

    Args:
        func: Async function to execute
        run_date: Datetime when the task should run. A naive datetime is
            treated as UTC.
        job_id: Unique identifier for this job
        args: Positional arguments to pass to func
        kwargs: Keyword arguments to pass to func. If it contains
            ``user_id``, that value is used for logging and for resolving
            the user's job store name.
        replace_existing: If True, replace existing job with same ID

    Returns:
        Job ID

    Raises:
        Exception: Any error raised while adding the job is logged and
            re-raised.
    """
    if not self._running:
        logger.warning(
            f"Scheduler not running, but scheduling job {job_id} anyway. "
            "APScheduler will start automatically when needed."
        )

    try:
        # Ensure run_date is timezone-aware (UTC)
        if run_date.tzinfo is None:
            from datetime import timezone
            run_date = run_date.replace(tzinfo=timezone.utc)
            logger.debug(f"Added UTC timezone to run_date: {run_date}")

        self.scheduler.add_job(
            func,
            trigger=DateTrigger(run_date=run_date),
            args=args,
            kwargs=kwargs or {},
            id=job_id,
            replace_existing=replace_existing,
            misfire_grace_time=3600  # 1 hour grace period for missed jobs
        )

        # Get updated job count
        all_jobs = self.scheduler.get_jobs()
        one_time_jobs = [j for j in all_jobs if j.id != 'check_due_tasks']

        # Extract user_id from kwargs if available for logging and job store
        user_id = kwargs.get('user_id', None) if kwargs else None
        func_name = func.__name__ if hasattr(func, '__name__') else str(func)

        # Get job store name for user (if user_id provided).
        # Note: all jobs still go to the 'default' APScheduler job store; the
        # per-user name is only logged for debugging.
        job_store_name = 'default'
        if user_id:
            db = None
            try:
                db = get_db_session()
                if db:
                    job_store_name = get_user_job_store_name(user_id, db)
            except Exception as e:
                logger.warning(f"Could not determine job store for user {user_id}: {e}")
            finally:
                # Close the session even when the lookup raised.
                if db:
                    try:
                        db.close()
                    except Exception as e:
                        logger.debug(f"Error closing job store lookup session: {e}")

        # Log detailed one-time task scheduling information (use WARNING level for visibility)
        log_message = (
            f"[Scheduler] 📅 Scheduled One-Time Task\n"
            f" ├─ Job ID: {job_id}\n"
            f" ├─ Function: {func_name}\n"
            f" ├─ User ID: {user_id or 'system'}\n"
            f" ├─ Job Store: {job_store_name} (user context)\n"
            f" ├─ Scheduled For: {run_date}\n"
            f" ├─ Replace Existing: {replace_existing}\n"
            f" ├─ Total One-Time Jobs: {len(one_time_jobs)}\n"
            f" └─ Total Scheduled Jobs: {len(all_jobs)}"
        )
        logger.warning(log_message)

        # Log job scheduling to event log for dashboard
        event_db = None
        try:
            event_db = get_db_session()
            if event_db:
                event_log = SchedulerEventLog(
                    event_type='job_scheduled',
                    event_date=datetime.utcnow(),
                    job_id=job_id,
                    job_type='one_time',
                    user_id=user_id,
                    event_data={
                        'function_name': func_name,
                        'job_store': job_store_name,
                        'scheduled_for': run_date.isoformat(),
                        'replace_existing': replace_existing
                    }
                )
                event_db.add(event_log)
                event_db.commit()
        except Exception as e:
            logger.debug(f"Failed to log job scheduling event: {e}")
        finally:
            # Close the session even when add/commit raised.
            if event_db:
                try:
                    event_db.close()
                except Exception as e:
                    logger.debug(f"Error closing event log session: {e}")

        return job_id
    except Exception as e:
        logger.error(f"Failed to schedule one-time task {job_id}: {e}")
        raise
|
||||
|
||||
def is_running(self) -> bool:
    """Return the value of the scheduler's ``_running`` flag."""
    return self._running
|
||||
|
||||
197
backend/services/scheduler/core/task_execution_handler.py
Normal file
197
backend/services/scheduler/core/task_execution_handler.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""
|
||||
Task Execution Handler
|
||||
Handles asynchronous execution of individual tasks with proper session isolation.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional
|
||||
from sqlalchemy.orm import object_session
|
||||
|
||||
from services.database import get_db_session
|
||||
from utils.logger_utils import get_service_logger
|
||||
from .exception_handler import (
|
||||
SchedulerException, TaskExecutionError, DatabaseError, SchedulerConfigError
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .scheduler import TaskScheduler
|
||||
|
||||
logger = get_service_logger("task_execution_handler")
|
||||
|
||||
|
||||
async def execute_task_async(
    scheduler: 'TaskScheduler',
    task_type: str,
    task: Any,
    summary: Optional[Dict[str, Any]] = None
):
    """
    Execute a single task asynchronously with user isolation.

    A dedicated database session is opened for the task (sessions are not
    shared between concurrently running tasks), the detached task object is
    merged into it, and the executor registered for ``task_type`` is run.

    Outcomes are recorded in ``scheduler.stats``, the per-user stats and, when
    given, ``summary``. Every failure path updates all three, and a task is
    counted as failed at most once even if a later step (for example retry
    scheduling) raises after the failure was already recorded.

    Args:
        scheduler: TaskScheduler instance
        task_type: Type of task; used to look up the executor in the registry.
        task: Task instance from database (detached from original session)
        summary: Optional dict whose ``'executed'`` / ``'failed'`` counters
            are incremented with the result of this execution. Missing
            counters are treated as 0.
    """
    task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
    db = None
    user_id = None
    failure_recorded = False

    def record_failure() -> None:
        """Count this task as failed exactly once, in every counter."""
        nonlocal failure_recorded
        if failure_recorded:
            return
        failure_recorded = True
        scheduler.stats['tasks_failed'] += 1
        scheduler._update_user_stats(user_id, success=False)
        if summary is not None:
            summary['failed'] = summary.get('failed', 0) + 1

    try:
        # Extract user context if available (for user isolation tracking).
        # When only ``strategy_id`` is present, the user is resolved after a
        # database session exists (see below).
        try:
            if hasattr(task, 'strategy') and task.strategy:
                user_id = getattr(task.strategy, 'user_id', None)
        except Exception as e:
            logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")

        # Log task execution start (detailed for important tasks)
        task_db_id = getattr(task, 'id', None)
        if task_db_id:
            logger.debug(f"[Scheduler] ▶️ Executing {task_type} task {task_db_id} | user_id: {user_id}")

        # Create a new database session for this async task.
        db = get_db_session()
        if db is None:
            error = DatabaseError(
                message=f"Failed to get database session for task {task_id}",
                user_id=user_id,
                task_id=getattr(task, 'id', None),
                task_type=task_type
            )
            scheduler.exception_handler.handle_exception(error, log_level="error")
            record_failure()
            return

        # Set database session for exception handler
        # NOTE(review): the handler is shared by the scheduler while tasks run
        # concurrently, so this attribute is overwritten by each task and
        # still points at this session after it is closed - confirm the
        # handler tolerates that.
        scheduler.exception_handler.db = db

        # The task was loaded in a different session; merge it into this one
        # if it is currently detached.
        if object_session(task) is None:
            task = db.merge(task)

        # Extract user_id after merge if not already available
        if user_id is None and hasattr(task, 'strategy'):
            try:
                if task.strategy:
                    user_id = getattr(task.strategy, 'user_id', None)
                elif hasattr(task, 'strategy_id'):
                    # Query strategy if relationship not loaded
                    from models.enhanced_strategy_models import EnhancedContentStrategy
                    strategy = db.query(EnhancedContentStrategy).filter(
                        EnhancedContentStrategy.id == task.strategy_id
                    ).first()
                    if strategy:
                        user_id = strategy.user_id
            except Exception as e:
                logger.debug(f"Could not extract user_id after merge for task {task_id}: {e}")

        # Get executor for this task type
        try:
            executor = scheduler.registry.get_executor(task_type)
        except Exception as e:
            error = SchedulerConfigError(
                message=f"Failed to get executor for task type {task_type}: {str(e)}",
                user_id=user_id,
                context={
                    "task_id": getattr(task, 'id', None),
                    "task_type": task_type
                },
                original_error=e
            )
            scheduler.exception_handler.handle_exception(error)
            record_failure()
            return

        # Execute task with its own session (with error handling)
        try:
            result = await executor.execute_task(task, db)

            if result.success:
                scheduler.stats['tasks_executed'] += 1
                scheduler._update_user_stats(user_id, success=True)
                if summary is not None:
                    summary['executed'] = summary.get('executed', 0) + 1
                logger.debug(f"[Scheduler] ✅ Task {task_id} executed successfully | user_id: {user_id} | time: {result.execution_time_ms}ms")
            else:
                record_failure()

                # Create structured error for failed execution
                error = TaskExecutionError(
                    message=result.error_message or "Task execution failed",
                    user_id=user_id,
                    task_id=getattr(task, 'id', None),
                    task_type=task_type,
                    execution_time_ms=result.execution_time_ms,
                    context={"result_data": result.result_data}
                )
                scheduler.exception_handler.handle_exception(error, log_level="warning")

                logger.warning(f"[Scheduler] ❌ Task {task_id} failed | user_id: {user_id} | error: {result.error_message}")

                # Retry logic if enabled
                if scheduler.enable_retries and result.retryable:
                    await scheduler._schedule_retry(task, result.retry_delay)

        except SchedulerException:
            # Handled (and counted) by the outer handler below.
            raise
        except Exception as e:
            # Wrap unexpected exceptions
            error = TaskExecutionError(
                message=f"Unexpected error during task execution: {str(e)}",
                user_id=user_id,
                task_id=getattr(task, 'id', None),
                task_type=task_type,
                original_error=e
            )
            scheduler.exception_handler.handle_exception(error)
            record_failure()

    except SchedulerException as e:
        # Handle scheduler exceptions
        scheduler.exception_handler.handle_exception(e)
        record_failure()
    except Exception as e:
        # Handle any other unexpected errors
        error = TaskExecutionError(
            message=f"Unexpected error in task execution wrapper: {str(e)}",
            user_id=user_id,
            task_id=getattr(task, 'id', None),
            task_type=task_type,
            original_error=e
        )
        scheduler.exception_handler.handle_exception(error)
        record_failure()
    finally:
        # Clean up database session
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session for task {task_id}: {e}")

        # Remove from active executions
        if task_id in scheduler.active_executions:
            del scheduler.active_executions[task_id]
|
||||
|
||||
@@ -0,0 +1,756 @@
|
||||
"""
|
||||
OAuth Token Monitoring Task Executor
|
||||
Handles execution of OAuth token monitoring tasks for connected platforms.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask, OAuthTokenExecutionLog
|
||||
from models.subscription_models import UsageAlert
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
# Import platform-specific services
|
||||
from services.gsc_service import GSCService
|
||||
from services.integrations.bing_oauth import BingOAuthService
|
||||
from services.integrations.wordpress_oauth import WordPressOAuthService
|
||||
from services.wix_service import WixService
|
||||
|
||||
logger = get_service_logger("oauth_token_monitoring_executor")
|
||||
|
||||
|
||||
class OAuthTokenMonitoringExecutor(TaskExecutor):
|
||||
"""
|
||||
Executor for OAuth token monitoring tasks.
|
||||
|
||||
Handles:
|
||||
- Checking token validity and expiration
|
||||
- Attempting automatic token refresh
|
||||
- Logging results and updating task status
|
||||
- One-time refresh attempt (no automatic retries on failure)
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Wire up the logger, the exception handler, and the expiry warning window."""
    # Tokens that expire within this many days are treated as expiring soon.
    self.expiration_warning_days = 7
    self.exception_handler = SchedulerExceptionHandler()
    self.logger = logger
|
||||
|
||||
async def execute_task(self, task: OAuthTokenMonitoringTask, db: Session) -> TaskExecutionResult:
    """
    Execute an OAuth token monitoring task.

    Runs the platform-specific token check, records an execution log row and
    updates the task's bookkeeping fields. When the check fails the task is
    marked 'failed' and ``next_check`` is left untouched, so nothing is
    retried until the task is triggered manually.

    Args:
        task: OAuthTokenMonitoringTask instance
        db: Database session; this method commits or rolls back on it

    Returns:
        The TaskExecutionResult produced by the token check, or a
        non-retryable failure result if an unexpected exception occurred.
    """
    start_time = time.time()
    # Read the identifiers once, up front, so the error path below does not
    # have to touch the ORM instance again after the session was rolled back.
    task_id = task.id
    user_id = task.user_id
    platform = task.platform

    try:
        self.logger.info(
            f"Executing OAuth token monitoring: task_id={task_id} | "
            f"user_id={user_id} | platform={platform}"
        )

        # Record the attempt first; the row is finalized once the check returns.
        execution_log = OAuthTokenExecutionLog(
            task_id=task_id,
            execution_date=datetime.utcnow(),
            status='running'
        )
        db.add(execution_log)
        db.flush()

        # Check and refresh token
        result = await self._check_and_refresh_token(task, db)

        # Finalize the execution log
        execution_time_ms = int((time.time() - start_time) * 1000)
        execution_log.status = 'success' if result.success else 'failed'
        execution_log.result_data = result.result_data
        execution_log.error_message = result.error_message
        execution_log.execution_time_ms = execution_time_ms

        # Update task based on result
        task.last_check = datetime.utcnow()

        if result.success:
            task.last_success = datetime.utcnow()
            task.status = 'active'
            task.failure_reason = None
            # Schedule next check (7 days from now)
            task.next_check = self.calculate_next_execution(
                task=task,
                frequency='Weekly',
                last_execution=task.last_check
            )
        else:
            # Refresh failed - mark as failed and stop automatic retries
            task.last_failure = datetime.utcnow()
            task.failure_reason = result.error_message
            task.status = 'failed'
            # Do NOT update next_check - wait for manual trigger
            self.logger.warning(
                f"OAuth token refresh failed for user {user_id}, platform {platform}. "
                f"Task marked as failed. No automatic retry will be scheduled."
            )

            # Create UsageAlert notification for the user
            self._create_failure_alert(user_id, platform, result.error_message, result.result_data, db)

        task.updated_at = datetime.utcnow()
        db.commit()

        return result

    except Exception as e:
        execution_time_ms = int((time.time() - start_time) * 1000)

        # Start the error path from a clean session. A failed flush/commit
        # leaves the session unusable until it is rolled back, and any other
        # failure would otherwise commit the pending 'running' log row next
        # to the 'failed' one written below.
        try:
            db.rollback()
        except Exception as rollback_error:
            self.logger.error(
                f"Error rolling back session for OAuth token monitoring task {task_id}: {rollback_error}"
            )

        # Set database session for exception handler
        self.exception_handler.db = db

        # Create structured error
        error = TaskExecutionError(
            message=f"Error executing OAuth token monitoring task {task_id}: {str(e)}",
            user_id=user_id,
            task_id=task_id,
            task_type="oauth_token_monitoring",
            execution_time_ms=execution_time_ms,
            context={
                "platform": platform,
                "user_id": user_id
            },
            original_error=e
        )

        # Handle exception with structured logging
        self.exception_handler.handle_exception(error)

        # Persist the failure: one 'failed' log row plus the task state
        try:
            execution_log = OAuthTokenExecutionLog(
                task_id=task_id,
                execution_date=datetime.utcnow(),
                status='failed',
                error_message=str(e),
                execution_time_ms=execution_time_ms,
                result_data={
                    "error_type": error.error_type.value,
                    "severity": error.severity.value,
                    "context": error.context
                }
            )
            db.add(execution_log)

            task.last_failure = datetime.utcnow()
            task.failure_reason = str(e)
            task.status = 'failed'
            task.last_check = datetime.utcnow()
            task.updated_at = datetime.utcnow()
            # Do NOT update next_check - wait for manual trigger

            # Create UsageAlert notification for the user
            self._create_failure_alert(user_id, platform, str(e), None, db)

            db.commit()
        except Exception as commit_error:
            db_error = DatabaseError(
                message=f"Error saving execution log: {str(commit_error)}",
                user_id=user_id,
                task_id=task_id,
                original_error=commit_error
            )
            self.exception_handler.handle_exception(db_error)
            db.rollback()

        return TaskExecutionResult(
            success=False,
            error_message=str(e),
            execution_time_ms=execution_time_ms,
            retryable=False,  # Do not retry automatically
            retry_delay=0
        )
|
||||
|
||||
async def _check_and_refresh_token(
    self,
    task: OAuthTokenMonitoringTask,
    db: Session
) -> TaskExecutionResult:
    """
    Dispatch the token check to the handler for the task's platform.

    Supported platform keys are 'gsc', 'bing', 'wordpress' and 'wix'; each is
    forwarded to the matching ``_check_*_token`` coroutine with the task's
    user id. Any other platform yields a non-retryable failure result, as
    does any exception raised while dispatching.

    Args:
        task: OAuthTokenMonitoringTask instance
        db: Database session (not used by the dispatch itself)

    Returns:
        TaskExecutionResult with success status and details
    """
    platform = task.platform
    user_id = task.user_id

    try:
        self.logger.info(f"Checking token for platform: {platform}, user: {user_id}")

        # Ordered (platform key, checker) routing table
        routes = (
            ('gsc', self._check_gsc_token),
            ('bing', self._check_bing_token),
            ('wordpress', self._check_wordpress_token),
            ('wix', self._check_wix_token),
        )
        for key, checker in routes:
            if platform == key:
                return await checker(user_id)

        # Nothing matched: report the platform as unsupported
        unsupported_details = {
            'platform': platform,
            'user_id': user_id,
            'error': 'Unsupported platform'
        }
        return TaskExecutionResult(
            success=False,
            error_message=f"Unsupported platform: {platform}",
            result_data=unsupported_details,
            retryable=False
        )

    except Exception as e:
        self.logger.error(
            f"Error checking/refreshing token for platform {platform}, user {user_id}: {e}",
            exc_info=True
        )
        failure_details = {
            'platform': platform,
            'user_id': user_id,
            'error': str(e)
        }
        return TaskExecutionResult(
            success=False,
            error_message=f"Token check failed: {str(e)}",
            result_data=failure_details,
            retryable=False  # Do not retry automatically
        )
|
||||
|
||||
async def _check_gsc_token(self, user_id: str) -> TaskExecutionResult:
    """
    Check the GSC (Google Search Console) token for a user.

    The check consists of loading the user's credentials through GSCService.
    According to the original notes in this file, the service refreshes an
    expired token while loading, so successfully loaded credentials are
    reported as valid.

    Args:
        user_id: User whose GSC credentials are loaded

    Returns:
        TaskExecutionResult: success with status 'valid', or a non-retryable
        failure with status 'not_found' or an 'error' entry.
    """
    try:
        # Absolute database path, for consistency with onboarding
        service = GSCService(db_path=os.path.abspath("alwrity.db"))

        if service.load_user_credentials(user_id):
            # Credentials loaded, so the token is treated as valid
            valid_details = {
                'platform': 'gsc',
                'user_id': user_id,
                'status': 'valid',
                'check_time': datetime.utcnow().isoformat(),
                'message': 'GSC token is valid (auto-refreshed if expired)'
            }
            return TaskExecutionResult(
                success=True,
                result_data=valid_details
            )

        missing_details = {
            'platform': 'gsc',
            'user_id': user_id,
            'status': 'not_found',
            'check_time': datetime.utcnow().isoformat()
        }
        return TaskExecutionResult(
            success=False,
            error_message="GSC credentials not found or could not be loaded",
            result_data=missing_details,
            retryable=False
        )

    except Exception as e:
        self.logger.error(f"Error checking GSC token for user {user_id}: {e}", exc_info=True)
        error_details = {
            'platform': 'gsc',
            'user_id': user_id,
            'error': str(e)
        }
        return TaskExecutionResult(
            success=False,
            error_message=f"GSC token check failed: {str(e)}",
            result_data=error_details,
            retryable=False
        )
|
||||
|
||||
async def _check_bing_token(self, user_id: str) -> TaskExecutionResult:
    """
    Check and refresh Bing Webmaster Tools token.

    An active token that expires within ``self.expiration_warning_days`` is
    refreshed when it carries a refresh token. If there are no active tokens,
    the first expired token in the list is refreshed only when it expired
    less than 24 hours ago.

    Expiry timestamps are normalized to naive UTC before being compared with
    ``datetime.utcnow()``. Without that, a timestamp carrying a UTC offset
    (including the 'Z' suffix handled here) cannot be subtracted from the
    naive "now", and the expiry check would silently never trigger.

    Args:
        user_id: User whose Bing tokens are checked

    Returns:
        TaskExecutionResult whose result_data 'status' is one of 'valid',
        'refreshed', 'refresh_failed', 'expired', 'not_found' or 'invalid';
        unexpected errors produce a failure result with an 'error' entry.
    """
    def to_naive_utc(raw_expiry):
        """Parse an expiry value into a naive UTC datetime (ValueError if unparseable)."""
        if isinstance(raw_expiry, datetime):
            parsed = raw_expiry
        else:
            parsed = datetime.fromisoformat(str(raw_expiry).replace('Z', '+00:00'))
        offset = parsed.utcoffset()
        if offset is not None:
            # Shift to UTC and drop the tzinfo so arithmetic with utcnow() works
            parsed = parsed.replace(tzinfo=None) - offset
        return parsed

    def build_result(success, status, message=None, error_message=None):
        """Assemble the result for one outcome of the Bing check."""
        details = {
            'platform': 'bing',
            'user_id': user_id,
            'status': status,
            'check_time': datetime.utcnow().isoformat()
        }
        if message is not None:
            details['message'] = message
        if success:
            return TaskExecutionResult(success=True, result_data=details)
        return TaskExecutionResult(
            success=False,
            error_message=error_message,
            result_data=details,
            retryable=False
        )

    try:
        # Use absolute database path for consistency with onboarding
        db_path = os.path.abspath("alwrity.db")
        bing_service = BingOAuthService(db_path=db_path)

        # Get token status (includes expired tokens)
        token_status = bing_service.get_user_token_status(user_id)

        if not token_status.get('has_tokens'):
            return build_result(False, 'not_found', error_message="No Bing tokens found for user")

        active_tokens = token_status.get('active_tokens', [])
        expired_tokens = token_status.get('expired_tokens', [])

        if active_tokens:
            # Look for the first active token inside the warning window
            now = datetime.utcnow()
            token_to_refresh = None

            for token in active_tokens:
                raw_expiry = token.get('expires_at')
                if not raw_expiry:
                    continue
                try:
                    expires_at = to_naive_utc(raw_expiry)
                except (TypeError, ValueError) as parse_error:
                    # Best effort: an unparseable expiry is treated as valid
                    self.logger.warning(
                        f"Could not parse Bing token expiry for user {user_id}: {parse_error}"
                    )
                    continue
                if (expires_at - now).days < self.expiration_warning_days:
                    token_to_refresh = token
                    break

            refresh_token = token_to_refresh.get('refresh_token') if token_to_refresh else None
            if refresh_token:
                if bing_service.refresh_access_token(user_id, refresh_token):
                    return build_result(True, 'refreshed', message='Bing token refreshed successfully')
                return build_result(False, 'refresh_failed', error_message="Failed to refresh Bing token")

            # Either nothing is close to expiry, or the expiring token has no
            # refresh token; the token is still active, so report it as valid.
            return build_result(True, 'valid', message='Bing token is valid')

        # No active tokens, check if we can refresh expired ones
        if expired_tokens:
            # First entry is the most recent one (list is sorted by created_at DESC)
            latest_token = expired_tokens[0]
            refresh_token = latest_token.get('refresh_token')
            raw_expiry = latest_token.get('expires_at')

            if refresh_token and raw_expiry:
                try:
                    expires_at = to_naive_utc(raw_expiry)
                    # Only refresh if expired within last 24 hours (grace period)
                    hours_since_expiry = (datetime.utcnow() - expires_at).total_seconds() / 3600
                    if hours_since_expiry < 24 and bing_service.refresh_access_token(user_id, refresh_token):
                        return build_result(
                            True,
                            'refreshed',
                            message='Bing token refreshed from expired state'
                        )
                except Exception as refresh_error:
                    # Fall through to the 'expired' result, but leave a trace
                    self.logger.warning(
                        f"Could not refresh expired Bing token for user {user_id}: {refresh_error}"
                    )

            return build_result(
                False,
                'expired',
                message='Bing token expired. User needs to reconnect.',
                error_message="Bing token expired and could not be refreshed"
            )

        return build_result(False, 'invalid', error_message="No valid Bing tokens found")

    except Exception as e:
        self.logger.error(f"Error checking Bing token for user {user_id}: {e}", exc_info=True)
        return TaskExecutionResult(
            success=False,
            error_message=f"Bing token check failed: {str(e)}",
            result_data={
                'platform': 'bing',
                'user_id': user_id,
                'error': str(e)
            },
            retryable=False
        )
|
||||
|
||||
async def _check_wordpress_token(self, user_id: str) -> TaskExecutionResult:
    """
    Check WordPress token validity.

    Note: WordPress tokens cannot be refreshed. They expire after 2 weeks
    and require user re-authorization. We only check if token is valid.

    Each token is classified by its expiry timestamp as expired, expiring
    within ``self.expiration_warning_days``, or valid. Timestamps are
    normalized to naive UTC first so that offset-bearing values (including
    the 'Z' suffix handled here) can be compared with ``datetime.utcnow()``.
    Tokens without a usable expiry are tested through the WordPress service.

    Args:
        user_id: User whose WordPress tokens are checked

    Returns:
        TaskExecutionResult whose result_data 'status' is 'valid',
        'expiring_soon', 'expired' or 'not_found'; unexpected errors produce
        a failure result with an 'error' entry.
    """
    def to_naive_utc(raw_expiry):
        """Parse an expiry value into a naive UTC datetime (ValueError if unparseable)."""
        if isinstance(raw_expiry, datetime):
            parsed = raw_expiry
        else:
            parsed = datetime.fromisoformat(str(raw_expiry).replace('Z', '+00:00'))
        offset = parsed.utcoffset()
        if offset is not None:
            # Shift to UTC and drop the tzinfo so arithmetic with utcnow() works
            parsed = parsed.replace(tzinfo=None) - offset
        return parsed

    try:
        # Use absolute database path for consistency with onboarding
        db_path = os.path.abspath("alwrity.db")
        wordpress_service = WordPressOAuthService(db_path=db_path)
        tokens = wordpress_service.get_user_tokens(user_id)

        if not tokens:
            return TaskExecutionResult(
                success=False,
                error_message="No WordPress tokens found for user",
                result_data={
                    'platform': 'wordpress',
                    'user_id': user_id,
                    'status': 'not_found',
                    'check_time': datetime.utcnow().isoformat()
                },
                retryable=False
            )

        # Check each token - WordPress tokens expire in 2 weeks
        now = datetime.utcnow()
        valid_tokens = []
        expiring_soon = []
        expired_tokens = []

        for token in tokens:
            expires_at = None
            raw_expiry = token.get('expires_at')
            if raw_expiry:
                try:
                    expires_at = to_naive_utc(raw_expiry)
                except (TypeError, ValueError) as parse_error:
                    self.logger.warning(
                        f"Could not parse WordPress token expiry for user {user_id}: {parse_error}"
                    )

            if expires_at is None:
                # No usable expiration date - test token validity via API
                access_token = token.get('access_token')
                if access_token and wordpress_service.test_token(access_token):
                    valid_tokens.append(token)
                else:
                    expired_tokens.append(token)
                continue

            days_until_expiry = (expires_at - now).days
            if days_until_expiry < 0:
                expired_tokens.append(token)
            elif days_until_expiry < self.expiration_warning_days:
                expiring_soon.append(token)
            else:
                valid_tokens.append(token)

        if valid_tokens:
            return TaskExecutionResult(
                success=True,
                result_data={
                    'platform': 'wordpress',
                    'user_id': user_id,
                    'status': 'valid',
                    'check_time': datetime.utcnow().isoformat(),
                    'message': 'WordPress token is valid',
                    'valid_tokens_count': len(valid_tokens)
                }
            )

        if expiring_soon:
            # WordPress tokens cannot be refreshed - user needs to reconnect
            return TaskExecutionResult(
                success=False,
                error_message="WordPress token expiring soon and cannot be auto-refreshed",
                result_data={
                    'platform': 'wordpress',
                    'user_id': user_id,
                    'status': 'expiring_soon',
                    'check_time': datetime.utcnow().isoformat(),
                    'message': 'WordPress token expires soon. User needs to reconnect (WordPress tokens cannot be auto-refreshed).'
                },
                retryable=False
            )

        return TaskExecutionResult(
            success=False,
            error_message="WordPress token expired and cannot be refreshed",
            result_data={
                'platform': 'wordpress',
                'user_id': user_id,
                'status': 'expired',
                'check_time': datetime.utcnow().isoformat(),
                'message': 'WordPress token expired. User needs to reconnect (WordPress tokens cannot be auto-refreshed).'
            },
            retryable=False
        )

    except Exception as e:
        self.logger.error(f"Error checking WordPress token for user {user_id}: {e}", exc_info=True)
        return TaskExecutionResult(
            success=False,
            error_message=f"WordPress token check failed: {str(e)}",
            result_data={
                'platform': 'wordpress',
                'user_id': user_id,
                'error': str(e)
            },
            retryable=False
        )
|
||||
|
||||
async def _check_wix_token(self, user_id: str) -> TaskExecutionResult:
    """
    Report that Wix token monitoring is not supported yet.

    Wix tokens currently live in frontend sessionStorage rather than in a
    backend store, so there is nothing to inspect here; the method always
    returns a non-retryable failure with status 'not_supported'.

    Args:
        user_id: User the result is reported for

    Returns:
        TaskExecutionResult describing the unsupported check
    """
    try:
        # TODO: Wix tokens are stored in frontend sessionStorage, not backend database.
        # Once backend storage is implemented, the real check belongs here.
        unsupported_details = {
            'platform': 'wix',
            'user_id': user_id,
            'status': 'not_supported',
            'check_time': datetime.utcnow().isoformat(),
            'message': 'Wix token monitoring requires backend token storage implementation'
        }
        return TaskExecutionResult(
            success=False,
            error_message="Wix token monitoring not yet supported - tokens stored in frontend sessionStorage",
            result_data=unsupported_details,
            retryable=False
        )

    except Exception as e:
        self.logger.error(f"Error checking Wix token for user {user_id}: {e}", exc_info=True)
        error_details = {
            'platform': 'wix',
            'user_id': user_id,
            'error': str(e)
        }
        return TaskExecutionResult(
            success=False,
            error_message=f"Wix token check failed: {str(e)}",
            result_data=error_details,
            retryable=False
        )
|
||||
|
||||
def _create_failure_alert(
    self,
    user_id: str,
    platform: str,
    error_message: str,
    result_data: Optional[Dict[str, Any]],
    db: Session
):
    """
    Queue a UsageAlert describing a failed OAuth token check.

    The alert is only added to the session; committing is left to the
    caller so the alert is stored together with the task update. Any error
    raised while building the alert is logged and suppressed so it cannot
    fail the surrounding task execution.

    Args:
        user_id: User ID
        platform: Platform identifier (gsc, bing, wordpress, wix)
        error_message: Error message from token check
        result_data: Optional result data from token check; its 'status'
            entry selects the severity and wording of the alert
        db: Database session
    """
    try:
        status = 'unknown'
        if result_data:
            status = result_data.get('status', 'unknown')

        # Only these two statuses are warnings; everything else is an error
        if status in ('expiring_soon', 'not_found'):
            severity, alert_type = 'warning', 'oauth_token_warning'
        else:
            severity, alert_type = 'error', 'oauth_token_failure'

        # Human-readable platform name, falling back to the upper-cased key
        fallback_name = platform.upper()
        platform_display = {
            'gsc': 'Google Search Console',
            'bing': 'Bing Webmaster Tools',
            'wordpress': 'WordPress',
            'wix': 'Wix'
        }.get(platform, fallback_name)

        # Pick the title and message for this status
        if status == 'expired':
            title = f"{platform_display} Token Expired"
            message = (
                f"Your {platform_display} access token has expired and could not be automatically renewed. "
                f"Please reconnect your {platform_display} account to continue using this integration."
            )
        elif status == 'expiring_soon':
            title = f"{platform_display} Token Expiring Soon"
            message = (
                f"Your {platform_display} access token will expire soon. "
                f"Please reconnect your {platform_display} account to avoid interruption."
            )
        elif status == 'refresh_failed':
            title = f"{platform_display} Token Renewal Failed"
            message = (
                f"Failed to automatically renew your {platform_display} access token. "
                f"Please reconnect your {platform_display} account. "
                f"Error: {error_message}"
            )
        elif status == 'not_found':
            title = f"{platform_display} Token Not Found"
            message = (
                f"No {platform_display} access token found. "
                f"Please connect your {platform_display} account in the onboarding settings."
            )
        else:
            title = f"{platform_display} Token Error"
            message = (
                f"An error occurred while checking your {platform_display} access token. "
                f"Please reconnect your {platform_display} account. "
                f"Error: {error_message}"
            )

        # Current billing period in YYYY-MM format
        billing_period = datetime.utcnow().strftime("%Y-%m")

        # No commit here: the caller commits the alert with the task update
        db.add(
            UsageAlert(
                user_id=user_id,
                alert_type=alert_type,
                threshold_percentage=0,  # Not applicable for OAuth alerts
                provider=None,  # Not applicable for OAuth alerts
                title=title,
                message=message,
                severity=severity,
                is_sent=False,  # Will be marked as sent when frontend polls
                is_read=False,
                billing_period=billing_period
            )
        )

        self.logger.info(
            f"Created UsageAlert for OAuth token failure: user={user_id}, "
            f"platform={platform}, severity={severity}"
        )

    except Exception as e:
        # Don't fail the entire task execution if alert creation fails
        self.logger.error(
            f"Failed to create UsageAlert for OAuth token failure: {e}",
            exc_info=True
        )
|
||||
|
||||
def calculate_next_execution(
    self,
    task: OAuthTokenMonitoringTask,
    frequency: str,
    last_execution: Optional[datetime] = None
) -> datetime:
    """
    Return the next check time, always seven days after the reference time.

    OAuth token monitoring runs weekly. Any frequency other than 'Weekly'
    is logged as a warning and handled as weekly as well.

    Args:
        task: OAuthTokenMonitoringTask instance
        frequency: Frequency string (should be 'Weekly' for token monitoring)
        last_execution: Reference datetime (defaults to task.last_check or now)

    Returns:
        Next execution datetime
    """
    reference_time = last_execution
    if reference_time is None:
        reference_time = task.last_check or datetime.utcnow()

    if frequency != 'Weekly':
        # Unrecognized frequency: warn, then fall back to the weekly interval
        self.logger.warning(
            f"Unknown frequency '{frequency}' for OAuth token monitoring task {task.id}. "
            f"Defaulting to Weekly (7 days)."
        )

    return reference_time + timedelta(days=7)
|
||||
|
||||
@@ -1,4 +1,12 @@
|
||||
"""
|
||||
Scheduler utilities.
|
||||
Scheduler Utilities Package
|
||||
"""
|
||||
|
||||
from .task_loader import load_due_monitoring_tasks
|
||||
from .user_job_store import extract_domain_root, get_user_job_store_name
|
||||
|
||||
__all__ = [
|
||||
'load_due_monitoring_tasks',
|
||||
'extract_domain_root',
|
||||
'get_user_job_store_name'
|
||||
]
|
||||
|
||||
54
backend/services/scheduler/utils/oauth_token_task_loader.py
Normal file
54
backend/services/scheduler/utils/oauth_token_task_loader.py
Normal file
@@ -0,0 +1,54 @@
|
||||
"""
|
||||
OAuth Token Monitoring Task Loader
|
||||
Functions to load due OAuth token monitoring tasks from database.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Union
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import and_, or_
|
||||
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
|
||||
|
||||
def load_due_oauth_token_monitoring_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None
) -> List[OAuthTokenMonitoringTask]:
    """
    Return the OAuth token monitoring tasks that should run now.

    A task is due when:
    - its status is 'active', and
    - its next_check is unset (first execution) or not later than the
      current UTC time.

    When user_id is given, only that user's tasks are returned (user
    isolation); otherwise tasks of all users are returned, for system-wide
    monitoring.

    Args:
        db: Database session
        user_id: Optional user ID (Clerk string) to filter tasks; it is
            compared as a string

    Returns:
        List of due OAuthTokenMonitoringTask instances
    """
    current_time = datetime.utcnow()

    is_active = OAuthTokenMonitoringTask.status == 'active'
    is_due = or_(
        OAuthTokenMonitoringTask.next_check <= current_time,
        OAuthTokenMonitoringTask.next_check.is_(None)
    )
    due_tasks = db.query(OAuthTokenMonitoringTask).filter(and_(is_active, is_due))

    if user_id is None:
        return due_tasks.all()

    # Restrict to a single user (user isolation)
    return due_tasks.filter(OAuthTokenMonitoringTask.user_id == str(user_id)).all()
|
||||
|
||||
@@ -4,7 +4,7 @@ Functions to load due tasks from database.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
from typing import List, Optional, Union
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
from sqlalchemy import and_, or_
|
||||
|
||||
@@ -14,7 +14,7 @@ from models.enhanced_strategy_models import EnhancedContentStrategy
|
||||
|
||||
def load_due_monitoring_tasks(
|
||||
db: Session,
|
||||
user_id: Optional[int] = None
|
||||
user_id: Optional[Union[str, int]] = None
|
||||
) -> List[MonitoringTask]:
|
||||
"""
|
||||
Load all monitoring tasks that are due for execution.
|
||||
@@ -22,14 +22,17 @@ def load_due_monitoring_tasks(
|
||||
Criteria:
|
||||
- status == 'active'
|
||||
- next_execution <= now (or is None for first execution)
|
||||
- Optional: user_id filter for specific user (for future admin features)
|
||||
- Optional: user_id filter for specific user (for user isolation)
|
||||
|
||||
Note: Strategy relationship is eagerly loaded to ensure user_id is accessible
|
||||
during task execution for user isolation.
|
||||
|
||||
User isolation is enforced through filtering by user_id when provided.
|
||||
If no user_id is provided, loads tasks for all users (for system-wide monitoring).
|
||||
|
||||
Args:
|
||||
db: Database session
|
||||
user_id: Optional user ID to filter tasks (if None, loads all users' tasks)
|
||||
user_id: Optional user ID (Clerk string or int) to filter tasks (if None, loads all users' tasks)
|
||||
|
||||
Returns:
|
||||
List of due MonitoringTask instances with strategy relationship loaded
|
||||
|
||||
129
backend/services/scheduler/utils/user_job_store.py
Normal file
129
backend/services/scheduler/utils/user_job_store.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""
|
||||
User Job Store Utilities
|
||||
Utilities for managing per-user job stores based on website root.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from urllib.parse import urlparse
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session as SQLSession
|
||||
|
||||
from services.database import get_db_session
|
||||
from models.onboarding import OnboardingSession, WebsiteAnalysis
|
||||
|
||||
|
||||
def extract_domain_root(url: str) -> str:
    """
    Extract domain root from a website URL for use as job store identifier.

    Credentials ("user:pass@") and port (":8080") are not part of the host
    name and are ignored, so they never leak into the job store name.

    Examples:
        https://www.example.com -> example
        https://blog.example.com -> example
        https://example.co.uk -> example
        http://subdomain.example.com/path -> example
        http://localhost:8000 -> localhost

    Args:
        url: Website URL

    Returns:
        Domain root (e.g., 'example') or 'default' if extraction fails or the
        result is shorter than two characters
    """
    try:
        parsed = urlparse(url)

        # ``hostname`` is the host only; ``netloc`` would still carry any
        # credentials and port.
        hostname = parsed.hostname
        if not hostname:
            # Scheme-less input ("example.com/path") is parsed as a path;
            # take its first segment and strip credentials/port by hand.
            hostname = parsed.path.split('/')[0].rsplit('@', 1)[-1].split(':', 1)[0]
        hostname = hostname.lower()

        # Remove www. prefix if present
        if hostname.startswith('www.'):
            hostname = hostname[4:]

        # For example.com -> example, for example.co.uk -> example
        parts = hostname.split('.')
        if len(parts) >= 3 and parts[-2] in ('co', 'com', 'net', 'org'):
            # Two-label suffix such as co.uk: the root is one label further left
            root = parts[-3]
        elif len(parts) >= 2:
            root = parts[-2]
        else:
            root = parts[0]

        # Keep only characters that are valid in a job store name
        root = ''.join(c for c in root.strip() if c.isalnum() or c in ('-', '_'))

        if len(root) < 2:
            return 'default'

        return root

    except Exception as e:
        logger.warning(f"Failed to extract domain root from URL '{url}': {e}")
        return 'default'
|
||||
|
||||
|
||||
def get_user_job_store_name(user_id: str, db: Optional[SQLSession] = None) -> str:
    """
    Get job store name for a user based on their website root from onboarding.

    The most recently updated onboarding session for the user is looked up,
    then the most recently updated website analysis for that session. The
    analysis' website URL is reduced to a domain root with
    ``extract_domain_root``. Every failure path falls back to 'default'.

    Args:
        user_id: User ID (Clerk string)
        db: Optional database session (will create if not provided). A
            session created here is closed before returning; a session
            passed in by the caller is left open.

    Returns:
        Job store name (e.g., 'example' or 'default')
    """
    db_session = db
    close_db = False

    try:
        if not db_session:
            db_session = get_db_session()
            close_db = True

        if not db_session:
            logger.warning(f"Could not get database session for user {user_id}, using default job store")
            return 'default'

        # Get user's website URL from onboarding
        # Query directly since user_id is a string (Clerk ID)
        onboarding_session = db_session.query(OnboardingSession).filter(
            OnboardingSession.user_id == user_id
        ).order_by(OnboardingSession.updated_at.desc()).first()

        if not onboarding_session:
            logger.debug(
                f"[Job Store] No onboarding session found for user {user_id}, using default job store. "
                f"This is normal if user hasn't completed onboarding."
            )
            return 'default'

        # Get the latest website analysis for this session
        website_analysis = db_session.query(WebsiteAnalysis).filter(
            WebsiteAnalysis.session_id == onboarding_session.id
        ).order_by(WebsiteAnalysis.updated_at.desc()).first()

        if not website_analysis or not website_analysis.website_url:
            logger.debug(
                f"[Job Store] No website URL found for user {user_id} (session_id: {onboarding_session.id}), "
                f"using default job store. This is normal if website analysis wasn't completed."
            )
            return 'default'

        website_url = website_analysis.website_url
        domain_root = extract_domain_root(website_url)

        logger.debug(f"Job store for user {user_id}: {domain_root} (from {website_url})")
        return domain_root

    except Exception as e:
        logger.error(f"Error getting job store name for user {user_id}: {e}")
        return 'default'
    finally:
        # Only close sessions opened here; caller-supplied sessions stay open.
        if close_db and db_session:
            try:
                db_session.close()
            except Exception as close_error:
                # Closing is best-effort: record the failure but never let it
                # replace the job store name being returned.
                logger.debug(
                    f"[Job Store] Failed to close database session for user {user_id}: {close_error}"
                )
|
||||
|
||||
@@ -494,10 +494,8 @@ class LimitValidator:
|
||||
|
||||
display_provider_name = actual_provider_name or provider_name
|
||||
|
||||
logger.error(f"[Pre-flight Check] ✅ Operation {op_idx + 1}/{len(operations)}: {operation_type}")
|
||||
logger.error(f" ├─ Provider: {display_provider_name} (enum: {provider_name})")
|
||||
logger.error(f" ├─ Operation Index: {op_idx}")
|
||||
logger.error(f" └─ Estimated Tokens Requested: {tokens_requested}")
|
||||
# Log operation details at debug level (only when needed)
|
||||
logger.debug(f"[Pre-flight] Operation {op_idx + 1}/{len(operations)}: {operation_type} ({display_provider_name}, {tokens_requested} tokens)")
|
||||
|
||||
# Check if this is an LLM provider
|
||||
llm_providers = ['gemini', 'openai', 'anthropic', 'mistral']
|
||||
@@ -563,13 +561,11 @@ class LimitValidator:
|
||||
|
||||
if result:
|
||||
base_current_tokens = result[0] if result[0] is not None else 0
|
||||
logger.error(f"[Pre-flight Check] ✅ Raw SQL query returned result: {result[0]} -> {base_current_tokens}")
|
||||
else:
|
||||
base_current_tokens = 0
|
||||
logger.error(f"[Pre-flight Check] ⚠️ Raw SQL query returned None (no rows found)")
|
||||
|
||||
query_succeeded = True
|
||||
logger.error(f"[Pre-flight Check] ✅ Raw SQL query succeeded for {provider_tokens_key}: {base_current_tokens}")
|
||||
logger.debug(f"[Pre-flight] Raw SQL query for {provider_tokens_key}: {base_current_tokens}")
|
||||
|
||||
except Exception as sql_error:
|
||||
logger.error(f" └─ Raw SQL query failed for {provider_tokens_key}: {type(sql_error).__name__}: {sql_error}", exc_info=True)
|
||||
@@ -606,14 +602,8 @@ class LimitValidator:
|
||||
if not query_succeeded:
|
||||
logger.warning(f" └─ Both query methods failed, using 0 as fallback")
|
||||
|
||||
# CRITICAL LOG: Always log what we got from DB - this helps debug renewal issues
|
||||
# Use ERROR level to ensure it shows even if INFO is filtered
|
||||
logger.error(f"[Pre-flight Check] 🔍 Fresh DB Query for {display_provider_name}:")
|
||||
logger.error(f" ├─ Column: {provider_tokens_key}")
|
||||
logger.error(f" ├─ Billing Period: {current_period}")
|
||||
logger.error(f" ├─ User ID: {user_id}")
|
||||
logger.error(f" ├─ Method: {'Raw SQL' if query_succeeded and base_current_tokens >= 0 else 'ORM' if query_succeeded else 'Failed - using 0'}")
|
||||
logger.error(f" └─ Value from DB: {base_current_tokens}")
|
||||
# Log DB query result at debug level (only when needed for troubleshooting)
|
||||
logger.debug(f"[Pre-flight] DB query for {display_provider_name} ({provider_tokens_key}): {base_current_tokens} (period: {current_period})")
|
||||
|
||||
# Add any projected tokens from previous operations in this validation run
|
||||
# Note: total_llm_tokens tracks ONLY projected tokens from this run, not base DB value
|
||||
@@ -622,16 +612,8 @@ class LimitValidator:
|
||||
# Current tokens = base from DB + projected from previous operations in this run
|
||||
current_provider_tokens = base_current_tokens + projected_from_previous
|
||||
|
||||
# Use ERROR level to ensure visibility
|
||||
logger.error(f"[Pre-flight Check] 📊 Token Calculation for {display_provider_name}:")
|
||||
logger.error(f" ├─ Base from DB (fresh query): {base_current_tokens}")
|
||||
logger.error(f" ├─ Projected from previous ops in this run: {projected_from_previous}")
|
||||
logger.error(f" └─ Total current tokens (base + projected): {current_provider_tokens}")
|
||||
|
||||
# Also check the initial usage object to see if it's being used incorrectly
|
||||
if usage and hasattr(usage, provider_tokens_key):
|
||||
initial_usage_value = getattr(usage, provider_tokens_key, 0) or 0
|
||||
logger.error(f" ⚠️ Initial usage object value: {initial_usage_value} (this should NOT be used for fresh query)")
|
||||
# Log token calculation at debug level
|
||||
logger.debug(f"[Pre-flight] Token calc for {display_provider_name}: base={base_current_tokens}, projected={projected_from_previous}, total={current_provider_tokens}")
|
||||
|
||||
token_limit = limits.get(provider_tokens_key, 0) or 0
|
||||
|
||||
@@ -687,15 +669,10 @@ class LimitValidator:
|
||||
if tokens_requested > 0:
|
||||
# Add this operation's tokens to cumulative projected tokens
|
||||
total_llm_tokens[provider_tokens_key] = projected_from_previous + tokens_requested
|
||||
logger.error(f"[Pre-flight Check] 📝 Updated cumulative projected tokens for {display_provider_name}:")
|
||||
logger.error(f" ├─ Previous projected: {projected_from_previous}")
|
||||
logger.error(f" ├─ This operation requested: {tokens_requested}")
|
||||
logger.error(f" ├─ New cumulative projected: {total_llm_tokens[provider_tokens_key]}")
|
||||
logger.error(f" └─ Old value in dict was: {old_projected}")
|
||||
logger.debug(f"[Pre-flight] Updated projected tokens for {display_provider_name}: {projected_from_previous} + {tokens_requested} = {total_llm_tokens[provider_tokens_key]}")
|
||||
else:
|
||||
# No tokens requested, keep existing projected tokens (or 0 if first operation)
|
||||
total_llm_tokens[provider_tokens_key] = projected_from_previous
|
||||
logger.error(f"[Pre-flight Check] 📝 No tokens requested, keeping projected at: {projected_from_previous}")
|
||||
|
||||
# Check image generation limits
|
||||
elif provider == APIProvider.STABILITY:
|
||||
|
||||
@@ -237,9 +237,10 @@ async def monitoring_middleware(request: Request, call_next):
|
||||
# Check for authorization header with user info
|
||||
elif 'authorization' in request.headers:
|
||||
# Auth middleware should have set request.state.user_id
|
||||
# If not, this indicates an authentication failure that should be logged
|
||||
# If not, this indicates an authentication failure (likely expired token)
|
||||
# Log at debug level to reduce noise - expired tokens are expected
|
||||
user_id = None
|
||||
logger.warning("Monitoring: Auth header present but no user_id in state - authentication may have failed")
|
||||
logger.debug("Monitoring: Auth header present but no user_id in state - token likely expired")
|
||||
|
||||
# Final fallback: None (skip usage limits for truly anonymous/unauthenticated)
|
||||
else:
|
||||
|
||||
@@ -93,11 +93,7 @@ def validate_research_operations(
|
||||
provider = usage_info.get('provider', llm_provider_name) if usage_info else llm_provider_name
|
||||
operation_type = usage_info.get('operation_type', 'unknown')
|
||||
|
||||
logger.error(f"[Pre-flight Validator] ❌ RESEARCH WORKFLOW BLOCKED")
|
||||
logger.error(f" ├─ User: {user_id}")
|
||||
logger.error(f" ├─ Blocked at: {operation_type}")
|
||||
logger.error(f" ├─ Provider: {provider}")
|
||||
logger.error(f" └─ Reason: {message}")
|
||||
logger.warning(f"[Pre-flight] Research blocked for user {user_id}: {operation_type} ({provider}) - {message}")
|
||||
|
||||
# Raise HTTPException immediately - frontend gets immediate response, no API calls made
|
||||
raise HTTPException(
|
||||
|
||||
Reference in New Issue
Block a user