Scheduled research persona generation

This commit is contained in:
ajaysi
2025-11-05 08:51:00 +05:30
parent 55087c4f37
commit d99c7c83a7
98 changed files with 14518 additions and 828 deletions

View File

@@ -389,10 +389,19 @@ class ResearchService:
exa_provider.track_exa_usage(user_id, cost)
# Extract content for downstream analysis
# Handle None result case
if raw_result is None:
logger.error("raw_result is None after Exa search - this should not happen if HTTPException was raised")
raise ValueError("Exa research result is None - search operation failed unexpectedly")
if not isinstance(raw_result, dict):
logger.warning(f"raw_result is not a dict (type: {type(raw_result)}), using defaults")
raw_result = {}
content = raw_result.get('content', '')
sources = raw_result.get('sources', [])
sources = raw_result.get('sources', []) or []
search_widget = "" # Exa doesn't provide search widgets
search_queries = raw_result.get('search_queries', [])
search_queries = raw_result.get('search_queries', []) or []
grounding_metadata = None # Exa doesn't provide grounding metadata
except RuntimeError as e:
@@ -423,10 +432,15 @@ class ResearchService:
await task_manager.update_progress(task_id, "📊 Processing research results and extracting insights...")
# Extract sources and content
# Handle None result case
if gemini_result is None:
logger.error("gemini_result is None after search - this should not happen if HTTPException was raised")
raise ValueError("Research result is None - search operation failed unexpectedly")
sources = self._extract_sources_from_grounding(gemini_result)
content = gemini_result.get("content", "")
search_widget = gemini_result.get("search_widget", "") or ""
search_queries = gemini_result.get("search_queries", []) or []
content = gemini_result.get("content", "") if isinstance(gemini_result, dict) else ""
search_widget = gemini_result.get("search_widget", "") or "" if isinstance(gemini_result, dict) else ""
search_queries = gemini_result.get("search_queries", []) or [] if isinstance(gemini_result, dict) else []
grounding_metadata = self._extract_grounding_metadata(gemini_result)
# Continue with common analysis (same for both providers)
@@ -548,8 +562,17 @@ class ResearchService:
"""Extract sources from Gemini grounding metadata."""
sources = []
# Handle None or invalid gemini_result
if not gemini_result or not isinstance(gemini_result, dict):
logger.warning("gemini_result is None or not a dict, returning empty sources")
return sources
# The Gemini grounded provider already extracts sources and puts them in the 'sources' field
raw_sources = gemini_result.get("sources", [])
# Ensure raw_sources is a list (handle None case)
if raw_sources is None:
raw_sources = []
for src in raw_sources:
source = ResearchSource(
title=src.get("title", "Untitled"),
@@ -570,6 +593,15 @@ class ResearchService:
grounding_supports = []
citations = []
# Handle None or invalid gemini_result
if not gemini_result or not isinstance(gemini_result, dict):
logger.warning("gemini_result is None or not a dict, returning empty grounding metadata")
return GroundingMetadata(
grounding_chunks=grounding_chunks,
grounding_supports=grounding_supports,
citations=citations
)
# Extract grounding chunks from the raw grounding metadata
raw_grounding = gemini_result.get("grounding_metadata", {})
@@ -577,7 +609,11 @@ class ResearchService:
if hasattr(raw_grounding, 'grounding_chunks'):
raw_chunks = raw_grounding.grounding_chunks
else:
raw_chunks = raw_grounding.get("grounding_chunks", [])
raw_chunks = raw_grounding.get("grounding_chunks", []) if isinstance(raw_grounding, dict) else []
# Ensure raw_chunks is a list (handle None case)
if raw_chunks is None:
raw_chunks = []
for chunk in raw_chunks:
if "web" in chunk:

View File

@@ -0,0 +1,179 @@
"""
OAuth Token Monitoring Service
Service for creating and managing OAuth token monitoring tasks.
"""
from datetime import datetime, timedelta
from typing import List, Optional
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
import os
# Use service logger for consistent logging (WARNING level visible in production)
logger = get_service_logger("oauth_token_monitoring")
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
from services.gsc_service import GSCService
from services.integrations.bing_oauth import BingOAuthService
from services.integrations.wordpress_oauth import WordPressOAuthService
# Note: Wix tokens are stored in frontend sessionStorage, not backend database
# So we cannot check for Wix connections from the backend yet
def get_connected_platforms(user_id: str) -> List[str]:
    """
    Work out which OAuth platforms a user currently has connected.

    Each platform is probed independently through its own service. A failure
    while probing one platform is logged and swallowed so the remaining
    platforms are still checked.

    Probes:
    - GSC: gsc_credentials table (via GSCService.load_user_credentials)
    - Bing: bing_oauth_tokens table (via BingOAuthService.get_user_token_status)
    - WordPress: wordpress_oauth_tokens table (via WordPressOAuthService.get_user_tokens)
    - Wix: not probed (tokens live in frontend sessionStorage)

    Args:
        user_id: User ID (Clerk string)

    Returns:
        Identifiers of the platforms found connected, in probe order. Only
        'gsc', 'bing' and 'wordpress' can appear; 'wix' is never returned by
        this function.
    """
    platforms_found: List[str] = []
    logger.warning(f"[OAuth Monitoring] Checking connected platforms for user: {user_id}")

    # --- Google Search Console -------------------------------------------
    try:
        # The path is resolved inside the try so a failure here is treated
        # like any other failed probe.
        gsc_db = os.path.abspath("alwrity.db")
        logger.warning(f"[OAuth Monitoring] Checking GSC with db_path: {gsc_db}")
        credentials = GSCService(db_path=gsc_db).load_user_credentials(user_id)
        if not credentials:
            logger.warning(f"[OAuth Monitoring] ❌ GSC not connected for user {user_id} (no credentials found)")
        else:
            platforms_found.append('gsc')
            logger.warning(f"[OAuth Monitoring] ✅ GSC connected for user {user_id}")
    except Exception as gsc_err:
        logger.warning(f"[OAuth Monitoring] ⚠️ GSC check failed for user {user_id}: {gsc_err}", exc_info=True)

    # --- Bing --------------------------------------------------------------
    try:
        bing_db = os.path.abspath("alwrity.db")
        logger.warning(f"[OAuth Monitoring] Checking Bing with db_path: {bing_db}")
        status = BingOAuthService(db_path=bing_db).get_user_token_status(user_id)
        bing_active = status.get('has_active_tokens', False)
        logger.warning(f"[OAuth Monitoring] Bing token_status keys: {list(status.keys())}, has_active_tokens: {bing_active}")
        if not bing_active:
            logger.warning(f"[OAuth Monitoring] ❌ Bing not connected for user {user_id} (no active tokens)")
        else:
            platforms_found.append('bing')
            logger.warning(f"[OAuth Monitoring] ✅ Bing connected for user {user_id}")
    except Exception as bing_err:
        logger.warning(f"[OAuth Monitoring] ⚠️ Bing check failed for user {user_id}: {bing_err}", exc_info=True)

    # --- WordPress ---------------------------------------------------------
    try:
        wp_db = os.path.abspath("alwrity.db")
        logger.warning(f"[OAuth Monitoring] Checking WordPress with db_path: {wp_db}")
        wp_tokens = WordPressOAuthService(db_path=wp_db).get_user_tokens(user_id)
        # A falsy result (None or empty) counts as zero tokens.
        token_count = len(wp_tokens) if wp_tokens else 0
        logger.warning(f"[OAuth Monitoring] WordPress tokens found: {token_count}")
        if token_count > 0:
            platforms_found.append('wordpress')
            logger.warning(f"[OAuth Monitoring] ✅ WordPress connected for user {user_id} ({token_count} token(s))")
        else:
            logger.warning(f"[OAuth Monitoring] ❌ WordPress not connected for user {user_id} (no tokens found)")
    except Exception as wp_err:
        logger.warning(f"[OAuth Monitoring] ⚠️ WordPress check failed for user {user_id}: {wp_err}", exc_info=True)

    # Wix: Not checked (tokens in frontend sessionStorage)
    # TODO: Once backend storage is implemented, check wix_tokens table
    logger.warning(f"[OAuth Monitoring] Connected platforms for user {user_id}: {platforms_found}")
    return platforms_found
def create_oauth_monitoring_tasks(
    user_id: str,
    db: Session,
    platforms: Optional[List[str]] = None
) -> List[OAuthTokenMonitoringTask]:
    """
    Create OAuth token monitoring tasks for a user.

    If platforms are not provided, automatically detects connected platforms.
    Creates one task per platform with next_check set to 7 days from now.
    A platform that already has a task for this user is skipped, and repeated
    entries in ``platforms`` are processed only once.

    All new tasks are committed together. If anything fails, the error is
    logged, the session is rolled back and an empty list is returned (the
    exception is not propagated).

    Args:
        user_id: User ID (Clerk string)
        db: Database session
        platforms: Optional list of platforms to create tasks for.
            If None, auto-detects connected platforms.
            Valid values: 'gsc', 'bing', 'wordpress', 'wix'

    Returns:
        List of created OAuthTokenMonitoringTask instances (empty when
        nothing was created or an error occurred)
    """
    try:
        # Auto-detect platforms if not provided
        if platforms is None:
            platforms = get_connected_platforms(user_id)
            logger.warning(f"[OAuth Monitoring] Auto-detected {len(platforms)} connected platforms for user {user_id}: {platforms}")
        else:
            logger.warning(f"[OAuth Monitoring] Creating monitoring tasks for specified platforms: {platforms}")

        if not platforms:
            logger.warning(f"[OAuth Monitoring] No connected platforms found for user {user_id}. No monitoring tasks created.")
            return []

        # Drop repeated entries while keeping the caller's order. The
        # existence query below only sees rows added earlier in this loop if
        # the session autoflushes, so duplicates must not reach it.
        unique_platforms = list(dict.fromkeys(platforms))

        created_tasks = []
        now = datetime.utcnow()
        next_check = now + timedelta(days=7)  # 7 days from now

        for platform in unique_platforms:
            # Check if task already exists for this user/platform combination
            existing_task = db.query(OAuthTokenMonitoringTask).filter(
                OAuthTokenMonitoringTask.user_id == user_id,
                OAuthTokenMonitoringTask.platform == platform
            ).first()
            if existing_task:
                logger.warning(
                    f"[OAuth Monitoring] Monitoring task already exists for user {user_id}, platform {platform}. "
                    f"Skipping creation."
                )
                continue

            # Create new monitoring task
            task = OAuthTokenMonitoringTask(
                user_id=user_id,
                platform=platform,
                status='active',
                next_check=next_check,
                created_at=now,
                updated_at=now
            )
            db.add(task)
            created_tasks.append(task)
            logger.warning(
                f"[OAuth Monitoring] Created OAuth token monitoring task for user {user_id}, "
                f"platform {platform}, next_check: {next_check.isoformat()}"
            )

        # Single commit so the batch is persisted all-or-nothing.
        db.commit()
        logger.warning(
            f"[OAuth Monitoring] Successfully created {len(created_tasks)} OAuth token monitoring tasks "
            f"for user {user_id}"
        )
        return created_tasks
    except Exception as e:
        logger.error(
            f"Error creating OAuth token monitoring tasks for user {user_id}: {e}",
            exc_info=True
        )
        db.rollback()
        return []

View File

@@ -26,12 +26,63 @@ class OnboardingDatabaseService:
# Cache for schema feature detection
self._brand_cols_checked: bool = False
self._brand_cols_available: bool = False
self._research_persona_cols_checked: bool = False
self._research_persona_cols_available: bool = False
# --- Feature flags and schema detection helpers ---
def _brand_feature_enabled(self) -> bool:
    """Report whether writing brand-related columns is switched on.

    Reads the ENABLE_WEBSITE_BRAND_COLUMNS environment variable (treated as
    'true' when unset) and compares it, case-insensitively, against the
    accepted "on" spellings.
    """
    accepted = ('1', 'true', 'yes', 'on')
    raw_flag = os.getenv('ENABLE_WEBSITE_BRAND_COLUMNS', 'true')
    return raw_flag.lower() in accepted
def _ensure_research_persona_columns(self, session_db: Session) -> None:
    """Ensure research_persona columns exist in persona_data table (runtime migration).

    Adds ``research_persona`` and ``research_persona_generated_at`` to
    ``persona_data`` when they are missing. SQLite is detected from the bound
    engine URL and inspected with PRAGMA; any other backend is handled with
    the PostgreSQL statements.

    The check is attempted at most once per service instance:
    ``_research_persona_cols_checked`` is set even when the migration fails,
    so later calls return immediately instead of retrying.

    Note: this method commits (or rolls back) on ``session_db``, which also
    affects any other pending work in that session.

    Args:
        session_db: Database session used to inspect and alter the table.

    Raises:
        Exception: Whatever the database raised while inspecting or altering
            the table; the session is rolled back before re-raising.
    """
    if self._research_persona_cols_checked:
        return

    try:
        # Check if columns exist using PRAGMA (SQLite) or a probe query (PostgreSQL)
        db_url = str(session_db.bind.url) if session_db.bind else ""

        if 'sqlite' in db_url.lower():
            # SQLite: Use PRAGMA to check columns
            result = session_db.execute(text("PRAGMA table_info(persona_data)"))
            cols = {row[1] for row in result}  # Column name is at index 1

            if 'research_persona' not in cols:
                logger.info("Adding missing column research_persona to persona_data table")
                session_db.execute(text("ALTER TABLE persona_data ADD COLUMN research_persona JSON"))
                session_db.commit()

            if 'research_persona_generated_at' not in cols:
                logger.info("Adding missing column research_persona_generated_at to persona_data table")
                session_db.execute(text("ALTER TABLE persona_data ADD COLUMN research_persona_generated_at TIMESTAMP"))
                session_db.commit()

            self._research_persona_cols_available = True
        else:
            # PostgreSQL: Try to query the columns (will fail if they don't exist)
            try:
                session_db.execute(text("SELECT research_persona, research_persona_generated_at FROM persona_data LIMIT 0"))
                self._research_persona_cols_available = True
            except Exception:
                # The failed probe leaves a PostgreSQL transaction in the
                # aborted state; every later statement would be rejected
                # until it is rolled back, so reset before altering.
                session_db.rollback()

                logger.info("Adding missing columns research_persona and research_persona_generated_at to persona_data table")
                try:
                    # IF NOT EXISTS keeps this working when only one of the
                    # two columns is missing.
                    session_db.execute(text("ALTER TABLE persona_data ADD COLUMN IF NOT EXISTS research_persona JSONB"))
                    session_db.execute(text("ALTER TABLE persona_data ADD COLUMN IF NOT EXISTS research_persona_generated_at TIMESTAMP"))
                    session_db.commit()
                    self._research_persona_cols_available = True
                except Exception as alter_err:
                    logger.error(f"Failed to add research_persona columns: {alter_err}")
                    session_db.rollback()
                    raise
    except Exception as e:
        logger.error(f"Error ensuring research_persona columns: {e}")
        session_db.rollback()
        raise
    finally:
        # Mark as checked on success and on failure alike (see docstring).
        self._research_persona_cols_checked = True
def _ensure_brand_column_detection(self, session_db: Session) -> None:
"""Detect at runtime whether brand columns exist and cache the result."""
if self._brand_cols_checked:
@@ -477,6 +528,9 @@ class OnboardingDatabaseService:
if not session_db:
raise ValueError("Database session required")
# Ensure research_persona columns exist before querying
self._ensure_research_persona_columns(session_db)
try:
session = self.get_session_by_user(user_id, session_db)
if not session:

View File

@@ -0,0 +1,239 @@
"""
Facebook Persona Scheduler
Handles scheduled generation of Facebook personas after onboarding.
"""
from datetime import datetime, timedelta, timezone
from typing import Dict, Any
from loguru import logger
from services.database import get_db_session
from services.persona_data_service import PersonaDataService
from services.persona.facebook.facebook_persona_service import FacebookPersonaService
from services.onboarding.database_service import OnboardingDatabaseService
from models.scheduler_models import SchedulerEventLog
def _record_facebook_persona_event(
    db,
    user_id: str,
    start_time: datetime,
    event_type: str,
    event_data: Dict[str, Any],
    log_label: str,
    error_message=None,
) -> None:
    """Write one SchedulerEventLog row for the Facebook persona job (best effort).

    A failure to write the row is logged as a warning and the session is
    rolled back; it is not re-raised by the logging itself.

    Args:
        db: Open database session.
        user_id: User ID (Clerk string); also used to build the job id.
        start_time: When generation started; stored as the event date.
        event_type: 'job_completed' or 'job_failed'.
        event_data: Payload stored on the event row.
        log_label: Short phrase used in the warning if the write fails.
        error_message: Optional error text; only set on the row when given.
    """
    try:
        fields = {
            'event_type': event_type,
            'event_date': start_time,
            'job_id': f"facebook_persona_{user_id}",  # Match scheduled job ID format
            'job_type': 'one_time',
            'user_id': user_id,
            'event_data': event_data,
        }
        if error_message is not None:
            fields['error_message'] = error_message
        db.add(SchedulerEventLog(**fields))
        db.commit()
    except Exception as log_error:
        logger.warning(f"Failed to log Facebook persona {log_label} to scheduler event log: {log_error}")
        db.rollback()


async def generate_facebook_persona_task(user_id: str):
    """
    Async task function to generate Facebook persona for a user.

    This function is called by the scheduler 20 minutes after onboarding completion.
    It returns early (without calling the generator) when no database session
    is available, when the user has no core persona, or when a Facebook
    persona already exists. Every generation outcome (success, save failure,
    error result, exception) is recorded in the scheduler event log. No
    exception escapes this function; failures are logged instead.

    NOTE(review): this coroutine never awaits, and generate_facebook_persona
    is called synchronously, so it may block the event loop while running;
    confirm how the scheduler executes it.

    Args:
        user_id: User ID (Clerk string)
    """
    db = None
    try:
        logger.info(f"Scheduled Facebook persona generation started for user {user_id}")

        db = get_db_session()
        if not db:
            logger.error(f"Failed to get database session for Facebook persona generation (user: {user_id})")
            return

        # Get core persona (required for Facebook persona)
        persona_data_service = PersonaDataService(db_session=db)
        persona_data = persona_data_service.get_user_persona_data(user_id)
        if not persona_data or not persona_data.get('core_persona'):
            logger.warning(f"No core persona found for user {user_id}, cannot generate Facebook persona")
            return
        core_persona = persona_data.get('core_persona', {})

        # Check if persona already exists to avoid unnecessary API calls.
        # Done before loading onboarding data so those queries are skipped
        # too; `or {}` also covers a stored value of None.
        platform_personas = persona_data.get('platform_personas') or {}
        if platform_personas.get('facebook'):
            logger.info(f"Facebook persona already exists for user {user_id}, skipping generation")
            return

        # Get onboarding data for context
        onboarding_service = OnboardingDatabaseService(db=db)
        website_analysis = onboarding_service.get_website_analysis(user_id, db) or {}
        research_prefs = onboarding_service.get_research_preferences(user_id, db)
        onboarding_data = {
            "website_url": website_analysis.get('website_url', ''),
            "writing_style": website_analysis.get('writing_style', {}),
            "content_characteristics": website_analysis.get('content_characteristics', {}),
            "target_audience": website_analysis.get('target_audience', ''),
            "research_preferences": research_prefs or {}
        }

        start_time = datetime.utcnow()
        job_function = 'generate_facebook_persona_task'

        # Generate Facebook persona
        facebook_service = FacebookPersonaService()
        try:
            generated_persona = facebook_service.generate_facebook_persona(
                core_persona,
                onboarding_data
            )
            execution_time = (datetime.utcnow() - start_time).total_seconds()

            if not generated_persona or "error" in generated_persona:
                error_msg = f"Scheduled Facebook persona generation failed for user {user_id}: {generated_persona}"
                logger.error(f"{error_msg}")
                # Log failure to scheduler event log for dashboard visibility
                _record_facebook_persona_event(
                    db, user_id, start_time, 'job_failed',
                    {
                        'job_function': job_function,
                        'execution_time_seconds': execution_time,
                        'status': 'failed',
                        'failure_reason': 'generation_returned_error',
                        'expensive_api_call': True
                    },
                    'generation failure',
                    error_message=error_msg,
                )
            elif persona_data_service.save_platform_persona(user_id, 'facebook', generated_persona):
                logger.info(f"✅ Scheduled Facebook persona generation completed for user {user_id}")
                # Log success to scheduler event log for dashboard
                _record_facebook_persona_event(
                    db, user_id, start_time, 'job_completed',
                    {
                        'job_function': job_function,
                        'execution_time_seconds': execution_time,
                        'status': 'success'
                    },
                    'generation success',
                )
            else:
                error_msg = f"Failed to save Facebook persona for user {user_id}"
                logger.warning(f"⚠️ {error_msg}")
                # Log failure to scheduler event log
                _record_facebook_persona_event(
                    db, user_id, start_time, 'job_failed',
                    {
                        'job_function': job_function,
                        'execution_time_seconds': execution_time,
                        'status': 'failed',
                        'failure_reason': 'save_failed',
                        'expensive_api_call': True
                    },
                    'save failure',
                    error_message=error_msg,
                )
        except Exception as gen_error:
            execution_time = (datetime.utcnow() - start_time).total_seconds()
            error_msg = f"Exception during scheduled Facebook persona generation for user {user_id}: {str(gen_error)}. Expensive API call may have been made."
            logger.error(f"{error_msg}")
            # Log exception to scheduler event log for dashboard visibility
            _record_facebook_persona_event(
                db, user_id, start_time, 'job_failed',
                {
                    'job_function': job_function,
                    'execution_time_seconds': execution_time,
                    'status': 'failed',
                    'failure_reason': 'exception',
                    'exception_type': type(gen_error).__name__,
                    'exception_message': str(gen_error),
                    'expensive_api_call': True
                },
                'generation exception',
                error_message=error_msg,
            )
    except Exception as e:
        logger.error(f"Error in scheduled Facebook persona generation for user {user_id}: {e}")
    finally:
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session: {e}")
def schedule_facebook_persona_generation(user_id: str, delay_minutes: int = 20) -> str:
    """
    Queue a one-time Facebook persona generation job for a user.

    The job runs ``delay_minutes`` after the moment this function is called
    and replaces any job already registered under the same id.

    Args:
        user_id: User ID (Clerk string)
        delay_minutes: Delay in minutes before generating persona (default: 20)

    Returns:
        Job ID as reported by the scheduler

    Raises:
        Exception: Any scheduling error is logged and then re-raised.
    """
    try:
        from services.scheduler import get_scheduler

        task_scheduler = get_scheduler()

        # Timezone-aware UTC "now" plus the requested delay.
        delay = timedelta(minutes=delay_minutes)
        fire_at = datetime.now(timezone.utc) + delay

        # The id is deterministic (no timestamp) so job restoration can find
        # the job and keep its original scheduled time.
        # Note: Clerk user_id already includes "user_" prefix, so we don't add it again
        persona_job_id = f"facebook_persona_{user_id}"

        confirmed_job_id = task_scheduler.schedule_one_time_task(
            func=generate_facebook_persona_task,
            run_date=fire_at,
            job_id=persona_job_id,
            kwargs={"user_id": user_id},
            replace_existing=True
        )
        logger.info(
            f"Scheduled Facebook persona generation for user {user_id} "
            f"at {fire_at} (job_id: {confirmed_job_id})"
        )
        return confirmed_job_id
    except Exception as e:
        logger.error(f"Failed to schedule Facebook persona generation for user {user_id}: {e}")
        raise

View File

@@ -0,0 +1,171 @@
"""
Research Persona Prompt Builder
Handles building comprehensive prompts for research persona generation.
Generates personalized research defaults, suggestions, and configurations.
"""
from typing import Dict, Any
import json
from loguru import logger
class ResearchPersonaPromptBuilder:
    """Builds comprehensive prompts for research persona generation."""

    @staticmethod
    def _to_pretty_json(value: Any) -> str:
        """Render *value* as indented JSON for embedding in the prompt.

        ``default=str`` is deliberate: onboarding data may carry values that
        are not JSON-native (for example datetimes), and for prompt text a
        readable string is preferable to a TypeError.
        """
        return json.dumps(value, indent=2, default=str)

    def build_research_persona_prompt(self, onboarding_data: Dict[str, Any]) -> str:
        """Build the research persona generation prompt with comprehensive data.

        Args:
            onboarding_data: Mapping that may contain "website_analysis",
                "persona_data", "research_preferences" and "business_info".
                Missing or None sections are treated as empty dicts.

        Returns:
            The full prompt text, with each section embedded as indented JSON.
        """
        # Extract data from onboarding_data (None sections become {})
        website_analysis = onboarding_data.get("website_analysis", {}) or {}
        persona_data = onboarding_data.get("persona_data", {}) or {}
        research_prefs = onboarding_data.get("research_preferences", {}) or {}
        business_info = onboarding_data.get("business_info", {}) or {}

        # Extract core persona
        core_persona = persona_data.get("core_persona", {}) or {}

        # Doubled braces below are literal braces in the rendered prompt.
        prompt = f"""
COMPREHENSIVE RESEARCH PERSONA GENERATION TASK: Create a highly detailed, personalized research persona based on the user's business, writing style, and content strategy. This persona will provide intelligent defaults and suggestions for research inputs.
=== USER CONTEXT ===
BUSINESS INFORMATION:
{self._to_pretty_json(business_info)}
WEBSITE ANALYSIS:
{self._to_pretty_json(website_analysis)}
CORE PERSONA:
{self._to_pretty_json(core_persona)}
RESEARCH PREFERENCES:
{self._to_pretty_json(research_prefs)}
=== RESEARCH PERSONA GENERATION REQUIREMENTS ===
Generate a comprehensive research persona in JSON format with the following structure:
1. DEFAULT VALUES:
- "default_industry": Extract from core_persona.industry, business_info.industry, or website_analysis target_audience. Use "General" only if none available.
- "default_target_audience": Extract from core_persona.target_audience, website_analysis.target_audience, or business_info.target_audience. Be specific and descriptive.
- "default_research_mode": Suggest "basic", "comprehensive", or "targeted" based on research_preferences.research_depth and content_type preferences.
- "default_provider": Suggest "google" for news/trends, "exa" for academic/technical deep-dives, or "google" as default.
2. KEYWORD INTELLIGENCE:
- "suggested_keywords": Generate 8-12 keywords relevant to the user's industry, interests (from core_persona), and content goals.
- "keyword_expansion_patterns": Create a dictionary mapping common keywords to expanded, industry-specific terms. Include 10-15 patterns like:
{{"AI": ["healthcare AI", "medical AI", "clinical AI", "diagnostic AI"], "tools": ["medical devices", "clinical tools"], ...}}
Focus on industry-specific terminology from the user's domain.
3. DOMAIN EXPERTISE:
- "suggested_exa_domains": List 4-6 authoritative domains for the user's industry (e.g., Healthcare: ["pubmed.gov", "nejm.org", "thelancet.com"]).
- "suggested_exa_category": Suggest appropriate Exa category based on industry:
- Healthcare/Science: "research paper"
- Finance: "financial report"
- Technology/Business: "company" or "news"
- Default: null (empty string for all categories)
4. RESEARCH ANGLES:
- "research_angles": Generate 5-8 alternative research angles/focuses based on:
- User's pain points and challenges (from core_persona)
- Industry trends and opportunities
- Content goals (from research_preferences)
- Audience interests (from core_persona.interests)
Examples: "Compare {{topic}} tools", "{{topic}} ROI analysis", "Latest {{topic}} trends", etc.
5. QUERY ENHANCEMENT:
- "query_enhancement_rules": Create templates for improving vague user queries:
{{"vague_ai": "Research: AI applications in {{industry}} for {{audience}}", "vague_tools": "Compare top {{industry}} tools", ...}}
Include 5-8 enhancement patterns.
6. RECOMMENDED PRESETS:
- "recommended_presets": Generate 3-5 personalized research preset templates. Each preset should include:
- name: Descriptive name (e.g., "{{Industry}} Trends", "{{Audience}} Insights")
- keywords: Research query string
- industry: User's industry
- target_audience: User's target audience
- research_mode: "basic", "comprehensive", or "targeted"
- config: Complete ResearchConfig object with appropriate settings
- description: Brief explanation of what this preset researches
Make presets relevant to the user's specific industry, audience, and content goals.
7. RESEARCH PREFERENCES:
- "research_preferences": Extract and structure research preferences from onboarding:
- research_depth: From research_preferences.research_depth
- content_types: From research_preferences.content_types
- auto_research: From research_preferences.auto_research
- factual_content: From research_preferences.factual_content
=== OUTPUT REQUIREMENTS ===
Return a valid JSON object matching this exact structure:
{{
"default_industry": "string",
"default_target_audience": "string",
"default_research_mode": "basic" | "comprehensive" | "targeted",
"default_provider": "google" | "exa",
"suggested_keywords": ["keyword1", "keyword2", ...],
"keyword_expansion_patterns": {{
"keyword": ["expansion1", "expansion2", ...]
}},
"suggested_exa_domains": ["domain1.com", "domain2.com", ...],
"suggested_exa_category": "string or null",
"research_angles": ["angle1", "angle2", ...],
"query_enhancement_rules": {{
"pattern": "template"
}},
"recommended_presets": [
{{
"name": "string",
"keywords": "string",
"industry": "string",
"target_audience": "string",
"research_mode": "basic" | "comprehensive" | "targeted",
"config": {{
"mode": "basic" | "comprehensive" | "targeted",
"provider": "google" | "exa",
"max_sources": 10 | 15 | 12,
"include_statistics": true | false,
"include_expert_quotes": true | false,
"include_competitors": true | false,
"include_trends": true | false,
"exa_category": "string or null",
"exa_include_domains": ["domain1.com", ...],
"exa_search_type": "auto" | "keyword" | "neural"
}},
"description": "string"
}}
],
"research_preferences": {{
"research_depth": "string",
"content_types": ["type1", "type2", ...],
"auto_research": true | false,
"factual_content": true | false
}},
"version": "1.0",
"confidence_score": 85.0
}}
=== IMPORTANT INSTRUCTIONS ===
1. Be highly specific and personalized - use actual data from the user's business, persona, and preferences.
2. Avoid generic suggestions - every field should reflect the user's unique context.
3. For industries not clearly identified, infer from website_analysis.content_characteristics or writing_style.
4. Ensure all suggested keywords, domains, and angles are relevant to the user's industry and audience.
5. Generate realistic, actionable presets that the user would actually want to use.
6. Confidence score should reflect data richness (0-100): higher if rich onboarding data, lower if minimal data.
7. Return ONLY valid JSON - no markdown formatting, no explanatory text.
Generate the research persona now:
"""
        return prompt

    def get_json_schema(self) -> Dict[str, Any]:
        """Return JSON schema for structured LLM response.

        The schema is derived from the ResearchPersona Pydantic model and is
        intended for use with llm_text_gen(json_struct=...).
        """
        # Imported lazily, as in the original implementation.
        from models.research_persona_models import ResearchPersona

        # Pydantic v2 exposes model_json_schema(); .schema() is the v1 name
        # (deprecated in v2), kept as a fallback for older installations.
        schema_factory = getattr(ResearchPersona, "model_json_schema", None)
        if callable(schema_factory):
            return schema_factory()
        return ResearchPersona.schema()

View File

@@ -0,0 +1,194 @@
"""
Research Persona Scheduler
Handles scheduled generation of research personas after onboarding.
"""
from datetime import datetime, timedelta, timezone
from typing import Dict, Any
from loguru import logger
from services.database import get_db_session
from services.research.research_persona_service import ResearchPersonaService
from models.scheduler_models import SchedulerEventLog
def _record_research_persona_event(
    db,
    user_id: str,
    start_time: datetime,
    event_type: str,
    event_data: Dict[str, Any],
    log_label: str,
    error_message=None,
) -> None:
    """Write one SchedulerEventLog row for the research persona job (best effort).

    A failure to write the row is logged as a warning and the session is
    rolled back; it is not re-raised by the logging itself.

    Args:
        db: Open database session.
        user_id: User ID (Clerk string); also used to build the job id.
        start_time: When generation started; stored as the event date.
        event_type: 'job_completed' or 'job_failed'.
        event_data: Payload stored on the event row.
        log_label: Word used in the warning if the write fails.
        error_message: Optional error text; only set on the row when given.
    """
    try:
        fields = {
            'event_type': event_type,
            'event_date': start_time,
            'job_id': f"research_persona_{user_id}",  # Match scheduled job ID format
            'job_type': 'one_time',
            'user_id': user_id,
            'event_data': event_data,
        }
        if error_message is not None:
            fields['error_message'] = error_message
        db.add(SchedulerEventLog(**fields))
        db.commit()
    except Exception as log_error:
        logger.warning(f"Failed to log persona generation {log_label} to scheduler event log: {log_error}")
        db.rollback()


async def generate_research_persona_task(user_id: str):
    """
    Async task function to generate research persona for a user.

    This function is called by the scheduler 20 minutes after onboarding completion.
    It returns early when no database session is available or when a research
    persona is already stored for the user. Every generation outcome
    (success, empty result, exception) is recorded in the scheduler event
    log. A failed generation is never rescheduled from here, to avoid retry
    loops around an expensive API call. No exception escapes this function.

    NOTE(review): this coroutine never awaits, and get_or_generate is called
    synchronously, so it may block the event loop while running; confirm how
    the scheduler executes it.

    Args:
        user_id: User ID (Clerk string)
    """
    db = None
    try:
        logger.info(f"Scheduled research persona generation started for user {user_id}")

        # Get database session
        db = get_db_session()
        if not db:
            logger.error(f"Failed to get database session for research persona generation (user: {user_id})")
            return

        persona_service = ResearchPersonaService(db_session=db)

        # Check if persona already exists to avoid unnecessary API calls
        persona_data = persona_service._get_persona_data_record(user_id)
        if persona_data and persona_data.research_persona:
            logger.info(f"Research persona already exists for user {user_id}, skipping generation")
            return

        start_time = datetime.utcnow()
        job_function = 'generate_research_persona_task'
        try:
            research_persona = persona_service.get_or_generate(user_id, force_refresh=False)
            execution_time = (datetime.utcnow() - start_time).total_seconds()

            if research_persona:
                logger.info(f"✅ Scheduled research persona generation completed for user {user_id}")
                # Log success to scheduler event log for dashboard
                _record_research_persona_event(
                    db, user_id, start_time, 'job_completed',
                    {
                        'job_function': job_function,
                        'execution_time_seconds': execution_time,
                        'status': 'success'
                    },
                    'success',
                )
            else:
                error_msg = (
                    f"Scheduled research persona generation FAILED for user {user_id}. "
                    f"Expensive API call was made but generation failed. "
                    f"Will NOT automatically retry to prevent wasteful API calls."
                )
                logger.error(f"{error_msg}")
                # Log failure to scheduler event log for dashboard visibility
                _record_research_persona_event(
                    db, user_id, start_time, 'job_failed',
                    {
                        'job_function': job_function,
                        'execution_time_seconds': execution_time,
                        'status': 'failed',
                        'failure_reason': 'generation_returned_none',
                        'expensive_api_call': True
                    },
                    'failure',
                    error_message=error_msg,
                )
                # DO NOT reschedule - this prevents infinite retry loops
                # User can manually trigger generation from frontend if needed
        except Exception as gen_error:
            execution_time = (datetime.utcnow() - start_time).total_seconds()
            error_msg = (
                f"Exception during scheduled research persona generation for user {user_id}: {str(gen_error)}. "
                f"Expensive API call may have been made. Will NOT automatically retry."
            )
            logger.error(f"{error_msg}")
            # Log exception to scheduler event log for dashboard visibility
            _record_research_persona_event(
                db, user_id, start_time, 'job_failed',
                {
                    'job_function': job_function,
                    'execution_time_seconds': execution_time,
                    'status': 'failed',
                    'failure_reason': 'exception',
                    'exception_type': type(gen_error).__name__,
                    'exception_message': str(gen_error),
                    'expensive_api_call': True
                },
                'exception',
                error_message=error_msg,
            )
            # DO NOT reschedule - prevent infinite retry loops
    except Exception as e:
        logger.error(f"Error in scheduled research persona generation for user {user_id}: {e}")
    finally:
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session: {e}")
def schedule_research_persona_generation(user_id: str, delay_minutes: int = 20) -> str:
    """
    Queue a one-time research persona generation job for a user.

    The job ID is deterministic (no timestamp), so scheduling again for the
    same user replaces the pending job instead of adding a second one, and
    startup restoration can look the job up by ID.

    Args:
        user_id: User ID (Clerk string, already carrying its "user_" prefix)
        delay_minutes: Minutes from now (UTC) at which the job should run (default: 20)

    Returns:
        Job ID reported by the scheduler

    Raises:
        Exception: Any scheduling failure is logged and re-raised unchanged.
    """
    # Clerk user_id already includes the "user_" prefix, so it is not added again
    persona_job_id = f"research_persona_{user_id}"
    try:
        from services.scheduler import get_scheduler

        task_scheduler = get_scheduler()

        # Timezone-aware UTC timestamp for the run
        run_at = datetime.now(timezone.utc) + timedelta(minutes=delay_minutes)

        confirmed_job_id = task_scheduler.schedule_one_time_task(
            func=generate_research_persona_task,
            run_date=run_at,
            job_id=persona_job_id,
            kwargs={"user_id": user_id},
            replace_existing=True,
        )
    except Exception as e:
        logger.error(f"Failed to schedule research persona generation for user {user_id}: {e}")
        raise
    else:
        logger.info(
            f"Scheduled research persona generation for user {user_id} "
            f"at {run_at} (job_id: {confirmed_job_id})"
        )
        return confirmed_job_id

View File

@@ -0,0 +1,384 @@
"""
Research Persona Service
Handles generation, caching, and retrieval of AI-powered research personas.
"""
from typing import Dict, Any, Optional
from datetime import datetime, timedelta
from loguru import logger
from fastapi import HTTPException
from services.database import get_db_session
from models.onboarding import PersonaData, OnboardingSession
from models.research_persona_models import ResearchPersona
from .research_persona_prompt_builder import ResearchPersonaPromptBuilder
from services.llm_providers.main_text_generation import llm_text_gen
from services.onboarding.database_service import OnboardingDatabaseService
from services.persona_data_service import PersonaDataService
class ResearchPersonaService:
    """Service for generating and managing research personas.

    A persona is generated by an LLM from the user's onboarding data, stored
    on the user's ``PersonaData`` record, and treated as valid for
    ``CACHE_TTL_DAYS`` days after ``research_persona_generated_at``.
    """

    CACHE_TTL_DAYS = 7  # 7-day cache TTL

    def __init__(self, db_session=None):
        """
        Bind the service to a database session.

        Args:
            db_session: Session to use. When falsy, a new session is obtained
                from ``get_db_session()``.
        """
        self.db = db_session or get_db_session()
        self.prompt_builder = ResearchPersonaPromptBuilder()
        self.onboarding_service = OnboardingDatabaseService(db=self.db)
        self.persona_data_service = PersonaDataService(db_session=self.db)

    def get_cached_only(
        self,
        user_id: str
    ) -> Optional[ResearchPersona]:
        """
        Get research persona for user ONLY if it exists in cache.

        This method NEVER generates - it only returns cached personas.
        Use this for config endpoints to avoid triggering rate limit checks.

        Args:
            user_id: User ID (Clerk string)

        Returns:
            ResearchPersona if cached and valid, None otherwise (including on
            any error, which is logged).
        """
        try:
            persona_data = self._get_persona_data_record(user_id)
            if not persona_data:
                logger.debug(f"No persona data found for user {user_id}")
                return None

            # Only return if cache is valid and persona exists
            if self.is_cache_valid(persona_data) and persona_data.research_persona:
                try:
                    logger.debug(f"Returning cached research persona for user {user_id}")
                    return ResearchPersona(**persona_data.research_persona)
                except Exception as e:
                    logger.warning(f"Failed to parse cached research persona: {e}")
                    return None

            # Cache invalid or persona missing - return None (don't generate)
            logger.debug(f"No valid cached research persona for user {user_id}")
            return None
        except Exception as e:
            logger.error(f"Error getting cached research persona for user {user_id}: {e}")
            return None

    def get_or_generate(
        self,
        user_id: str,
        force_refresh: bool = False
    ) -> Optional[ResearchPersona]:
        """
        Get research persona for user, generating if missing or expired.

        Args:
            user_id: User ID (Clerk string)
            force_refresh: If True, regenerate even if cache is valid

        Returns:
            ResearchPersona if successful, None otherwise. A freshly generated
            persona is returned even when saving it to the database fails.

        Raises:
            HTTPException: Propagated from generation (e.g., 429 subscription
                limit) so it reaches the API layer.
        """
        try:
            persona_data = self._get_persona_data_record(user_id)
            if not persona_data:
                logger.warning(f"No persona data found for user {user_id}, cannot generate research persona")
                return None

            # Check cache if not forcing refresh
            if not force_refresh and self.is_cache_valid(persona_data):
                if persona_data.research_persona:
                    logger.info(f"Using cached research persona for user {user_id}")
                    try:
                        return ResearchPersona(**persona_data.research_persona)
                    except Exception as e:
                        # Unparseable cache entry: fall through to regeneration
                        logger.warning(f"Failed to parse cached research persona: {e}, regenerating...")
                else:
                    logger.info(f"Research persona missing for user {user_id}, generating...")
            elif force_refresh:
                logger.info(f"Forcing refresh of research persona for user {user_id}")
            else:
                logger.info(f"Cache expired for user {user_id}, regenerating...")

            # Generate new research persona; HTTPException propagates via the
            # handler below.
            research_persona = self.generate_research_persona(user_id)

            if not research_persona:
                # Log detailed error for debugging expensive failures
                logger.error(
                    f"❌ Failed to generate research persona for user {user_id} - "
                    f"This is an expensive failure (API call consumed). Check logs above for details."
                )
                return None

            if self.save_research_persona(user_id, research_persona):
                logger.info(f"✅ Research persona generated and saved for user {user_id}")
            else:
                logger.warning(f"Failed to save research persona for user {user_id}")
            return research_persona
        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
            raise
        except Exception as e:
            logger.error(f"Error getting/generating research persona for user {user_id}: {e}")
            return None

    def generate_research_persona(self, user_id: str) -> Optional[ResearchPersona]:
        """
        Generate a new research persona for the user.

        Collects onboarding data, builds the prompt and JSON schema, calls the
        LLM, and validates the response into a ``ResearchPersona``.

        Args:
            user_id: User ID (Clerk string)

        Returns:
            ResearchPersona if successful, None otherwise (insufficient data,
            empty/unparseable response, or validation failure).

        Raises:
            HTTPException: Re-raised from the LLM call, or raised with status
                429 when the LLM call fails with ``RuntimeError``.
        """
        try:
            logger.info(f"Generating research persona for user {user_id}")

            onboarding_data = self._collect_onboarding_data(user_id)
            if not onboarding_data:
                logger.warning(f"Insufficient onboarding data for user {user_id}")
                return None

            prompt = self.prompt_builder.build_research_persona_prompt(onboarding_data)
            json_schema = self.prompt_builder.get_json_schema()

            # Call LLM with structured JSON response
            logger.info(f"Calling LLM for research persona generation (user: {user_id})")
            try:
                response_text = llm_text_gen(
                    prompt=prompt,
                    json_struct=json_schema,
                    user_id=user_id
                )
            except HTTPException:
                # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
                logger.warning(f"HTTPException during LLM call for user {user_id} - re-raising")
                raise
            except RuntimeError as e:
                # Re-raise RuntimeError (subscription limits) as HTTPException
                logger.warning(f"RuntimeError during LLM call for user {user_id}: {e}")
                raise HTTPException(status_code=429, detail=str(e))

            if not response_text:
                logger.error("Empty response from LLM")
                return None

            # Parse JSON response
            import json
            try:
                # When json_struct is provided, llm_text_gen may return a dict directly
                if isinstance(response_text, dict):
                    persona_dict = response_text
                elif isinstance(response_text, str):
                    # Handle markdown-wrapped JSON or a plain JSON string
                    response_text = response_text.strip()
                    if response_text.startswith("```json"):
                        response_text = response_text[7:]
                    if response_text.startswith("```"):
                        response_text = response_text[3:]
                    if response_text.endswith("```"):
                        response_text = response_text[:-3]
                    response_text = response_text.strip()
                    persona_dict = json.loads(response_text)
                else:
                    logger.error(f"Unexpected response type from LLM: {type(response_text)}")
                    return None

                # Valid JSON that is not an object (list, number, ...) cannot
                # become a persona; reject it explicitly.
                if not isinstance(persona_dict, dict):
                    logger.error(f"LLM response JSON is not an object: {type(persona_dict)}")
                    return None

                # Add generated_at timestamp
                persona_dict["generated_at"] = datetime.utcnow().isoformat()

                # Validate and create ResearchPersona
                try:
                    research_persona = ResearchPersona(**persona_dict)
                    logger.info(f"✅ Research persona generated successfully for user {user_id}")
                    return research_persona
                except Exception as validation_error:
                    logger.error(f"Failed to validate ResearchPersona from dict: {validation_error}")
                    logger.debug(f"Persona dict keys: {list(persona_dict.keys()) if isinstance(persona_dict, dict) else 'Not a dict'}")
                    logger.debug(f"Persona dict sample: {str(persona_dict)[:500]}")
                    # Re-raise to be caught by the generic handler below
                    raise
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse LLM response as JSON: {e}")
                logger.debug(f"Response text: {response_text[:500] if isinstance(response_text, str) else str(response_text)[:500]}")
                return None
            except Exception as e:
                logger.error(f"Failed to create ResearchPersona from response: {e}")
                return None
        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
            raise
        except Exception as e:
            logger.error(f"Error generating research persona for user {user_id}: {e}")
            return None

    def is_cache_valid(self, persona_data: PersonaData) -> bool:
        """
        Check if cached research persona is still valid (within TTL).

        Args:
            persona_data: PersonaData database record

        Returns:
            True if ``research_persona_generated_at`` is set and less than
            ``CACHE_TTL_DAYS`` days old, False otherwise.
        """
        generated_at = persona_data.research_persona_generated_at
        if not generated_at:
            return False

        # Timestamps are written with naive datetime.utcnow(), but a database
        # driver may hand back a timezone-aware value. Normalise to naive UTC
        # so the subtraction below cannot raise TypeError (which callers would
        # swallow, silently disabling the cache).
        offset = generated_at.utcoffset()
        if offset is not None:
            generated_at = generated_at.replace(tzinfo=None) - offset

        cache_age = datetime.utcnow() - generated_at
        is_valid = cache_age < timedelta(days=self.CACHE_TTL_DAYS)
        if not is_valid:
            logger.debug(f"Cache expired (age: {cache_age.days} days, TTL: {self.CACHE_TTL_DAYS} days)")
        return is_valid

    def save_research_persona(
        self,
        user_id: str,
        research_persona: ResearchPersona
    ) -> bool:
        """
        Save research persona to database.

        Args:
            user_id: User ID (Clerk string)
            research_persona: ResearchPersona to save

        Returns:
            True if successful, False otherwise (the session is rolled back
            when an exception occurs).
        """
        try:
            persona_data = self._get_persona_data_record(user_id)
            if not persona_data:
                logger.error(f"No persona data record found for user {user_id}")
                return False

            # Store as a plain dict and stamp the generation time used by is_cache_valid
            persona_data.research_persona = research_persona.dict()
            persona_data.research_persona_generated_at = datetime.utcnow()
            self.db.commit()

            logger.info(f"✅ Research persona saved for user {user_id}")
            return True
        except Exception as e:
            logger.error(f"Error saving research persona for user {user_id}: {e}")
            self.db.rollback()
            return False

    def _get_persona_data_record(self, user_id: str) -> Optional[PersonaData]:
        """
        Get PersonaData database record for user.

        Looks up the user's OnboardingSession and then the PersonaData row
        attached to it. Returns None when either is missing or on any error.
        """
        try:
            # Ensure research_persona columns exist before querying
            self.onboarding_service._ensure_research_persona_columns(self.db)

            session = self.db.query(OnboardingSession).filter(
                OnboardingSession.user_id == user_id
            ).first()
            if not session:
                return None

            return self.db.query(PersonaData).filter(
                PersonaData.session_id == session.id
            ).first()
        except Exception as e:
            logger.error(f"Error getting persona data record for user {user_id}: {e}")
            return None

    def _collect_onboarding_data(self, user_id: str) -> Optional[Dict[str, Any]]:
        """
        Collect all onboarding data needed for research persona generation.

        ``business_info`` prefers values from the core persona; each of
        ``industry`` and ``target_audience`` independently falls back to the
        website analysis only when the persona did not supply it.

        Returns:
            Dictionary with website_analysis, persona_data, research_preferences,
            business_info; None when both website analysis and persona data
            are empty, or on error.
        """
        try:
            website_analysis = self.onboarding_service.get_website_analysis(user_id, self.db) or {}
            persona_data_dict = self.onboarding_service.get_persona_data(user_id, self.db) or {}
            research_prefs = self.onboarding_service.get_research_preferences(user_id, self.db) or {}

            # Check if we have enough data
            if not website_analysis and not persona_data_dict:
                logger.warning(f"Insufficient onboarding data for user {user_id}")
                return None

            # Business info - construct from persona data and website analysis
            business_info = {}

            # Primary source: core persona (either key spelling)
            core_persona = persona_data_dict.get('corePersona') or persona_data_dict.get('core_persona')
            if core_persona:
                for field in ('industry', 'target_audience'):
                    if core_persona.get(field):
                        business_info[field] = core_persona[field]

            # Fallback to website analysis, per field, so a persona-supplied
            # value is never overwritten and a missing one is always filled.
            target_audience_data = website_analysis.get('target_audience', {})
            if isinstance(target_audience_data, dict):
                if not business_info.get('industry'):
                    industry_focus = target_audience_data.get('industry_focus')
                    if industry_focus:
                        business_info['industry'] = industry_focus
                if not business_info.get('target_audience'):
                    demographics = target_audience_data.get('demographics')
                    if demographics:
                        business_info['target_audience'] = demographics if isinstance(demographics, str) else str(demographics)

            return {
                "website_analysis": website_analysis,
                "persona_data": persona_data_dict,
                "research_preferences": research_prefs,
                "business_info": business_info
            }
        except Exception as e:
            logger.error(f"Error collecting onboarding data for user {user_id}: {e}")
            return None

View File

@@ -10,7 +10,9 @@ from .core.exception_handler import (
TaskExecutionError, DatabaseError, TaskLoaderError, SchedulerConfigError
)
from .executors.monitoring_task_executor import MonitoringTaskExecutor
from .executors.oauth_token_monitoring_executor import OAuthTokenMonitoringExecutor
from .utils.task_loader import load_due_monitoring_tasks
from .utils.oauth_token_task_loader import load_due_oauth_token_monitoring_tasks
# Global scheduler instance (initialized on first access)
_scheduler_instance: TaskScheduler = None
@@ -37,6 +39,14 @@ def get_scheduler() -> TaskScheduler:
monitoring_executor,
load_due_monitoring_tasks
)
# Register OAuth token monitoring executor
oauth_token_executor = OAuthTokenMonitoringExecutor()
_scheduler_instance.register_executor(
'oauth_token_monitoring',
oauth_token_executor,
load_due_oauth_token_monitoring_tasks
)
return _scheduler_instance
@@ -46,6 +56,7 @@ __all__ = [
'TaskExecutor',
'TaskExecutionResult',
'MonitoringTaskExecutor',
'OAuthTokenMonitoringExecutor',
'get_scheduler',
# Exception handling
'SchedulerExceptionHandler',

View File

@@ -0,0 +1,141 @@
"""
Check Cycle Handler
Handles the main scheduler check cycle that finds and executes due tasks.
"""
from typing import TYPE_CHECKING, Dict, Any
from datetime import datetime
from sqlalchemy.orm import Session
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
from .exception_handler import DatabaseError
from .interval_manager import adjust_check_interval_if_needed
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("check_cycle_handler")
async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
    """
    Main scheduler loop: check for due tasks and execute them.

    For every registered task type this delegates to
    ``scheduler._process_task_type``, aggregates the per-type counts, logs a
    single multi-line summary, and stores a ``check_cycle`` SchedulerEventLog
    row. Errors are reported through ``scheduler.exception_handler`` and never
    propagate to the caller.

    Args:
        scheduler: TaskScheduler instance
    """
    scheduler.stats['total_checks'] += 1
    check_start_time = datetime.utcnow()
    scheduler.stats['last_check'] = check_start_time.isoformat()

    # Track execution summary for this check cycle
    cycle_summary = {
        'tasks_found_by_type': {},
        'tasks_executed_by_type': {},
        'tasks_failed_by_type': {},
        'total_found': 0,
        'total_executed': 0,
        'total_failed': 0
    }

    db = None
    try:
        db = get_db_session()
        if db is None:
            logger.error("[Scheduler Check] ❌ Failed to get database session")
            return

        # Check for active strategies and adjust interval intelligently
        await adjust_check_interval_if_needed(scheduler, db)

        # Check each registered task type
        for task_type in scheduler.registry.get_registered_types():
            type_summary = await scheduler._process_task_type(task_type, db, cycle_summary)
            if type_summary:
                cycle_summary['tasks_found_by_type'][task_type] = type_summary.get('found', 0)
                cycle_summary['tasks_executed_by_type'][task_type] = type_summary.get('executed', 0)
                cycle_summary['tasks_failed_by_type'][task_type] = type_summary.get('failed', 0)

        # Calculate totals
        cycle_summary['total_found'] = sum(cycle_summary['tasks_found_by_type'].values())
        cycle_summary['total_executed'] = sum(cycle_summary['tasks_executed_by_type'].values())
        cycle_summary['total_failed'] = sum(cycle_summary['tasks_failed_by_type'].values())

        check_duration = (datetime.utcnow() - check_start_time).total_seconds()
        active_strategies = scheduler.stats.get('active_strategies_count', 0)
        active_executions = len(scheduler.active_executions)

        # Build comprehensive check cycle summary log message
        check_lines = [
            f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
            f" ├─ Duration: {check_duration:.2f}s",
            f" ├─ Active Strategies: {active_strategies}",
            f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
            " ├─ User Isolation: Enabled (tasks filtered by user_id)",
            f" ├─ Tasks Found: {cycle_summary['total_found']} total"
        ]

        # Per-type lines always use the "├─" connector: a closing line
        # ("Active Executions" or "No tasks found") is appended after them in
        # every case, so none of them is ever the last line of the tree.
        for task_type, count in cycle_summary['tasks_found_by_type'].items():
            executed = cycle_summary['tasks_executed_by_type'].get(task_type, 0)
            failed = cycle_summary['tasks_failed_by_type'].get(task_type, 0)
            check_lines.append(f" ├─ {task_type}: {count} found, {executed} executed, {failed} failed")

        if cycle_summary['total_found'] > 0:
            check_lines.append(f" ├─ Total Executed: {cycle_summary['total_executed']}")
            check_lines.append(f" ├─ Total Failed: {cycle_summary['total_failed']}")
            check_lines.append(f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}")
        else:
            check_lines.append(" └─ No tasks found - scheduler idle")

        # Log comprehensive check cycle summary in single message
        logger.warning("\n".join(check_lines))

        # Save check cycle event to database for historical tracking.
        # Best-effort: a failure here is logged and rolled back, not raised.
        try:
            event_log = SchedulerEventLog(
                event_type='check_cycle',
                event_date=check_start_time,
                check_cycle_number=scheduler.stats['total_checks'],
                check_interval_minutes=scheduler.current_check_interval_minutes,
                tasks_found=cycle_summary['total_found'],
                tasks_executed=cycle_summary['total_executed'],
                tasks_failed=cycle_summary['total_failed'],
                tasks_by_type=cycle_summary['tasks_found_by_type'],
                check_duration_seconds=check_duration,
                active_strategies_count=active_strategies,
                active_executions=active_executions,
                event_data={
                    'executed_by_type': cycle_summary['tasks_executed_by_type'],
                    'failed_by_type': cycle_summary['tasks_failed_by_type']
                }
            )
            db.add(event_log)
            db.commit()
        except Exception as e:
            logger.warning(f"Failed to save check cycle event log: {e}")
            db.rollback()

        # Update last_update timestamp for frontend polling
        scheduler.stats['last_update'] = datetime.utcnow().isoformat()
    except Exception as e:
        error = DatabaseError(
            message=f"Error checking for due tasks: {str(e)}",
            original_error=e
        )
        scheduler.exception_handler.handle_exception(error)
        logger.error(f"[Scheduler Check] ❌ Error in check cycle: {str(e)}")
    finally:
        if db:
            db.close()

View File

@@ -0,0 +1,139 @@
"""
Interval Manager
Handles intelligent scheduling interval adjustment based on active strategies.
"""
from typing import TYPE_CHECKING
from datetime import datetime
from sqlalchemy.orm import Session
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("interval_manager")
async def determine_optimal_interval(
    scheduler: 'TaskScheduler',
    min_interval: int,
    max_interval: int
) -> int:
    """
    Pick the check interval from the number of active strategies with tasks.

    The count is also written to ``scheduler.stats['active_strategies_count']``.

    Args:
        scheduler: TaskScheduler instance
        min_interval: Interval (minutes) used when at least one strategy is active
        max_interval: Interval (minutes) used when no strategy is active

    Returns:
        ``min_interval`` or ``max_interval``; ``min_interval`` is also the
        result when no session is available or the lookup fails.
    """
    session = None
    try:
        session = get_db_session()
        if not session:
            # No session: use the shorter (safer) interval
            return min_interval

        from services.active_strategy_service import ActiveStrategyService

        active_count = ActiveStrategyService(db_session=session).count_active_strategies_with_tasks()
        scheduler.stats['active_strategies_count'] = active_count

        if active_count > 0:
            logger.info(f"Found {active_count} active strategies with tasks - using {min_interval}min interval")
            return min_interval

        logger.info(f"No active strategies with tasks - using {max_interval}min interval")
        return max_interval
    except Exception as e:
        logger.warning(f"Error determining optimal interval: {e}, using default {min_interval}min")
        # Default to shorter interval on error (safer)
        return min_interval
    finally:
        if session:
            session.close()
async def adjust_check_interval_if_needed(
    scheduler: 'TaskScheduler',
    db: Session
):
    """
    Intelligently adjust check interval based on active strategies.

    If there are active strategies with tasks, the scheduler's minimum
    interval is used; otherwise its maximum interval. The ``check_due_tasks``
    job is only re-triggered when the interval actually changes, and each
    change is recorded as an ``interval_adjustment`` SchedulerEventLog row.
    All errors are logged as warnings and never propagate.

    Args:
        scheduler: TaskScheduler instance
        db: Database session used to count active strategies
    """
    try:
        from services.active_strategy_service import ActiveStrategyService

        active_strategy_service = ActiveStrategyService(db_session=db)
        active_count = active_strategy_service.count_active_strategies_with_tasks()
        scheduler.stats['active_strategies_count'] = active_count

        # Determine optimal interval
        if active_count > 0:
            optimal_interval = scheduler.min_check_interval_minutes
        else:
            optimal_interval = scheduler.max_check_interval_minutes

        # Only reschedule if interval needs to change
        if optimal_interval == scheduler.current_check_interval_minutes:
            return

        interval_message = (
            f"[Scheduler] ⚙️ Adjusting Check Interval\n"
            f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
            f" ├─ Optimal: {optimal_interval}min\n"
            f" ├─ Active Strategies: {active_count}\n"
            f" └─ Reason: {'Active strategies detected' if active_count > 0 else 'No active strategies'}"
        )
        logger.warning(interval_message)

        # Reschedule the job with new interval
        scheduler.scheduler.modify_job(
            'check_due_tasks',
            trigger=scheduler._get_trigger_for_interval(optimal_interval)
        )

        # Save previous interval before updating
        previous_interval = scheduler.current_check_interval_minutes

        # Update current interval
        scheduler.current_check_interval_minutes = optimal_interval
        scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()

        # Save interval adjustment event to database (best-effort, on its own
        # session). The session is rolled back on failure and always closed,
        # so a failed insert cannot leak it.
        try:
            event_db = get_db_session()
            if event_db:
                try:
                    event_log = SchedulerEventLog(
                        event_type='interval_adjustment',
                        event_date=datetime.utcnow(),
                        previous_interval_minutes=previous_interval,
                        new_interval_minutes=optimal_interval,
                        check_interval_minutes=optimal_interval,
                        active_strategies_count=active_count,
                        event_data={
                            'reason': 'intelligent_scheduling',
                            'min_interval': scheduler.min_check_interval_minutes,
                            'max_interval': scheduler.max_check_interval_minutes
                        }
                    )
                    event_db.add(event_log)
                    event_db.commit()
                except Exception:
                    event_db.rollback()
                    raise
                finally:
                    event_db.close()
        except Exception as e:
            logger.warning(f"Failed to save interval adjustment event log: {e}")

        logger.warning(f"[Scheduler] ✅ Interval adjusted to {optimal_interval}min")
    except Exception as e:
        logger.warning(f"Error adjusting check interval: {e}")

View File

@@ -0,0 +1,269 @@
"""
Job Restoration
Handles restoration of one-time jobs (e.g., persona generation) on scheduler startup.
Preserves original scheduled times from database to avoid rescheduling on server restarts.
"""
from typing import TYPE_CHECKING
from datetime import datetime, timezone, timedelta
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from models.scheduler_models import SchedulerEventLog
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("job_restoration")
async def _restore_one_time_persona_job(
    scheduler, db, user_id, job_id, label, task_func, schedule_func, now
):
    """
    Restore a single one-time persona job from its SchedulerEventLog history.

    Args:
        scheduler: TaskScheduler instance
        db: Database session used to query SchedulerEventLog
        user_id: User ID the job belongs to
        job_id: Deterministic job ID (e.g. "research_persona_<user_id>")
        label: Job name used in log messages (e.g. "research persona")
        task_func: Coroutine function executed for a missed job; called as
            ``task_func(user_id)``
        schedule_func: Function that schedules a brand-new job; called as
            ``schedule_func(user_id, delay_minutes=20)``
        now: Timezone-aware current UTC time

    Returns:
        'restored' when a job was (re)scheduled, 'executed' when a missed job
        was run immediately, 'skipped' otherwise.
    """
    title = label[0].upper() + label[1:]

    # Check if job already exists in scheduler (just started, so unlikely)
    if any(job.id == job_id for job in scheduler.scheduler.get_jobs()):
        logger.debug(f"{title} job {job_id} already exists in scheduler, skipping restoration")
        return 'skipped'

    # A job that already completed or failed must not be re-run
    finished_event = db.query(SchedulerEventLog).filter(
        SchedulerEventLog.event_type.in_(['job_completed', 'job_failed']),
        SchedulerEventLog.job_id == job_id,
        SchedulerEventLog.user_id == user_id
    ).order_by(SchedulerEventLog.event_date.desc()).first()
    if finished_event:
        logger.debug(f"{title} job {job_id} already completed/failed, skipping restoration")
        return 'skipped'

    # Most recent scheduling event carries the original run time
    scheduled_event = db.query(SchedulerEventLog).filter(
        SchedulerEventLog.event_type == 'job_scheduled',
        SchedulerEventLog.job_id == job_id,
        SchedulerEventLog.user_id == user_id
    ).order_by(SchedulerEventLog.event_date.desc()).first()

    if not scheduled_event or not scheduled_event.event_data:
        logger.warning(
            f"[Restoration] No previous scheduled event found for {label} job {job_id}, "
            f"scheduling NEW job with current time + 20 minutes"
        )
        schedule_func(user_id, delay_minutes=20)
        return 'restored'

    scheduled_for_str = scheduled_event.event_data.get('scheduled_for')
    if not scheduled_for_str:
        logger.warning(
            f"[Restoration] No original scheduled time found for {label} job {job_id}, "
            f"scheduling NEW job with current time + 20 minutes"
        )
        schedule_func(user_id, delay_minutes=20)
        return 'restored'

    # Only the parsing is guarded, so a scheduler failure further down is not
    # misreported as a timestamp parsing error.
    try:
        original_time = datetime.fromisoformat(scheduled_for_str.replace('Z', '+00:00'))
    except (AttributeError, TypeError, ValueError) as time_error:
        logger.warning(f"Error parsing original scheduled time for {job_id}: {time_error}, scheduling new job")
        schedule_func(user_id, delay_minutes=20)
        return 'restored'
    if original_time.tzinfo is None:
        original_time = original_time.replace(tzinfo=timezone.utc)

    time_since_scheduled = (now - original_time).total_seconds()
    if 0 < time_since_scheduled <= 3600:  # Within 1 hour grace period
        # Execute immediately (missed job)
        logger.warning(f"Restoring {label} job {job_id} - original time was {original_time}, executing now (missed)")
        try:
            await task_func(user_id)
        except Exception as exec_error:
            logger.error(f"Error executing missed {label} job {job_id}: {exec_error}")
        return 'executed'

    if original_time > now:
        # Restore with original future time
        time_until_run = (original_time - now).total_seconds() / 60  # minutes
        logger.warning(
            f"[Restoration] Restoring {label} job {job_id} with ORIGINAL scheduled time: "
            f"{original_time} (UTC) = {original_time.astimezone().strftime('%H:%M:%S %Z')} (local), "
            f"will run in {time_until_run:.1f} minutes"
        )
        scheduler.schedule_one_time_task(
            func=task_func,
            run_date=original_time,
            job_id=job_id,
            kwargs={'user_id': user_id},
            replace_existing=True
        )
        return 'restored'

    # Too old (beyond grace period), skip
    logger.debug(f"{title} job {job_id} scheduled time {original_time} is too old, skipping")
    return 'skipped'


async def restore_persona_jobs(scheduler: 'TaskScheduler'):
    """
    Restore one-time persona generation jobs for users who completed onboarding
    but don't have personas yet. This ensures jobs persist across server restarts.

    IMPORTANT: Preserves original scheduled times from SchedulerEventLog to avoid
    rescheduling jobs with new times on server restarts. Jobs missed by at most
    one hour are executed immediately; older ones are skipped.

    Failures for one user/job are logged and do not stop restoration for the
    others; no exception propagates to the caller.

    Args:
        scheduler: TaskScheduler instance
    """
    try:
        db = get_db_session()
        if not db:
            logger.warning("Could not get database session to restore persona jobs")
            return

        try:
            from models.onboarding import OnboardingSession
            from services.research.research_persona_scheduler import (
                schedule_research_persona_generation,
                generate_research_persona_task
            )
            from services.persona.facebook.facebook_persona_scheduler import (
                schedule_facebook_persona_generation,
                generate_facebook_persona_task
            )
            from services.research.research_persona_service import ResearchPersonaService
            from services.persona_data_service import PersonaDataService

            # Get all users who completed onboarding
            completed_sessions = db.query(OnboardingSession).filter(
                OnboardingSession.progress == 100.0
            ).all()

            counts = {'restored': 0, 'skipped': 0, 'executed': 0}
            now = datetime.now(timezone.utc)

            for session in completed_sessions:
                user_id = session.user_id

                # Restore research persona job
                try:
                    research_service = ResearchPersonaService(db_session=db)
                    persona_data_record = research_service._get_persona_data_record(user_id)
                    research_persona_exists = bool(
                        persona_data_record
                        and getattr(persona_data_record, 'research_persona', None)
                    )

                    if not research_persona_exists:
                        # Note: Clerk user_id already includes "user_" prefix
                        outcome = await _restore_one_time_persona_job(
                            scheduler, db, user_id,
                            job_id=f"research_persona_{user_id}",
                            label="research persona",
                            task_func=generate_research_persona_task,
                            schedule_func=schedule_research_persona_generation,
                            now=now,
                        )
                        counts[outcome] += 1
                except Exception as e:
                    logger.debug(f"Could not restore research persona for user {user_id}: {e}")

                # Restore Facebook persona job (requires an existing core persona)
                try:
                    persona_data_service = PersonaDataService(db_session=db)
                    persona_data = persona_data_service.get_user_persona_data(user_id)
                    platform_personas = persona_data.get('platform_personas', {}) if persona_data else {}
                    facebook_persona_exists = bool(platform_personas.get('facebook') if platform_personas else None)
                    has_core_persona = bool(persona_data.get('core_persona') if persona_data else False)

                    if not facebook_persona_exists and has_core_persona:
                        # Note: Clerk user_id already includes "user_" prefix
                        outcome = await _restore_one_time_persona_job(
                            scheduler, db, user_id,
                            job_id=f"facebook_persona_{user_id}",
                            label="Facebook persona",
                            task_func=generate_facebook_persona_task,
                            schedule_func=schedule_facebook_persona_generation,
                            now=now,
                        )
                        counts[outcome] += 1
                except Exception as e:
                    logger.debug(f"Could not restore Facebook persona for user {user_id}: {e}")

            if counts['restored'] > 0:
                logger.warning(f"[Scheduler] ✅ Restored {counts['restored']} persona generation job(s) on startup (preserved original scheduled times)")
            if counts['executed'] > 0:
                # Missed jobs run inline above; report them so they are not
                # invisible in the startup summary.
                logger.warning(f"[Scheduler] Executed {counts['executed']} missed persona generation job(s) on startup")
            if counts['skipped'] > 0:
                logger.debug(f"[Scheduler] Skipped {counts['skipped']} persona job(s) (already completed/failed or exist)")
        finally:
            db.close()
    except Exception as e:
        logger.warning(f"Error restoring persona jobs: {e}")

View File

@@ -0,0 +1,196 @@
"""
OAuth Token Monitoring Task Restoration
Automatically creates missing OAuth monitoring tasks for users who have connected platforms
but don't have monitoring tasks created yet.
"""
from datetime import datetime, timedelta
from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
from services.oauth_token_monitoring_service import get_connected_platforms, create_oauth_monitoring_tasks
# Use service logger for consistent logging (WARNING level visible in production)
logger = get_service_logger("oauth_task_restoration")
def _format_platform_breakdown(tasks) -> str:
    """Summarise *tasks* as ``"platform: count"`` pairs sorted by platform name."""
    from collections import Counter

    counts = Counter(task.platform for task in tasks)
    return ", ".join(f"{platform}: {count}" for platform, count in sorted(counts.items()))


def _completed_onboarding_user_ids(db) -> set:
    """Return the ids of users whose onboarding is reported as completed.

    Sessions are pre-filtered in SQL with ``current_step >= 6 OR progress >= 100.0``
    and then confirmed through ``get_onboarding_status()`` so the result agrees
    with the completion check the onboarding progress service exposes.
    """
    from services.onboarding.progress_service import get_onboarding_progress_service
    from models.onboarding import OnboardingSession
    from sqlalchemy import or_

    progress_service = get_onboarding_progress_service()
    completed_sessions = db.query(OnboardingSession).filter(
        or_(
            OnboardingSession.current_step >= 6,
            OnboardingSession.progress >= 100.0
        )
    ).all()
    return {
        session.user_id
        for session in completed_sessions
        if progress_service.get_onboarding_status(session.user_id).get('is_completed', False)
    }


async def restore_oauth_monitoring_tasks(scheduler):
    """
    Restore/create missing OAuth token monitoring tasks for all users.

    Candidate users are (a) every user that already owns at least one
    OAuthTokenMonitoringTask and (b) every user whose onboarding is completed.
    For each candidate, a task is created for every platform that is:
    - Connected (detected via get_connected_platforms)
    - Missing a monitoring task (no OAuthTokenMonitoringTask exists)

    Failures are isolated per user: an error for one user is logged, the
    session is rolled back, and processing continues with the next user.
    Nothing is raised to the caller; unexpected errors are logged.

    Args:
        scheduler: TaskScheduler instance. Not used by this function; kept so
            the call signature matches the other startup restoration hook.
    """
    from collections import defaultdict

    try:
        logger.warning("[OAuth Task Restoration] Starting OAuth monitoring task restoration...")
        db = get_db_session()
        if not db:
            logger.warning("[OAuth Task Restoration] Could not get database session")
            return
        try:
            existing_tasks = db.query(OAuthTokenMonitoringTask).all()

            # Index existing tasks once so the per-user lookup below is O(1)
            # instead of rescanning the whole task list for every user.
            platforms_by_user = defaultdict(set)
            for task in existing_tasks:
                platforms_by_user[task.user_id].add(task.platform)
            user_ids_with_tasks = set(platforms_by_user)

            logger.warning(
                f"[OAuth Task Restoration] Found {len(existing_tasks)} existing OAuth tasks "
                f"for {len(user_ids_with_tasks)} users. Platforms: {_format_platform_breakdown(existing_tasks)}"
            )

            # Start with users who already have at least one OAuth task ...
            users_to_check = list(user_ids_with_tasks)

            # ... then add users who completed onboarding but have no task yet
            # (platforms were connected but task creation never happened).
            try:
                onboarding_user_ids = _completed_onboarding_user_ids(db)
                # Set difference gives the real number of additional users; a
                # plain subtraction of the two sizes can even be negative.
                new_user_ids = onboarding_user_ids - user_ids_with_tasks
                users_to_check.extend(new_user_ids)
                logger.warning(
                    f"[OAuth Task Restoration] Checking {len(users_to_check)} users "
                    f"({len(user_ids_with_tasks)} with existing tasks, "
                    f"{len(onboarding_user_ids)} from onboarding sessions, "
                    f"{len(new_user_ids)} new users to check)"
                )
            except Exception as e:
                # Fallback: only users with existing tasks are checked.
                logger.warning(f"[OAuth Task Restoration] Could not query onboarding users: {e}")

            total_created = 0
            failed_users = 0
            for user_id in users_to_check:
                try:
                    connected_platforms = get_connected_platforms(user_id)
                    logger.warning(
                        f"[OAuth Task Restoration] User {user_id}: "
                        f"Connected platforms: {connected_platforms}"
                    )
                    if not connected_platforms:
                        logger.debug(
                            f"[OAuth Task Restoration] No connected platforms for user {user_id}, skipping"
                        )
                        continue

                    existing_platforms = platforms_by_user.get(user_id, set())
                    missing_platforms = [
                        platform
                        for platform in connected_platforms
                        if platform not in existing_platforms
                    ]
                    if not missing_platforms:
                        logger.warning(
                            f"[OAuth Task Restoration] ✅ User {user_id} has all required tasks "
                            f"for connected platforms: {connected_platforms}"
                        )
                        continue

                    logger.warning(
                        f"[OAuth Task Restoration] ⚠️ User {user_id} has connected platforms "
                        f"{connected_platforms} but missing tasks for: {missing_platforms}"
                    )
                    created = create_oauth_monitoring_tasks(
                        user_id=user_id,
                        db=db,
                        platforms=missing_platforms
                    )
                    total_created += len(created)
                    logger.warning(
                        f"[OAuth Task Restoration] ✅ Created {len(created)} missing OAuth tasks "
                        f"for user {user_id}, platforms: {missing_platforms}"
                    )
                except Exception as e:
                    failed_users += 1
                    logger.warning(
                        f"[OAuth Task Restoration] Error checking/creating tasks for user {user_id}: {e}",
                        exc_info=True
                    )
                    # A failed flush/commit leaves the shared session unusable
                    # until it is rolled back; without this every later user
                    # would fail too.
                    # NOTE(review): assumes create_oauth_monitoring_tasks commits
                    # its own work (this function never commits) - confirm.
                    try:
                        db.rollback()
                    except Exception as rollback_error:
                        logger.warning(
                            f"[OAuth Task Restoration] Could not roll back session after error "
                            f"for user {user_id}: {rollback_error}"
                        )
                    continue

            # Final summary with a fresh platform breakdown (includes new tasks).
            final_existing_tasks = db.query(OAuthTokenMonitoringTask).all()
            final_platform_summary = _format_platform_breakdown(final_existing_tasks)
            if total_created > 0:
                logger.warning(
                    f"[OAuth Task Restoration] ✅ Created {total_created} missing OAuth monitoring tasks. "
                    f"Final platform breakdown: {final_platform_summary}"
                )
            elif failed_users == 0:
                logger.warning(
                    f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
                    f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks. "
                    f"Platform breakdown: {final_platform_summary}"
                )
            if failed_users > 0:
                # Do not report success for users whose check raised.
                logger.warning(
                    f"[OAuth Task Restoration] ⚠️ Restoration finished with errors for "
                    f"{failed_users} of {len(users_to_check)} users. "
                    f"Platform breakdown: {final_platform_summary}"
                )
        finally:
            db.close()
    except Exception as e:
        logger.error(
            f"[OAuth Task Restoration] Error restoring OAuth monitoring tasks: {e}",
            exc_info=True
        )

View File

@@ -10,6 +10,7 @@ from datetime import datetime
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.triggers.date import DateTrigger
from sqlalchemy.orm import Session
from .executor_interface import TaskExecutor, TaskExecutionResult
@@ -20,6 +21,13 @@ from .exception_handler import (
)
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from ..utils.user_job_store import get_user_job_store_name
from models.scheduler_models import SchedulerEventLog
from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
from .job_restoration import restore_persona_jobs
from .oauth_task_restoration import restore_oauth_monitoring_tasks
from .check_cycle_handler import check_and_execute_due_tasks
from .task_execution_handler import execute_task_async
logger = get_service_logger("task_scheduler")
@@ -34,6 +42,14 @@ class TaskScheduler:
- Database-backed task persistence
- Configurable check intervals
- Automatic retry logic
- User isolation: All tasks are filtered by user_id for isolation
- Per-user job store context: Logs show user's website root for debugging
User Isolation:
- Tasks are filtered by user_id in task loaders
- Execution logs include user_id for tracking
- Per-user statistics are maintained
- Job store names (based on website root) are logged for debugging
"""
def __init__(
@@ -63,7 +79,7 @@ class TaskScheduler:
job_defaults={
'coalesce': True,
'max_instances': 1,
'misfire_grace_time': 300 # 5 minutes grace period
'misfire_grace_time': 3600 # 1 hour grace period for missed jobs
}
)
@@ -89,6 +105,7 @@ class TaskScheduler:
'tasks_failed': 0,
'tasks_skipped': 0,
'last_check': None,
'last_update': datetime.utcnow().isoformat(), # Timestamp for frontend polling
'per_user_stats': {}, # Track metrics per user for user isolation
'active_strategies_count': 0, # Track active strategies with tasks
'last_interval_adjustment': None # Track when interval was last adjusted
@@ -141,7 +158,11 @@ class TaskScheduler:
try:
# Determine initial check interval based on active strategies
initial_interval = await self._determine_optimal_interval()
initial_interval = await determine_optimal_interval(
self,
self.min_check_interval_minutes,
self.max_check_interval_minutes
)
self.current_check_interval_minutes = initial_interval
# Add periodic job to check for due tasks
@@ -155,16 +176,228 @@ class TaskScheduler:
self.scheduler.start()
self._running = True
logger.info(
f"Task scheduler started | "
f"check_interval={initial_interval}min | "
f"registered_types={self.registry.get_registered_types()}"
)
# Check for and execute any missed jobs that are still within grace period
await self._execute_missed_jobs()
# Restore one-time persona generation jobs for users who completed onboarding
await restore_persona_jobs(self)
# Restore/create missing OAuth token monitoring tasks for connected platforms
await restore_oauth_monitoring_tasks(self)
# Get all scheduled APScheduler jobs (including one-time tasks)
all_jobs = self.scheduler.get_jobs()
registered_types = self.registry.get_registered_types()
active_strategies = self.stats.get('active_strategies_count', 0)
# Count OAuth token monitoring tasks from database (recurring weekly tasks)
oauth_tasks_count = 0
oauth_tasks_details = []
try:
db = get_db_session()
if db:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
# Count active tasks
oauth_tasks_count = db.query(OAuthTokenMonitoringTask).filter(
OAuthTokenMonitoringTask.status == 'active'
).count()
# Get all tasks (for detailed logging)
all_oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
total_oauth_tasks = len(all_oauth_tasks)
# Show platform breakdown for ALL tasks (active and inactive)
all_platforms = {}
active_platforms = {}
for task in all_oauth_tasks:
all_platforms[task.platform] = all_platforms.get(task.platform, 0) + 1
if task.status == 'active':
active_platforms[task.platform] = active_platforms.get(task.platform, 0) + 1
if total_oauth_tasks > 0:
# Log details about all tasks (not just active)
for task in all_oauth_tasks:
oauth_tasks_details.append(
f"user={task.user_id}, platform={task.platform}, status={task.status}"
)
if total_oauth_tasks > 0 and oauth_tasks_count == 0:
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
logger.warning(
f"[Scheduler] Found {total_oauth_tasks} OAuth monitoring tasks in database, "
f"but {oauth_tasks_count} are active. "
f"All platforms: {all_platform_summary}. "
f"Task details: {', '.join(oauth_tasks_details[:5])}" # Limit to first 5 for readability
)
elif oauth_tasks_count > 0:
# Show platform breakdown for active tasks
active_platform_summary = ", ".join([f"{platform}: {count}" for platform, count in sorted(active_platforms.items())])
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
# Check for missing platforms (expected: gsc, bing, wordpress, wix)
expected_platforms = ['gsc', 'bing', 'wordpress', 'wix']
missing_in_db = [p for p in expected_platforms if p not in all_platforms]
if missing_in_db:
logger.warning(
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
f"All platforms: {all_platform_summary}. "
f"⚠️ Missing platforms (not connected or no tasks): {', '.join(missing_in_db)}"
)
else:
logger.warning(
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
f"All platforms: {all_platform_summary}"
)
db.close()
except Exception as e:
logger.warning(
f"[Scheduler] Could not get OAuth token monitoring tasks count: {e}. "
f"This may indicate the oauth_token_monitoring_tasks table doesn't exist yet or "
f"tasks haven't been created. Error type: {type(e).__name__}"
)
# Calculate job counts
apscheduler_recurring = 1 # check_due_tasks
apscheduler_one_time = len(all_jobs) - 1
total_recurring = apscheduler_recurring + oauth_tasks_count
total_jobs = len(all_jobs) + oauth_tasks_count
# Build comprehensive startup log message
startup_lines = [
f"[Scheduler] ✅ Task Scheduler Started",
f" ├─ Check Interval: {initial_interval} minutes",
f" ├─ Registered Task Types: {len(registered_types)} ({', '.join(registered_types) if registered_types else 'none'})",
f" ├─ Active Strategies: {active_strategies}",
f" ├─ Total Scheduled Jobs: {total_jobs}",
f" ├─ Recurring Jobs: {total_recurring} (check_due_tasks: {apscheduler_recurring}, OAuth monitoring: {oauth_tasks_count})",
f" └─ One-Time Jobs: {apscheduler_one_time}"
]
# Add APScheduler job details
if all_jobs:
for idx, job in enumerate(all_jobs):
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0
prefix = " └─" if is_last else " ├─"
next_run = job.next_run_time
trigger_type = type(job.trigger).__name__
# Try to extract user_id from job ID or kwargs for context
user_context = ""
user_id_from_job = None
# First try to get from kwargs
if hasattr(job, 'kwargs') and job.kwargs and job.kwargs.get('user_id'):
user_id_from_job = job.kwargs.get('user_id')
# Otherwise, try to extract from job ID (e.g., "research_persona_user_123..." or "research_persona_user123")
elif job.id and ('research_persona_' in job.id or 'facebook_persona_' in job.id):
# Job ID format: research_persona_{user_id} or facebook_persona_{user_id}
# where user_id is Clerk format (e.g., "user_33Gz1FPI86VDXhRY8QN4ragRFGN")
if job.id.startswith('research_persona_'):
user_id_from_job = job.id.replace('research_persona_', '')
elif job.id.startswith('facebook_persona_'):
user_id_from_job = job.id.replace('facebook_persona_', '')
else:
# Fallback: try to extract from parts (old format with timestamp)
parts = job.id.split('_')
if len(parts) >= 3:
user_id_from_job = parts[2] # Extract user_id from job ID
if user_id_from_job:
try:
db = get_db_session()
if db:
user_job_store = get_user_job_store_name(user_id_from_job, db)
if user_job_store == 'default':
logger.debug(
f"[Scheduler] Job store extraction returned 'default' for user {user_id_from_job}. "
f"This may indicate no onboarding data or website URL not found."
)
user_context = f" | User: {user_id_from_job} | Store: {user_job_store}"
db.close()
except Exception as e:
logger.warning(
f"[Scheduler] Could not extract job store name for user {user_id_from_job}: {e}. "
f"Error type: {type(e).__name__}"
)
user_context = f" | User: {user_id_from_job}"
startup_lines.append(f"{prefix} Job: {job.id} | Trigger: {trigger_type} | Next Run: {next_run}{user_context}")
# Add OAuth token monitoring tasks details
# Show ALL OAuth tasks (active and inactive) for complete visibility
if total_oauth_tasks > 0:
try:
db = get_db_session()
if db:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
# Get ALL tasks, not just active ones
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
for idx, task in enumerate(oauth_tasks):
is_last = idx == len(oauth_tasks) - 1 and len(all_jobs) == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
if user_job_store == 'default':
logger.debug(
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
f"This may indicate no onboarding data or website URL not found."
)
except Exception as e:
logger.warning(
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
f"Using 'default'. Error type: {type(e).__name__}"
)
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
# Include status in the log line for visibility
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
)
db.close()
except Exception as e:
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
# Log comprehensive startup information in single message
logger.warning("\n".join(startup_lines))
# Save scheduler start event to database
try:
db = get_db_session()
if db:
event_log = SchedulerEventLog(
event_type='start',
event_date=datetime.utcnow(),
check_interval_minutes=initial_interval,
active_strategies_count=active_strategies,
event_data={
'registered_types': registered_types,
'total_jobs': total_jobs,
'recurring_jobs': total_recurring,
'one_time_jobs': apscheduler_one_time,
'oauth_monitoring_tasks': oauth_tasks_count
}
)
db.add(event_log)
db.commit()
db.close()
except Exception as e:
logger.warning(f"Failed to save scheduler start event log: {e}")
except Exception as e:
logger.error(f"Failed to start scheduler: {e}")
raise
async def stop(self):
"""Stop the scheduler gracefully."""
if not self._running:
@@ -182,11 +415,48 @@ class TaskScheduler:
timeout=30
)
# Get final job count before shutdown
all_jobs_before = self.scheduler.get_jobs()
# Shutdown scheduler
self.scheduler.shutdown(wait=True)
self._running = False
logger.info("Task scheduler stopped gracefully")
# Log comprehensive shutdown information (use WARNING level for visibility)
total_checks = self.stats.get('total_checks', 0)
total_executed = self.stats.get('tasks_executed', 0)
total_failed = self.stats.get('tasks_failed', 0)
shutdown_message = (
f"[Scheduler] 🛑 Task Scheduler Stopped\n"
f" ├─ Total Check Cycles: {total_checks}\n"
f" ├─ Total Tasks Executed: {total_executed}\n"
f" ├─ Total Tasks Failed: {total_failed}\n"
f" ├─ Jobs Cancelled: {len(all_jobs_before)}\n"
f" └─ Shutdown: Graceful"
)
logger.warning(shutdown_message)
# Save scheduler stop event to database
try:
db = get_db_session()
if db:
event_log = SchedulerEventLog(
event_type='stop',
event_date=datetime.utcnow(),
check_interval_minutes=self.current_check_interval_minutes,
event_data={
'total_checks': total_checks,
'total_executed': total_executed,
'total_failed': total_failed,
'jobs_cancelled': len(all_jobs_before)
}
)
db.add(event_log)
db.commit()
db.close()
except Exception as e:
logger.warning(f"Failed to save scheduler stop event log: {e}")
except Exception as e:
logger.error(f"Error stopping scheduler: {e}")
@@ -197,109 +467,50 @@ class TaskScheduler:
Main scheduler loop: check for due tasks and execute them.
This runs periodically with intelligent interval adjustment based on active strategies.
"""
self.stats['total_checks'] += 1
self.stats['last_check'] = datetime.utcnow().isoformat()
logger.debug("Checking for due tasks...")
db = None
try:
db = get_db_session()
if db is None:
logger.error("Failed to get database session")
return
# Check for active strategies and adjust interval intelligently
await self._adjust_check_interval_if_needed(db)
# Check each registered task type
for task_type in self.registry.get_registered_types():
await self._process_task_type(task_type, db)
except Exception as e:
error = DatabaseError(
message=f"Error checking for due tasks: {str(e)}",
original_error=e
)
self.exception_handler.handle_exception(error)
finally:
if db:
db.close()
async def _determine_optimal_interval(self) -> int:
    """
    Pick the check interval from the number of active strategies with tasks.

    The count is also stored in ``self.stats['active_strategies_count']``.

    Returns:
        ``self.min_check_interval_minutes`` when at least one active strategy
        has tasks, when no database session is available, or when the lookup
        raises; ``self.max_check_interval_minutes`` when the count is zero.
    """
    session = None
    try:
        session = get_db_session()
        if not session:
            # No session: fall back to the shorter interval (safer default).
            return self.min_check_interval_minutes

        from services.active_strategy_service import ActiveStrategyService
        strategies_with_tasks = ActiveStrategyService(
            db_session=session
        ).count_active_strategies_with_tasks()
        self.stats['active_strategies_count'] = strategies_with_tasks

        has_active_strategies = strategies_with_tasks > 0
        if not has_active_strategies:
            logger.info(f"No active strategies with tasks - using {self.max_check_interval_minutes}min interval")
            return self.max_check_interval_minutes

        logger.info(f"Found {strategies_with_tasks} active strategies with tasks - using {self.min_check_interval_minutes}min interval")
        return self.min_check_interval_minutes
    except Exception as e:
        logger.warning(f"Error determining optimal interval: {e}, using default {self.min_check_interval_minutes}min")
    finally:
        # Runs on every path, including the early returns above.
        if session:
            session.close()

    # Reached only after an exception: default to the shorter interval.
    return self.min_check_interval_minutes
await check_and_execute_due_tasks(self)
async def _adjust_check_interval_if_needed(self, db: Session):
    """
    Re-evaluate the check interval for this scheduler.

    Thin method wrapper: all work is delegated to the module-level
    ``adjust_check_interval_if_needed`` helper, which receives this scheduler
    instance together with the caller's session.

    Args:
        db: Database session passed through to the helper unchanged
    """
    # Inside a method body this name resolves to the imported module-level
    # function, not to this method, so there is no recursion here.
    await adjust_check_interval_if_needed(self, db)
async def _execute_missed_jobs(self, grace_seconds: int = 3600):
    """
    Run one-time (DateTrigger) jobs whose scheduled time has already passed.

    Only jobs that are overdue by at most ``grace_seconds`` are executed.
    Each selected job is removed from the scheduler *before* it is run by
    hand: APScheduler itself still fires jobs that are within their
    ``misfire_grace_time``, so leaving the job registered would run it twice.
    If the job is already gone (the scheduler picked it up first) it is
    skipped.

    Errors are logged, never raised: a failure in one job does not stop the
    remaining jobs, and a failure while collecting jobs is logged as a warning.

    Args:
        grace_seconds: Maximum age, in seconds, of a missed run that is still
            executed. Defaults to 3600 (1 hour), the same value used for
            ``misfire_grace_time`` when one-time jobs are scheduled.
    """
    import inspect
    from apscheduler.jobstores.base import JobLookupError

    try:
        # Aware "now" in the scheduler's timezone. Stamping utcnow() with the
        # scheduler tzinfo would yield the wrong instant for non-UTC zones.
        now = datetime.now(self.scheduler.timezone)

        missed_jobs = []
        for job in self.scheduler.get_jobs():
            # Only check DateTrigger jobs (one-time tasks)
            if not isinstance(getattr(job, 'trigger', None), DateTrigger):
                continue
            next_run_time = getattr(job, 'next_run_time', None)
            if next_run_time is None or next_run_time >= now:
                continue
            if (now - next_run_time).total_seconds() <= grace_seconds:
                missed_jobs.append(job)

        if not missed_jobs:
            return

        logger.warning(
            f"[Scheduler] Found {len(missed_jobs)} missed job(s) within grace period, executing now..."
        )
        for job in missed_jobs:
            try:
                # Claim the job so APScheduler does not execute it as well.
                self.scheduler.remove_job(job.id)
            except JobLookupError:
                logger.debug(f"[Scheduler] Missed job {job.id} already handled by scheduler, skipping")
                continue

            try:
                logger.info(f"[Scheduler] Executing missed job: {job.id}")
                outcome = job.func(*job.args, **job.kwargs)
                # Job functions may be plain callables; only await coroutines.
                if inspect.isawaitable(outcome):
                    await outcome
            except Exception as e:
                logger.error(f"[Scheduler] Error executing missed job {job.id}: {e}")
    except Exception as e:
        logger.warning(f"[Scheduler] Error checking for missed jobs: {e}")
async def trigger_interval_adjustment(self):
"""
@@ -315,14 +526,22 @@ class TaskScheduler:
try:
db = get_db_session()
if db:
await self._adjust_check_interval_if_needed(db)
await adjust_check_interval_if_needed(self, db)
db.close()
else:
logger.warning("Could not get database session for interval adjustment")
except Exception as e:
logger.warning(f"Error triggering interval adjustment: {e}")
async def _process_task_type(self, task_type: str, db: Session):
"""Process due tasks for a specific task type."""
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
"""
Process due tasks for a specific task type.
Returns:
Summary dict with 'found', 'executed', 'failed' counts, or None if no tasks
"""
summary = {'found': 0, 'executed': 0, 'failed': 0}
try:
# Get task loader for this type
try:
@@ -334,7 +553,7 @@ class TaskScheduler:
original_error=e
)
self.exception_handler.handle_exception(error)
return
return None
# Load due tasks (with error handling)
try:
@@ -346,28 +565,30 @@ class TaskScheduler:
original_error=e
)
self.exception_handler.handle_exception(error)
return
return None
if not due_tasks:
return
return None
summary['found'] = len(due_tasks)
self.stats['tasks_found'] += len(due_tasks)
logger.info(f"Found {len(due_tasks)} due tasks for type: {task_type}")
# Execute tasks (with concurrency limit)
execution_tasks = []
skipped_count = 0
for task in due_tasks:
if len(self.active_executions) >= self.max_concurrent_executions:
skipped_count = len(due_tasks) - len(execution_tasks)
logger.warning(
f"Max concurrent executions reached ({self.max_concurrent_executions}), "
f"skipping {len(due_tasks) - len(execution_tasks)} tasks"
f"[Scheduler] ⚠️ Max concurrent executions reached ({self.max_concurrent_executions}), "
f"skipping {skipped_count} tasks for {task_type}"
)
break
# Execute task asynchronously
# Note: Each task gets its own database session to prevent concurrent access issues
execution_task = asyncio.create_task(
self._execute_task_async(task_type, task)
execute_task_async(self, task_type, task, summary)
)
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
@@ -379,6 +600,8 @@ class TaskScheduler:
if execution_tasks:
await asyncio.wait(execution_tasks, timeout=300)
return summary
except Exception as e:
error = TaskLoaderError(
message=f"Error processing task type {task_type}: {str(e)}",
@@ -386,169 +609,8 @@ class TaskScheduler:
original_error=e
)
self.exception_handler.handle_exception(error)
return summary
async def _execute_task_async(self, task_type: str, task: Any):
"""
Execute a single task asynchronously with user isolation.
Each task gets its own database session to prevent concurrent access issues,
as SQLAlchemy sessions are not async-safe or concurrent-safe.
User context is extracted and tracked for user isolation.
Args:
task_type: Type of task
task: Task instance from database (detached from original session)
"""
# Same key format the caller uses for self.active_executions; the entry is
# deleted again in the finally section at the end of this method.
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
db = None
user_id = None
try:
# Extract user context if available (for user isolation tracking)
try:
if hasattr(task, 'strategy') and task.strategy:
user_id = getattr(task.strategy, 'user_id', None)
elif hasattr(task, 'strategy_id') and task.strategy_id:
# Will query user_id after we have db session
pass
except Exception as e:
# Best effort only: user_id stays None and execution continues.
logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")
logger.info(f"Executing task: {task_id} | user_id: {user_id}")
# Create a new database session for this async task
# SQLAlchemy sessions are not async-safe and cannot be shared across concurrent tasks
db = get_db_session()
if db is None:
# No session: record the failure in global and per-user stats and stop.
error = DatabaseError(
message=f"Failed to get database session for task {task_id}",
user_id=user_id,
task_id=getattr(task, 'id', None),
task_type=task_type
)
self.exception_handler.handle_exception(error, log_level="error")
self.stats['tasks_failed'] += 1
self._update_user_stats(user_id, success=False)
return
# Set database session for exception handler
# NOTE(review): exception_handler is a single object on self, so each
# execution overwrites the session set by the previous one - confirm this
# is safe when several tasks run concurrently.
self.exception_handler.db = db
# Merge the detached task object into this session
# The task object was loaded in a different session and is now detached
from sqlalchemy.orm import object_session
if object_session(task) is None:
# Task is detached, need to merge it into this session
task = db.merge(task)
# Extract user_id after merge if not already available
if user_id is None and hasattr(task, 'strategy'):
try:
if task.strategy:
user_id = getattr(task.strategy, 'user_id', None)
elif hasattr(task, 'strategy_id'):
# Query strategy if relationship not loaded
from models.enhanced_strategy_models import EnhancedContentStrategy
strategy = db.query(EnhancedContentStrategy).filter(
EnhancedContentStrategy.id == task.strategy_id
).first()
if strategy:
user_id = strategy.user_id
except Exception as e:
# Best effort only: the task still runs with user_id left as None.
logger.debug(f"Could not extract user_id after merge for task {task_id}: {e}")
# Get executor for this task type
try:
executor = self.registry.get_executor(task_type)
except Exception as e:
# Executor lookup failed: reported as a SchedulerConfigError, counted as a
# failed task, and the method returns without executing anything.
from .exception_handler import SchedulerConfigError
error = SchedulerConfigError(
message=f"Failed to get executor for task type {task_type}: {str(e)}",
user_id=user_id,
context={
"task_id": getattr(task, 'id', None),
"task_type": task_type
},
original_error=e
)
self.exception_handler.handle_exception(error)
self.stats['tasks_failed'] += 1
self._update_user_stats(user_id, success=False)
return
# Execute task with its own session (with error handling)
try:
result = await executor.execute_task(task, db)
# Handle result and update statistics
if result.success:
self.stats['tasks_executed'] += 1
self._update_user_stats(user_id, success=True)
logger.info(f"Task executed successfully: {task_id} | user_id: {user_id}")
else:
self.stats['tasks_failed'] += 1
self._update_user_stats(user_id, success=False)
# Create structured error for failed execution
error = TaskExecutionError(
message=result.error_message or "Task execution failed",
user_id=user_id,
task_id=getattr(task, 'id', None),
task_type=task_type,
execution_time_ms=result.execution_time_ms,
context={"result_data": result.result_data}
)
self.exception_handler.handle_exception(error, log_level="warning")
# Retry logic if enabled
# A retry is requested only when retries are enabled on the scheduler
# and the result itself is flagged as retryable.
if self.enable_retries and result.retryable:
await self._schedule_retry(task, result.retry_delay)
except SchedulerException as e:
# Re-raise scheduler exceptions (they're already handled)
# The SchedulerException handler further down records the failure.
raise
except Exception as e:
# Wrap unexpected exceptions
error = TaskExecutionError(
message=f"Unexpected error during task execution: {str(e)}",
user_id=user_id,
task_id=getattr(task, 'id', None),
task_type=task_type,
original_error=e
)
self.exception_handler.handle_exception(error)
self.stats['tasks_failed'] += 1
self._update_user_stats(user_id, success=False)
except SchedulerException as e:
# Handle scheduler exceptions
self.exception_handler.handle_exception(e)
self.stats['tasks_failed'] += 1
self._update_user_stats(user_id, success=False)
except Exception as e:
# Handle any other unexpected errors
error = TaskExecutionError(
message=f"Unexpected error in task execution wrapper: {str(e)}",
user_id=user_id,
task_id=getattr(task, 'id', None),
task_type=task_type,
original_error=e
)
self.exception_handler.handle_exception(error)
self.stats['tasks_failed'] += 1
self._update_user_stats(user_id, success=False)
finally:
# Clean up database session
if db:
try:
db.close()
except Exception as e:
# A failing close is logged but must not mask the task outcome.
logger.error(f"Error closing database session for task {task_id}: {e}")
# Remove from active executions
if task_id in self.active_executions:
del self.active_executions[task_id]
def _update_user_stats(self, user_id: Optional[int], success: bool):
"""
@@ -622,6 +684,117 @@ class TaskScheduler:
return base_stats
def schedule_one_time_task(
    self,
    func: Callable,
    run_date: datetime,
    job_id: str,
    args: tuple = (),
    kwargs: Optional[Dict[str, Any]] = None,
    replace_existing: bool = True
) -> str:
    """
    Schedule a one-time task to run at a specific datetime.

    Args:
        func: Async function to execute
        run_date: Datetime when the task should run. A naive datetime is
            interpreted as UTC (tzinfo is attached, the clock value is kept).
        job_id: Unique identifier for this job
        args: Positional arguments to pass to func
        kwargs: Keyword arguments to pass to func
        replace_existing: If True, replace existing job with same ID

    Returns:
        Job ID

    Raises:
        Exception: Any error raised while registering the job is logged and
            re-raised. Failures in the informational steps (job-store lookup,
            event-log write) are logged and swallowed.
    """
    def _close_quietly(session) -> None:
        # Closing is best-effort: a failing close must not undo a job that
        # has already been registered with the scheduler.
        if session:
            try:
                session.close()
            except Exception as close_error:
                logger.debug(f"Error closing database session for job {job_id}: {close_error}")

    if not self._running:
        logger.warning(
            f"Scheduler not running, but scheduling job {job_id} anyway. "
            "APScheduler will start automatically when needed."
        )

    try:
        # Ensure run_date is timezone-aware (UTC)
        if run_date.tzinfo is None:
            from datetime import timezone
            run_date = run_date.replace(tzinfo=timezone.utc)
            logger.debug(f"Added UTC timezone to run_date: {run_date}")

        self.scheduler.add_job(
            func,
            trigger=DateTrigger(run_date=run_date),
            args=args,
            kwargs=kwargs or {},
            id=job_id,
            replace_existing=replace_existing,
            misfire_grace_time=3600  # 1 hour grace period for missed jobs
        )

        # Get updated job count
        all_jobs = self.scheduler.get_jobs()
        one_time_jobs = [j for j in all_jobs if j.id != 'check_due_tasks']

        # Extract user_id from kwargs if available for logging and job store
        user_id = kwargs.get('user_id', None) if kwargs else None
        func_name = func.__name__ if hasattr(func, '__name__') else str(func)

        # Get job store name for user (if user_id provided)
        job_store_name = 'default'
        if user_id:
            db = None
            try:
                db = get_db_session()
                if db:
                    job_store_name = get_user_job_store_name(user_id, db)
            except Exception as e:
                logger.warning(f"Could not determine job store for user {user_id}: {e}")
            finally:
                # Previously the session was closed only on success and leaked
                # whenever the lookup raised.
                _close_quietly(db)

        # Note: APScheduler doesn't support dynamic job store creation
        # We use 'default' for all jobs but log the user's job store name for debugging
        # The actual user isolation is handled through task filtering by user_id

        # Log detailed one-time task scheduling information (use WARNING level for visibility)
        log_message = (
            f"[Scheduler] 📅 Scheduled One-Time Task\n"
            f" ├─ Job ID: {job_id}\n"
            f" ├─ Function: {func_name}\n"
            f" ├─ User ID: {user_id or 'system'}\n"
            f" ├─ Job Store: {job_store_name} (user context)\n"
            f" ├─ Scheduled For: {run_date}\n"
            f" ├─ Replace Existing: {replace_existing}\n"
            f" ├─ Total One-Time Jobs: {len(one_time_jobs)}\n"
            f" └─ Total Scheduled Jobs: {len(all_jobs)}"
        )
        logger.warning(log_message)

        # Log job scheduling to event log for dashboard
        event_db = None
        try:
            event_db = get_db_session()
            if event_db:
                event_log = SchedulerEventLog(
                    event_type='job_scheduled',
                    event_date=datetime.utcnow(),
                    job_id=job_id,
                    job_type='one_time',
                    user_id=user_id,
                    event_data={
                        'function_name': func_name,
                        'job_store': job_store_name,
                        'scheduled_for': run_date.isoformat(),
                        'replace_existing': replace_existing
                    }
                )
                event_db.add(event_log)
                event_db.commit()
        except Exception as e:
            logger.debug(f"Failed to log job scheduling event: {e}")
        finally:
            # Always release the session, including when add/commit failed.
            _close_quietly(event_db)

        return job_id
    except Exception as e:
        logger.error(f"Failed to schedule one-time task {job_id}: {e}")
        raise
def is_running(self) -> bool:
    """Report the scheduler's current running flag."""
    running_flag = self._running
    return running_flag

View File

@@ -0,0 +1,197 @@
"""
Task Execution Handler
Handles asynchronous execution of individual tasks with proper session isolation.
"""
from typing import TYPE_CHECKING, Any, Dict, Optional
from sqlalchemy.orm import object_session
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from .exception_handler import (
SchedulerException, TaskExecutionError, DatabaseError, SchedulerConfigError
)
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("task_execution_handler")
async def execute_task_async(
    scheduler: 'TaskScheduler',
    task_type: str,
    task: Any,
    summary: Optional[Dict[str, Any]] = None
):
    """
    Execute a single task asynchronously with user isolation.

    A dedicated database session is opened for the task, the task is merged
    into it if it is detached, the executor registered for ``task_type`` is
    looked up and awaited, and scheduler statistics are updated from the
    outcome. All failures are reported through ``scheduler.exception_handler``
    instead of being raised to the caller.

    Args:
        scheduler: TaskScheduler instance (provides registry, stats,
            exception_handler, active_executions and retry settings)
        task_type: Type of task, used to look up the executor
        task: Task instance from database (detached from original session)
        summary: Optional summary dict to update with execution results; its
            'executed' / 'failed' counters are incremented when it is truthy

    Returns:
        None
    """
    # Key used for log messages and for the active_executions cleanup below;
    # falls back to the object's identity when the task has no 'id'.
    task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
    db = None
    user_id = None

    try:
        # Extract user context if available (for user isolation tracking)
        try:
            if hasattr(task, 'strategy') and task.strategy:
                user_id = getattr(task.strategy, 'user_id', None)
            elif hasattr(task, 'strategy_id') and task.strategy_id:
                # Will query user_id after we have db session
                pass
        except Exception as e:
            logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")

        # Log task execution start (detailed for important tasks)
        task_db_id = getattr(task, 'id', None)
        if task_db_id:
            logger.debug(f"[Scheduler] ▶️ Executing {task_type} task {task_db_id} | user_id: {user_id}")

        # Create a new database session for this async task
        # SQLAlchemy sessions are not async-safe and cannot be shared across concurrent tasks
        db = get_db_session()
        if db is None:
            error = DatabaseError(
                message=f"Failed to get database session for task {task_id}",
                user_id=user_id,
                task_id=getattr(task, 'id', None),
                task_type=task_type
            )
            scheduler.exception_handler.handle_exception(error, log_level="error")
            scheduler.stats['tasks_failed'] += 1
            scheduler._update_user_stats(user_id, success=False)
            return

        # Set database session for exception handler
        # NOTE(review): the handler lives on the shared scheduler, so concurrent
        # tasks overwrite this attribute and it keeps pointing at this session
        # after the finally block closes it - confirm this is intended.
        scheduler.exception_handler.db = db

        # Merge the detached task object into this session
        # The task object was loaded in a different session and is now detached
        if object_session(task) is None:
            # Task is detached, need to merge it into this session
            task = db.merge(task)

        # Extract user_id after merge if not already available
        # NOTE(review): the strategy_id lookup below is only reached for tasks
        # that expose a 'strategy' attribute; others keep user_id=None.
        if user_id is None and hasattr(task, 'strategy'):
            try:
                if task.strategy:
                    user_id = getattr(task.strategy, 'user_id', None)
                elif hasattr(task, 'strategy_id'):
                    # Query strategy if relationship not loaded
                    from models.enhanced_strategy_models import EnhancedContentStrategy
                    strategy = db.query(EnhancedContentStrategy).filter(
                        EnhancedContentStrategy.id == task.strategy_id
                    ).first()
                    if strategy:
                        user_id = strategy.user_id
            except Exception as e:
                logger.debug(f"Could not extract user_id after merge for task {task_id}: {e}")

        # Get executor for this task type
        try:
            executor = scheduler.registry.get_executor(task_type)
        except Exception as e:
            error = SchedulerConfigError(
                message=f"Failed to get executor for task type {task_type}: {str(e)}",
                user_id=user_id,
                context={
                    "task_id": getattr(task, 'id', None),
                    "task_type": task_type
                },
                original_error=e
            )
            scheduler.exception_handler.handle_exception(error)
            scheduler.stats['tasks_failed'] += 1
            scheduler._update_user_stats(user_id, success=False)
            return

        # Execute task with its own session (with error handling)
        try:
            result = await executor.execute_task(task, db)

            # Handle result and update statistics
            if result.success:
                scheduler.stats['tasks_executed'] += 1
                scheduler._update_user_stats(user_id, success=True)
                if summary:
                    summary['executed'] += 1
                logger.debug(f"[Scheduler] ✅ Task {task_id} executed successfully | user_id: {user_id} | time: {result.execution_time_ms}ms")
            else:
                scheduler.stats['tasks_failed'] += 1
                scheduler._update_user_stats(user_id, success=False)
                if summary:
                    summary['failed'] += 1

                # Create structured error for failed execution
                error = TaskExecutionError(
                    message=result.error_message or "Task execution failed",
                    user_id=user_id,
                    task_id=getattr(task, 'id', None),
                    task_type=task_type,
                    execution_time_ms=result.execution_time_ms,
                    context={"result_data": result.result_data}
                )
                scheduler.exception_handler.handle_exception(error, log_level="warning")
                logger.warning(f"[Scheduler] ❌ Task {task_id} failed | user_id: {user_id} | error: {result.error_message}")

                # Retry logic if enabled
                if scheduler.enable_retries and result.retryable:
                    await scheduler._schedule_retry(task, result.retry_delay)
        except SchedulerException as e:
            # Re-raise so the outer SchedulerException handler below reports
            # it and updates the failure statistics exactly once
            raise
        except Exception as e:
            # Wrap unexpected exceptions
            error = TaskExecutionError(
                message=f"Unexpected error during task execution: {str(e)}",
                user_id=user_id,
                task_id=getattr(task, 'id', None),
                task_type=task_type,
                original_error=e
            )
            scheduler.exception_handler.handle_exception(error)
            scheduler.stats['tasks_failed'] += 1
            scheduler._update_user_stats(user_id, success=False)

    except SchedulerException as e:
        # Handle scheduler exceptions
        scheduler.exception_handler.handle_exception(e)
        scheduler.stats['tasks_failed'] += 1
        scheduler._update_user_stats(user_id, success=False)
    except Exception as e:
        # Handle any other unexpected errors
        error = TaskExecutionError(
            message=f"Unexpected error in task execution wrapper: {str(e)}",
            user_id=user_id,
            task_id=getattr(task, 'id', None),
            task_type=task_type,
            original_error=e
        )
        scheduler.exception_handler.handle_exception(error)
        scheduler.stats['tasks_failed'] += 1
        scheduler._update_user_stats(user_id, success=False)
    finally:
        # Clean up database session
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session for task {task_id}: {e}")

        # Remove from active executions
        if task_id in scheduler.active_executions:
            del scheduler.active_executions[task_id]

View File

@@ -0,0 +1,756 @@
"""
OAuth Token Monitoring Task Executor
Handles execution of OAuth token monitoring tasks for connected platforms.
"""
import logging
import os
import time
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from sqlalchemy.orm import Session
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask, OAuthTokenExecutionLog
from models.subscription_models import UsageAlert
from utils.logger_utils import get_service_logger
# Import platform-specific services
from services.gsc_service import GSCService
from services.integrations.bing_oauth import BingOAuthService
from services.integrations.wordpress_oauth import WordPressOAuthService
from services.wix_service import WixService
logger = get_service_logger("oauth_token_monitoring_executor")
class OAuthTokenMonitoringExecutor(TaskExecutor):
"""
Executor for OAuth token monitoring tasks.
Handles:
- Checking token validity and expiration
- Attempting automatic token refresh
- Logging results and updating task status
- One-time refresh attempt (no automatic retries on failure)
"""
def __init__(self):
    """Set up the logger, exception handler and expiry warning window."""
    # Tokens expiring within this many days are treated as "expiring soon"
    self.expiration_warning_days = 7
    self.exception_handler = SchedulerExceptionHandler()
    self.logger = logger
async def execute_task(self, task: OAuthTokenMonitoringTask, db: Session) -> TaskExecutionResult:
    """
    Execute an OAuth token monitoring task.

    This checks token status and attempts refresh if needed.
    If refresh fails, marks task as failed and does not retry automatically.

    On an unexpected exception the session is rolled back before the failure
    is recorded, so the failure log can still be written after a failed
    flush/commit and the provisional 'running' log row is discarded instead of
    being left behind next to a second 'failed' row.

    Args:
        task: OAuthTokenMonitoringTask instance
        db: Database session

    Returns:
        TaskExecutionResult (the check result on the normal path, or a
        non-retryable failure result when an exception was handled)
    """
    start_time = time.time()
    # Read identifiers up front: a rollback expires ORM instances, and the
    # error path should not need to reload the task just to build messages.
    task_id = task.id
    user_id = task.user_id
    platform = task.platform

    try:
        self.logger.info(
            f"Executing OAuth token monitoring: task_id={task_id} | "
            f"user_id={user_id} | platform={platform}"
        )

        # Create execution log
        execution_log = OAuthTokenExecutionLog(
            task_id=task_id,
            execution_date=datetime.utcnow(),
            status='running'
        )
        db.add(execution_log)
        db.flush()

        # Check and refresh token
        result = await self._check_and_refresh_token(task, db)

        # Update execution log
        execution_time_ms = int((time.time() - start_time) * 1000)
        execution_log.status = 'success' if result.success else 'failed'
        execution_log.result_data = result.result_data
        execution_log.error_message = result.error_message
        execution_log.execution_time_ms = execution_time_ms

        # Update task based on result
        task.last_check = datetime.utcnow()

        if result.success:
            task.last_success = datetime.utcnow()
            task.status = 'active'
            task.failure_reason = None
            # Schedule next check (7 days from now)
            task.next_check = self.calculate_next_execution(
                task=task,
                frequency='Weekly',
                last_execution=task.last_check
            )
        else:
            # Refresh failed - mark as failed and stop automatic retries
            task.last_failure = datetime.utcnow()
            task.failure_reason = result.error_message
            task.status = 'failed'
            # Do NOT update next_check - wait for manual trigger
            self.logger.warning(
                f"OAuth token refresh failed for user {user_id}, platform {platform}. "
                f"Task marked as failed. No automatic retry will be scheduled."
            )

            # Create UsageAlert notification for the user
            self._create_failure_alert(user_id, platform, result.error_message, result.result_data, db)

        task.updated_at = datetime.utcnow()
        db.commit()

        return result

    except Exception as e:
        execution_time_ms = int((time.time() - start_time) * 1000)

        # Reset the session before reusing it. Without this, a failed
        # flush/commit leaves the session unusable and the failure record
        # below can never be saved; it also drops the uncommitted 'running' log.
        try:
            db.rollback()
        except Exception as rollback_error:
            self.logger.warning(
                f"Rollback failed while handling error for OAuth task {task_id}: {rollback_error}"
            )

        # Set database session for exception handler
        self.exception_handler.db = db

        # Create structured error
        error = TaskExecutionError(
            message=f"Error executing OAuth token monitoring task {task_id}: {str(e)}",
            user_id=user_id,
            task_id=task_id,
            task_type="oauth_token_monitoring",
            execution_time_ms=execution_time_ms,
            context={
                "platform": platform,
                "user_id": user_id
            },
            original_error=e
        )

        # Handle exception with structured logging
        self.exception_handler.handle_exception(error)

        # Record the failure: one 'failed' log row plus the task status
        try:
            execution_log = OAuthTokenExecutionLog(
                task_id=task_id,
                execution_date=datetime.utcnow(),
                status='failed',
                error_message=str(e),
                execution_time_ms=execution_time_ms,
                result_data={
                    "error_type": error.error_type.value,
                    "severity": error.severity.value,
                    "context": error.context
                }
            )
            db.add(execution_log)

            task.last_failure = datetime.utcnow()
            task.failure_reason = str(e)
            task.status = 'failed'
            task.last_check = datetime.utcnow()
            task.updated_at = datetime.utcnow()
            # Do NOT update next_check - wait for manual trigger

            # Create UsageAlert notification for the user
            self._create_failure_alert(user_id, platform, str(e), None, db)

            db.commit()
        except Exception as commit_error:
            db_error = DatabaseError(
                message=f"Error saving execution log: {str(commit_error)}",
                user_id=user_id,
                task_id=task_id,
                original_error=commit_error
            )
            self.exception_handler.handle_exception(db_error)
            db.rollback()

        return TaskExecutionResult(
            success=False,
            error_message=str(e),
            execution_time_ms=execution_time_ms,
            retryable=False,  # Do not retry automatically
            retry_delay=0
        )
async def _check_and_refresh_token(
    self,
    task: OAuthTokenMonitoringTask,
    db: Session
) -> TaskExecutionResult:
    """
    Dispatch the token check to the handler for the task's platform.

    Token storage, as noted by the original author (from onboarding step 5):
    - GSC: gsc_credentials table (via GSCService)
    - Bing: bing_oauth_tokens table (via BingOAuthService)
    - WordPress: wordpress_oauth_tokens table (via WordPressOAuthService)
    - Wix: Currently in frontend sessionStorage (backend storage TODO)

    Args:
        task: OAuthTokenMonitoringTask instance (platform and user_id are read)
        db: Database session (not used by the platform checks here)

    Returns:
        TaskExecutionResult with success status and details; unsupported
        platforms and unexpected errors yield a non-retryable failure
    """
    user_id = task.user_id
    platform = task.platform

    try:
        self.logger.info(f"Checking token for platform: {platform}, user: {user_id}")

        # Platform identifier -> coroutine performing the platform-specific check
        platform_checks = {
            'gsc': self._check_gsc_token,
            'bing': self._check_bing_token,
            'wordpress': self._check_wordpress_token,
            'wix': self._check_wix_token,
        }
        check = platform_checks.get(platform)
        if check is not None:
            return await check(user_id)

        unsupported_details = {
            'platform': platform,
            'user_id': user_id,
            'error': 'Unsupported platform'
        }
        return TaskExecutionResult(
            success=False,
            error_message=f"Unsupported platform: {platform}",
            result_data=unsupported_details,
            retryable=False
        )
    except Exception as e:
        self.logger.error(
            f"Error checking/refreshing token for platform {platform}, user {user_id}: {e}",
            exc_info=True
        )
        failure_details = {
            'platform': platform,
            'user_id': user_id,
            'error': str(e)
        }
        return TaskExecutionResult(
            success=False,
            error_message=f"Token check failed: {str(e)}",
            result_data=failure_details,
            retryable=False  # Do not retry automatically
        )
async def _check_gsc_token(self, user_id: str) -> TaskExecutionResult:
    """
    Verify that GSC (Google Search Console) credentials can be loaded.

    Per the original author's note, GSCService refreshes expired tokens
    itself while loading credentials, so a successful load is reported as a
    valid token.

    Args:
        user_id: User whose credentials are loaded

    Returns:
        TaskExecutionResult: success when credentials load, otherwise a
        non-retryable failure ('not_found' or the caught error)
    """
    try:
        # Absolute database path, for consistency with onboarding
        service = GSCService(db_path=os.path.abspath("alwrity.db"))

        if service.load_user_credentials(user_id):
            # Credentials loaded, so the token is considered valid
            return TaskExecutionResult(
                success=True,
                result_data={
                    'platform': 'gsc',
                    'user_id': user_id,
                    'status': 'valid',
                    'check_time': datetime.utcnow().isoformat(),
                    'message': 'GSC token is valid (auto-refreshed if expired)'
                }
            )

        return TaskExecutionResult(
            success=False,
            error_message="GSC credentials not found or could not be loaded",
            result_data={
                'platform': 'gsc',
                'user_id': user_id,
                'status': 'not_found',
                'check_time': datetime.utcnow().isoformat()
            },
            retryable=False
        )
    except Exception as e:
        self.logger.error(f"Error checking GSC token for user {user_id}: {e}", exc_info=True)
        error_details = {
            'platform': 'gsc',
            'user_id': user_id,
            'error': str(e)
        }
        return TaskExecutionResult(
            success=False,
            error_message=f"GSC token check failed: {str(e)}",
            result_data=error_details,
            retryable=False
        )
async def _check_bing_token(self, user_id: str) -> TaskExecutionResult:
    """
    Check and refresh Bing Webmaster Tools token.

    Flow:
    1. No tokens at all -> 'not_found' failure.
    2. Active tokens: the first one expiring within
       ``self.expiration_warning_days`` is refreshed when it carries a refresh
       token ('refreshed' / 'refresh_failed'); otherwise 'valid'.
    3. Only expired tokens: the first listed token is refreshed if it expired
       less than 24 hours ago; otherwise 'expired'.
    4. Anything else -> 'invalid'.

    Expiry timestamps are normalized to naive UTC before being compared with
    ``datetime.utcnow()``. Previously an offset-aware timestamp (e.g. one
    ending in 'Z') raised TypeError on subtraction, the error was swallowed,
    and the token was silently treated as valid / never refreshed.

    Args:
        user_id: User whose Bing tokens are checked

    Returns:
        TaskExecutionResult; every failure is non-retryable
    """
    from datetime import timezone

    def _parse_expiry(raw: str) -> datetime:
        # ISO-8601 -> naive UTC datetime, comparable with datetime.utcnow()
        parsed = datetime.fromisoformat(raw.replace('Z', '+00:00'))
        if parsed.tzinfo is not None:
            parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
        return parsed

    def _outcome(success, status, message=None, error_message=None):
        # Build the result payload shared by every non-exception exit
        data = {
            'platform': 'bing',
            'user_id': user_id,
            'status': status,
            'check_time': datetime.utcnow().isoformat()
        }
        if message is not None:
            data['message'] = message
        if success:
            return TaskExecutionResult(success=True, result_data=data)
        return TaskExecutionResult(
            success=False,
            error_message=error_message,
            result_data=data,
            retryable=False
        )

    try:
        # Use absolute database path for consistency with onboarding
        db_path = os.path.abspath("alwrity.db")
        bing_service = BingOAuthService(db_path=db_path)

        # Get token status (includes expired tokens)
        token_status = bing_service.get_user_token_status(user_id)

        if not token_status.get('has_tokens'):
            return _outcome(False, 'not_found', error_message="No Bing tokens found for user")

        active_tokens = token_status.get('active_tokens', [])
        expired_tokens = token_status.get('expired_tokens', [])

        # If we have active tokens, check if any are expiring soon
        if active_tokens:
            now = datetime.utcnow()
            token_to_refresh = None

            for token in active_tokens:
                expires_at_str = token.get('expires_at')
                if not expires_at_str:
                    continue
                try:
                    expires_at = _parse_expiry(expires_at_str)
                except Exception as parse_error:
                    # Unparseable expiry: keep treating the token as valid,
                    # but leave a trace instead of failing silently
                    self.logger.debug(
                        f"Could not parse Bing token expiry for user {user_id}: {parse_error}"
                    )
                    continue
                # Check if expires within warning window
                if (expires_at - now).days < self.expiration_warning_days:
                    token_to_refresh = token
                    break

            if token_to_refresh:
                refresh_token = token_to_refresh.get('refresh_token')
                if refresh_token:
                    if bing_service.refresh_access_token(user_id, refresh_token):
                        return _outcome(True, 'refreshed', message='Bing token refreshed successfully')
                    return _outcome(False, 'refresh_failed', error_message="Failed to refresh Bing token")

            # Token is valid and not expiring soon (or cannot be refreshed yet)
            return _outcome(True, 'valid', message='Bing token is valid')

        # No active tokens, check if we can refresh expired ones
        if expired_tokens:
            # First entry is the most recent one (per the original note the
            # list is sorted by created_at DESC - not verifiable from here)
            latest_token = expired_tokens[0]
            refresh_token = latest_token.get('refresh_token')
            expires_at_str = latest_token.get('expires_at')

            if refresh_token and expires_at_str:
                try:
                    expires_at = _parse_expiry(expires_at_str)
                    # Only refresh if expired within last 24 hours (grace period)
                    hours_since_expiry = (datetime.utcnow() - expires_at).total_seconds() / 3600
                    if hours_since_expiry < 24 and bing_service.refresh_access_token(user_id, refresh_token):
                        return _outcome(
                            True, 'refreshed',
                            message='Bing token refreshed from expired state'
                        )
                except Exception as refresh_error:
                    # Best effort: fall through to the 'expired' result
                    self.logger.warning(
                        f"Could not refresh expired Bing token for user {user_id}: {refresh_error}"
                    )

            return _outcome(
                False, 'expired',
                message='Bing token expired. User needs to reconnect.',
                error_message="Bing token expired and could not be refreshed"
            )

        return _outcome(False, 'invalid', error_message="No valid Bing tokens found")

    except Exception as e:
        self.logger.error(f"Error checking Bing token for user {user_id}: {e}", exc_info=True)
        return TaskExecutionResult(
            success=False,
            error_message=f"Bing token check failed: {str(e)}",
            result_data={
                'platform': 'bing',
                'user_id': user_id,
                'error': str(e)
            },
            retryable=False
        )
async def _check_wordpress_token(self, user_id: str) -> TaskExecutionResult:
    """
    Check WordPress token validity.

    Note (from the original author): WordPress tokens cannot be refreshed.
    They expire after 2 weeks and require user re-authorization, so this only
    classifies the stored tokens.

    Each token is classified by its ``expires_at`` timestamp (normalized to
    naive UTC so it can be compared with ``datetime.utcnow()``). Tokens with a
    missing or unparseable expiry are probed with
    ``wordpress_service.test_token``. Previously an offset-aware timestamp
    raised TypeError on subtraction and was sent down the probe path instead
    of being classified by date.

    Args:
        user_id: User whose WordPress tokens are checked

    Returns:
        TaskExecutionResult: success if at least one token is valid;
        otherwise a non-retryable 'expiring_soon', 'expired' or 'not_found'
        failure
    """
    from datetime import timezone

    def _parse_expiry(raw: str) -> datetime:
        # ISO-8601 -> naive UTC datetime, comparable with datetime.utcnow()
        parsed = datetime.fromisoformat(raw.replace('Z', '+00:00'))
        if parsed.tzinfo is not None:
            parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
        return parsed

    def _outcome(success, status, message=None, error_message=None, **extra):
        # Build the result payload shared by every non-exception exit
        data = {
            'platform': 'wordpress',
            'user_id': user_id,
            'status': status,
            'check_time': datetime.utcnow().isoformat()
        }
        if message is not None:
            data['message'] = message
        data.update(extra)
        if success:
            return TaskExecutionResult(success=True, result_data=data)
        return TaskExecutionResult(
            success=False,
            error_message=error_message,
            result_data=data,
            retryable=False
        )

    try:
        # Use absolute database path for consistency with onboarding
        db_path = os.path.abspath("alwrity.db")
        wordpress_service = WordPressOAuthService(db_path=db_path)

        tokens = wordpress_service.get_user_tokens(user_id)
        if not tokens:
            return _outcome(False, 'not_found', error_message="No WordPress tokens found for user")

        now = datetime.utcnow()
        valid_tokens = []
        expiring_soon = []

        for token in tokens:
            expires_at = None
            expires_at_str = token.get('expires_at')
            if expires_at_str:
                try:
                    expires_at = _parse_expiry(expires_at_str)
                except Exception as parse_error:
                    self.logger.debug(
                        f"Could not parse WordPress token expiry for user {user_id}: {parse_error}"
                    )

            if expires_at is None:
                # No usable expiration date - test token validity via API
                access_token = token.get('access_token')
                if access_token and wordpress_service.test_token(access_token):
                    valid_tokens.append(token)
                continue

            days_until_expiry = (expires_at - now).days
            if days_until_expiry < 0:
                continue  # already expired
            if days_until_expiry < self.expiration_warning_days:
                expiring_soon.append(token)
            else:
                valid_tokens.append(token)

        if valid_tokens:
            return _outcome(
                True, 'valid',
                message='WordPress token is valid',
                valid_tokens_count=len(valid_tokens)
            )

        if expiring_soon:
            # WordPress tokens cannot be refreshed - user needs to reconnect
            return _outcome(
                False, 'expiring_soon',
                message='WordPress token expires soon. User needs to reconnect (WordPress tokens cannot be auto-refreshed).',
                error_message="WordPress token expiring soon and cannot be auto-refreshed"
            )

        return _outcome(
            False, 'expired',
            message='WordPress token expired. User needs to reconnect (WordPress tokens cannot be auto-refreshed).',
            error_message="WordPress token expired and cannot be refreshed"
        )

    except Exception as e:
        self.logger.error(f"Error checking WordPress token for user {user_id}: {e}", exc_info=True)
        return TaskExecutionResult(
            success=False,
            error_message=f"WordPress token check failed: {str(e)}",
            result_data={
                'platform': 'wordpress',
                'user_id': user_id,
                'error': str(e)
            },
            retryable=False
        )
async def _check_wix_token(self, user_id: str) -> TaskExecutionResult:
    """
    Report that Wix token monitoring is not available yet.

    Per the original author's note, Wix tokens currently live in frontend
    sessionStorage, so there is nothing for the backend to inspect; the method
    always returns a non-retryable 'not_supported' failure.

    Args:
        user_id: User the result is reported for

    Returns:
        TaskExecutionResult with success=False
    """
    try:
        # TODO: Wix tokens are stored in frontend sessionStorage, not backend database
        # Once backend storage is implemented, we can check tokens here
        not_supported = {
            'platform': 'wix',
            'user_id': user_id,
            'status': 'not_supported',
            'check_time': datetime.utcnow().isoformat(),
            'message': 'Wix token monitoring requires backend token storage implementation'
        }
        return TaskExecutionResult(
            success=False,
            error_message="Wix token monitoring not yet supported - tokens stored in frontend sessionStorage",
            result_data=not_supported,
            retryable=False
        )
    except Exception as e:
        self.logger.error(f"Error checking Wix token for user {user_id}: {e}", exc_info=True)
        error_details = {
            'platform': 'wix',
            'user_id': user_id,
            'error': str(e)
        }
        return TaskExecutionResult(
            success=False,
            error_message=f"Wix token check failed: {str(e)}",
            result_data=error_details,
            retryable=False
        )
def _create_failure_alert(
    self,
    user_id: str,
    platform: str,
    error_message: str,
    result_data: Optional[Dict[str, Any]],
    db: Session
):
    """
    Queue a UsageAlert describing a failed OAuth token check.

    The alert is only added to the session; committing is left to the caller
    so the alert is persisted together with the task update. Any error while
    building or adding the alert is logged and suppressed.

    Args:
        user_id: User ID
        platform: Platform identifier (gsc, bing, wordpress, wix)
        error_message: Error message from token check
        result_data: Optional result data from token check; its 'status'
            entry selects severity and wording
        db: Database session
    """
    try:
        status = 'unknown'
        if result_data:
            status = result_data.get('status', 'unknown')

        # Only "soft" states are warnings; everything else is an error
        if status in ('expiring_soon', 'not_found'):
            severity, alert_type = 'warning', 'oauth_token_warning'
        else:
            severity, alert_type = 'error', 'oauth_token_failure'

        # Human-readable platform name; unknown platforms are upper-cased
        platform_display = {
            'gsc': 'Google Search Console',
            'bing': 'Bing Webmaster Tools',
            'wordpress': 'WordPress',
            'wix': 'Wix'
        }.get(platform, platform.upper())

        # (status, title, message) variants; the fallback handles the rest
        variants = (
            (
                'expired',
                f"{platform_display} Token Expired",
                (
                    f"Your {platform_display} access token has expired and could not be automatically renewed. "
                    f"Please reconnect your {platform_display} account to continue using this integration."
                ),
            ),
            (
                'expiring_soon',
                f"{platform_display} Token Expiring Soon",
                (
                    f"Your {platform_display} access token will expire soon. "
                    f"Please reconnect your {platform_display} account to avoid interruption."
                ),
            ),
            (
                'refresh_failed',
                f"{platform_display} Token Renewal Failed",
                (
                    f"Failed to automatically renew your {platform_display} access token. "
                    f"Please reconnect your {platform_display} account. "
                    f"Error: {error_message}"
                ),
            ),
            (
                'not_found',
                f"{platform_display} Token Not Found",
                (
                    f"No {platform_display} access token found. "
                    f"Please connect your {platform_display} account in the onboarding settings."
                ),
            ),
        )
        fallback = (
            f"{platform_display} Token Error",
            (
                f"An error occurred while checking your {platform_display} access token. "
                f"Please reconnect your {platform_display} account. "
                f"Error: {error_message}"
            ),
        )
        title, message = next(
            ((t, m) for s, t, m in variants if status == s),
            fallback
        )

        # Create UsageAlert for the current billing period (YYYY-MM format)
        db.add(UsageAlert(
            user_id=user_id,
            alert_type=alert_type,
            threshold_percentage=0,  # Not applicable for OAuth alerts
            provider=None,  # Not applicable for OAuth alerts
            title=title,
            message=message,
            severity=severity,
            is_sent=False,  # Will be marked as sent when frontend polls
            is_read=False,
            billing_period=datetime.utcnow().strftime("%Y-%m")
        ))
        # No commit here - the caller commits alert and task update together

        self.logger.info(
            f"Created UsageAlert for OAuth token failure: user={user_id}, "
            f"platform={platform}, severity={severity}"
        )
    except Exception as e:
        # Don't fail the entire task execution if alert creation fails
        self.logger.error(
            f"Failed to create UsageAlert for OAuth token failure: {e}",
            exc_info=True
        )
def calculate_next_execution(
    self,
    task: OAuthTokenMonitoringTask,
    frequency: str,
    last_execution: Optional[datetime] = None
) -> datetime:
    """
    Return the next check time, seven days after the reference time.

    Token monitoring always runs weekly; any frequency other than 'Weekly'
    is logged as a warning and still scheduled seven days out.

    Args:
        task: OAuthTokenMonitoringTask instance
        frequency: Frequency string (should be 'Weekly' for token monitoring)
        last_execution: Reference datetime (defaults to task.last_check, or
            the current UTC time when that is unset)

    Returns:
        Next execution datetime
    """
    reference_time = last_execution
    if reference_time is None:
        reference_time = task.last_check or datetime.utcnow()

    if frequency != 'Weekly':
        # Unrecognized frequency: warn, then fall back to the weekly interval
        self.logger.warning(
            f"Unknown frequency '{frequency}' for OAuth token monitoring task {task.id}. "
            f"Defaulting to Weekly (7 days)."
        )

    return reference_time + timedelta(days=7)

View File

@@ -1,4 +1,12 @@
"""
Scheduler utilities.
Scheduler Utilities Package
"""
from .task_loader import load_due_monitoring_tasks
from .user_job_store import extract_domain_root, get_user_job_store_name
__all__ = [
'load_due_monitoring_tasks',
'extract_domain_root',
'get_user_job_store_name'
]

View File

@@ -0,0 +1,54 @@
"""
OAuth Token Monitoring Task Loader
Functions to load due OAuth token monitoring tasks from database.
"""
from datetime import datetime
from typing import List, Optional, Union
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
def load_due_oauth_token_monitoring_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None
) -> List[OAuthTokenMonitoringTask]:
    """
    Fetch the OAuth token monitoring tasks that should run now.

    A task is due when:
    - its status is 'active', and
    - its next_check is at or before the current UTC time, or has never been
      set (first execution).

    When user_id is given, only that user's tasks are returned (the value is
    compared as a string); otherwise tasks of all users are loaded.

    Args:
        db: Database session
        user_id: Optional user ID to filter tasks (if None, loads all users' tasks)

    Returns:
        List of due OAuthTokenMonitoringTask instances
    """
    current_time = datetime.utcnow()

    is_active = OAuthTokenMonitoringTask.status == 'active'
    is_due = or_(
        OAuthTokenMonitoringTask.next_check <= current_time,
        OAuthTokenMonitoringTask.next_check.is_(None)
    )
    due_tasks = db.query(OAuthTokenMonitoringTask).filter(and_(is_active, is_due))

    if user_id is None:
        return due_tasks.all()

    # Restrict to a single user (user isolation)
    owned_by_user = OAuthTokenMonitoringTask.user_id == str(user_id)
    return due_tasks.filter(owned_by_user).all()

View File

@@ -4,7 +4,7 @@ Functions to load due tasks from database.
"""
from datetime import datetime
from typing import List, Optional
from typing import List, Optional, Union
from sqlalchemy.orm import Session, joinedload
from sqlalchemy import and_, or_
@@ -14,7 +14,7 @@ from models.enhanced_strategy_models import EnhancedContentStrategy
def load_due_monitoring_tasks(
db: Session,
user_id: Optional[int] = None
user_id: Optional[Union[str, int]] = None
) -> List[MonitoringTask]:
"""
Load all monitoring tasks that are due for execution.
@@ -22,14 +22,17 @@ def load_due_monitoring_tasks(
Criteria:
- status == 'active'
- next_execution <= now (or is None for first execution)
- Optional: user_id filter for specific user (for future admin features)
- Optional: user_id filter for specific user (for user isolation)
Note: Strategy relationship is eagerly loaded to ensure user_id is accessible
during task execution for user isolation.
User isolation is enforced through filtering by user_id when provided.
If no user_id is provided, loads tasks for all users (for system-wide monitoring).
Args:
db: Database session
user_id: Optional user ID to filter tasks (if None, loads all users' tasks)
user_id: Optional user ID (Clerk string or int) to filter tasks (if None, loads all users' tasks)
Returns:
List of due MonitoringTask instances with strategy relationship loaded

View File

@@ -0,0 +1,129 @@
"""
User Job Store Utilities
Utilities for managing per-user job stores based on website root.
"""
from typing import Optional
from urllib.parse import urlparse
from loguru import logger
from sqlalchemy.orm import Session as SQLSession
from services.database import get_db_session
from models.onboarding import OnboardingSession, WebsiteAnalysis
def extract_domain_root(url: str) -> str:
    """
    Extract domain root from a website URL for use as job store identifier.

    Examples:
        https://www.example.com -> example
        https://blog.example.com -> example
        https://example.co.uk -> example
        http://subdomain.example.com/path -> example
        http://localhost:8000 -> localhost
        https://user:secret@example.com -> example
        example.com:8080/path -> example

    Args:
        url: Website URL, with or without a scheme

    Returns:
        Domain root (e.g., 'example') or 'default' if extraction fails
    """
    # Labels that commonly sit between the registrable name and a country
    # TLD (e.g. example.co.uk); when seen, the root is one label further left.
    second_level_labels = ('co', 'com', 'net', 'org')

    try:
        candidate = url.strip()
        parsed = urlparse(candidate)

        # Scheme-less input ("example.com/path", "example.com:8080") has no
        # netloc; re-parse it as a network location so the host, port and
        # userinfo are split correctly instead of guessing from the path.
        if not parsed.netloc and '://' not in candidate:
            parsed = urlparse(f"//{candidate}")

        # `.hostname` is lowercased and excludes userinfo and port, so none
        # of those can leak into the job store name. A trailing dot (FQDN
        # form) would otherwise shift every label index by one.
        hostname = (parsed.hostname or '').rstrip('.')

        # Remove www. prefix if present
        if hostname.startswith('www.'):
            hostname = hostname[4:]

        # For example.com -> example, for example.co.uk -> example
        labels = hostname.split('.')
        if len(labels) >= 3 and labels[-2] in second_level_labels:
            root = labels[-3]
        elif len(labels) >= 2:
            root = labels[-2]
        else:
            # Single-label host such as "localhost" (or empty input)
            root = labels[0]

        # Keep only characters that are valid in a job store name
        root = ''.join(c for c in root.strip() if c.isalnum() or c in ('-', '_'))

        if not root or len(root) < 2:
            return 'default'
        return root
    except Exception as e:
        # Best-effort utility: never let a malformed URL break scheduling
        logger.warning(f"Failed to extract domain root from URL '{url}': {e}")
        return 'default'
def get_user_job_store_name(user_id: str, db: SQLSession = None) -> str:
    """
    Resolve the job store name for a user from their onboarding website URL.

    Looks up the user's most recently updated onboarding session, then the
    most recently updated website analysis for that session, and derives the
    name from the analysed URL via extract_domain_root.

    Args:
        user_id: User ID (Clerk string)
        db: Optional database session; when omitted one is obtained from
            get_db_session() and closed again before returning

    Returns:
        Job store name (e.g., 'example'), or 'default' when no session,
        onboarding record or website URL is available, or on any error
    """
    session = db
    owns_session = False

    try:
        if not session:
            session = get_db_session()
            owns_session = True

            if not session:
                logger.warning(f"Could not get database session for user {user_id}, using default job store")
                return 'default'

        # user_id is a Clerk string, so it is matched directly against the column
        onboarding_query = session.query(OnboardingSession).filter(
            OnboardingSession.user_id == user_id
        )
        latest_onboarding = onboarding_query.order_by(
            OnboardingSession.updated_at.desc()
        ).first()

        if not latest_onboarding:
            logger.debug(
                f"[Job Store] No onboarding session found for user {user_id}, using default job store. "
                f"This is normal if user hasn't completed onboarding."
            )
            return 'default'

        # Most recent website analysis recorded for that onboarding session
        analysis_query = session.query(WebsiteAnalysis).filter(
            WebsiteAnalysis.session_id == latest_onboarding.id
        )
        latest_analysis = analysis_query.order_by(
            WebsiteAnalysis.updated_at.desc()
        ).first()

        site_url = latest_analysis.website_url if latest_analysis else None
        if not site_url:
            logger.debug(
                f"[Job Store] No website URL found for user {user_id} (session_id: {latest_onboarding.id}), "
                f"using default job store. This is normal if website analysis wasn't completed."
            )
            return 'default'

        store_name = extract_domain_root(site_url)
        logger.debug(f"Job store for user {user_id}: {store_name} (from {site_url})")
        return store_name

    except Exception as e:
        logger.error(f"Error getting job store name for user {user_id}: {e}")
        return 'default'
    finally:
        # Only close sessions this function opened; closing is best-effort
        if owns_session and session:
            try:
                session.close()
            except Exception:
                pass

View File

@@ -494,10 +494,8 @@ class LimitValidator:
display_provider_name = actual_provider_name or provider_name
logger.error(f"[Pre-flight Check] ✅ Operation {op_idx + 1}/{len(operations)}: {operation_type}")
logger.error(f" ├─ Provider: {display_provider_name} (enum: {provider_name})")
logger.error(f" ├─ Operation Index: {op_idx}")
logger.error(f" └─ Estimated Tokens Requested: {tokens_requested}")
# Log operation details at debug level (only when needed)
logger.debug(f"[Pre-flight] Operation {op_idx + 1}/{len(operations)}: {operation_type} ({display_provider_name}, {tokens_requested} tokens)")
# Check if this is an LLM provider
llm_providers = ['gemini', 'openai', 'anthropic', 'mistral']
@@ -563,13 +561,11 @@ class LimitValidator:
if result:
base_current_tokens = result[0] if result[0] is not None else 0
logger.error(f"[Pre-flight Check] ✅ Raw SQL query returned result: {result[0]} -> {base_current_tokens}")
else:
base_current_tokens = 0
logger.error(f"[Pre-flight Check] ⚠️ Raw SQL query returned None (no rows found)")
query_succeeded = True
logger.error(f"[Pre-flight Check] ✅ Raw SQL query succeeded for {provider_tokens_key}: {base_current_tokens}")
logger.debug(f"[Pre-flight] Raw SQL query for {provider_tokens_key}: {base_current_tokens}")
except Exception as sql_error:
logger.error(f" └─ Raw SQL query failed for {provider_tokens_key}: {type(sql_error).__name__}: {sql_error}", exc_info=True)
@@ -606,14 +602,8 @@ class LimitValidator:
if not query_succeeded:
logger.warning(f" └─ Both query methods failed, using 0 as fallback")
# CRITICAL LOG: Always log what we got from DB - this helps debug renewal issues
# Use ERROR level to ensure it shows even if INFO is filtered
logger.error(f"[Pre-flight Check] 🔍 Fresh DB Query for {display_provider_name}:")
logger.error(f" ├─ Column: {provider_tokens_key}")
logger.error(f" ├─ Billing Period: {current_period}")
logger.error(f" ├─ User ID: {user_id}")
logger.error(f" ├─ Method: {'Raw SQL' if query_succeeded and base_current_tokens >= 0 else 'ORM' if query_succeeded else 'Failed - using 0'}")
logger.error(f" └─ Value from DB: {base_current_tokens}")
# Log DB query result at debug level (only when needed for troubleshooting)
logger.debug(f"[Pre-flight] DB query for {display_provider_name} ({provider_tokens_key}): {base_current_tokens} (period: {current_period})")
# Add any projected tokens from previous operations in this validation run
# Note: total_llm_tokens tracks ONLY projected tokens from this run, not base DB value
@@ -622,16 +612,8 @@ class LimitValidator:
# Current tokens = base from DB + projected from previous operations in this run
current_provider_tokens = base_current_tokens + projected_from_previous
# Use ERROR level to ensure visibility
logger.error(f"[Pre-flight Check] 📊 Token Calculation for {display_provider_name}:")
logger.error(f" ├─ Base from DB (fresh query): {base_current_tokens}")
logger.error(f" ├─ Projected from previous ops in this run: {projected_from_previous}")
logger.error(f" └─ Total current tokens (base + projected): {current_provider_tokens}")
# Also check the initial usage object to see if it's being used incorrectly
if usage and hasattr(usage, provider_tokens_key):
initial_usage_value = getattr(usage, provider_tokens_key, 0) or 0
logger.error(f" ⚠️ Initial usage object value: {initial_usage_value} (this should NOT be used for fresh query)")
# Log token calculation at debug level
logger.debug(f"[Pre-flight] Token calc for {display_provider_name}: base={base_current_tokens}, projected={projected_from_previous}, total={current_provider_tokens}")
token_limit = limits.get(provider_tokens_key, 0) or 0
@@ -687,15 +669,10 @@ class LimitValidator:
if tokens_requested > 0:
# Add this operation's tokens to cumulative projected tokens
total_llm_tokens[provider_tokens_key] = projected_from_previous + tokens_requested
logger.error(f"[Pre-flight Check] 📝 Updated cumulative projected tokens for {display_provider_name}:")
logger.error(f" ├─ Previous projected: {projected_from_previous}")
logger.error(f" ├─ This operation requested: {tokens_requested}")
logger.error(f" ├─ New cumulative projected: {total_llm_tokens[provider_tokens_key]}")
logger.error(f" └─ Old value in dict was: {old_projected}")
logger.debug(f"[Pre-flight] Updated projected tokens for {display_provider_name}: {projected_from_previous} + {tokens_requested} = {total_llm_tokens[provider_tokens_key]}")
else:
# No tokens requested, keep existing projected tokens (or 0 if first operation)
total_llm_tokens[provider_tokens_key] = projected_from_previous
logger.error(f"[Pre-flight Check] 📝 No tokens requested, keeping projected at: {projected_from_previous}")
# Check image generation limits
elif provider == APIProvider.STABILITY:

View File

@@ -237,9 +237,10 @@ async def monitoring_middleware(request: Request, call_next):
# Check for authorization header with user info
elif 'authorization' in request.headers:
# Auth middleware should have set request.state.user_id
# If not, this indicates an authentication failure that should be logged
# If not, this indicates an authentication failure (likely expired token)
# Log at debug level to reduce noise - expired tokens are expected
user_id = None
logger.warning("Monitoring: Auth header present but no user_id in state - authentication may have failed")
logger.debug("Monitoring: Auth header present but no user_id in state - token likely expired")
# Final fallback: None (skip usage limits for truly anonymous/unauthenticated)
else:

View File

@@ -93,11 +93,7 @@ def validate_research_operations(
provider = usage_info.get('provider', llm_provider_name) if usage_info else llm_provider_name
operation_type = usage_info.get('operation_type', 'unknown')
logger.error(f"[Pre-flight Validator] ❌ RESEARCH WORKFLOW BLOCKED")
logger.error(f" ├─ User: {user_id}")
logger.error(f" ├─ Blocked at: {operation_type}")
logger.error(f" ├─ Provider: {provider}")
logger.error(f" └─ Reason: {message}")
logger.warning(f"[Pre-flight] Research blocked for user {user_id}: {operation_type} ({provider}) - {message}")
# Raise HTTPException immediately - frontend gets immediate response, no API calls made
raise HTTPException(