Scheduled research persona generation

This commit is contained in:
ajaysi
2025-11-05 08:51:00 +05:30
parent 55087c4f37
commit d99c7c83a7
98 changed files with 14518 additions and 828 deletions

View File

@@ -0,0 +1,141 @@
"""
Check Cycle Handler
Handles the main scheduler check cycle that finds and executes due tasks.
"""
from typing import TYPE_CHECKING, Dict, Any
from datetime import datetime
from sqlalchemy.orm import Session
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
from .exception_handler import DatabaseError
from .interval_manager import adjust_check_interval_if_needed
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("check_cycle_handler")
async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
    """
    Run a single scheduler check cycle.

    Bumps the check counters in ``scheduler.stats``, lets the interval manager
    re-tune the check interval, passes every registered task type to
    ``scheduler._process_task_type`` and then logs and persists a summary of
    how many tasks were found, executed and failed in this cycle.

    Any exception raised by the cycle is wrapped in ``DatabaseError`` and given
    to ``scheduler.exception_handler``; it is not re-raised.

    Args:
        scheduler: TaskScheduler instance
    """
    scheduler.stats['total_checks'] += 1
    started_at = datetime.utcnow()
    scheduler.stats['last_check'] = started_at.isoformat()

    # Per-cycle bookkeeping; also handed to _process_task_type for each type.
    cycle_summary = {
        'tasks_found_by_type': {},
        'tasks_executed_by_type': {},
        'tasks_failed_by_type': {},
        'total_found': 0,
        'total_executed': 0,
        'total_failed': 0
    }

    # (bucket in cycle_summary, key in the per-type result, total key)
    buckets = (
        ('tasks_found_by_type', 'found', 'total_found'),
        ('tasks_executed_by_type', 'executed', 'total_executed'),
        ('tasks_failed_by_type', 'failed', 'total_failed'),
    )

    db = None
    try:
        db = get_db_session()
        if db is None:
            logger.error("[Scheduler Check] ❌ Failed to get database session")
            return

        # Let the interval manager speed up / slow down the check job first.
        await adjust_check_interval_if_needed(scheduler, db)

        # Process every registered task type and record its counts.
        for task_type in scheduler.registry.get_registered_types():
            type_summary = await scheduler._process_task_type(task_type, db, cycle_summary)
            if not type_summary:
                continue
            for bucket, result_key, _ in buckets:
                cycle_summary[bucket][task_type] = type_summary.get(result_key, 0)

        # Roll the per-type counts up into cycle totals.
        for bucket, _, total_key in buckets:
            cycle_summary[total_key] = sum(cycle_summary[bucket].values())

        check_duration = (datetime.utcnow() - started_at).total_seconds()
        active_strategies = scheduler.stats.get('active_strategies_count', 0)
        active_executions = len(scheduler.active_executions)

        # Assemble the multi-line summary that is emitted as one log record.
        report = [
            f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
            f" ├─ Duration: {check_duration:.2f}s",
            f" ├─ Active Strategies: {active_strategies}",
            f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
            f" ├─ User Isolation: Enabled (tasks filtered by user_id)",
            f" ├─ Tasks Found: {cycle_summary['total_found']} total"
        ]

        found_items = list(cycle_summary['tasks_found_by_type'].items())
        if found_items:
            # The closing prefix is only used when nothing executed or failed.
            nothing_ran = (
                cycle_summary['total_executed'] == 0
                and cycle_summary['total_failed'] == 0
            )
            last_position = len(found_items) - 1
            for position, (task_type, count) in enumerate(found_items):
                executed = cycle_summary['tasks_executed_by_type'].get(task_type, 0)
                failed = cycle_summary['tasks_failed_by_type'].get(task_type, 0)
                if position == last_position and nothing_ran:
                    prefix = " └─"
                else:
                    prefix = " ├─"
                report.append(f"{prefix} {task_type}: {count} found, {executed} executed, {failed} failed")

        if cycle_summary['total_found'] > 0:
            report.extend([
                f" ├─ Total Executed: {cycle_summary['total_executed']}",
                f" ├─ Total Failed: {cycle_summary['total_failed']}",
                f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}",
            ])
        else:
            report.append(f" └─ No tasks found - scheduler idle")

        logger.warning("\n".join(report))

        # Persist the cycle for historical tracking; failure here is non-fatal.
        try:
            db.add(SchedulerEventLog(
                event_type='check_cycle',
                event_date=started_at,
                check_cycle_number=scheduler.stats['total_checks'],
                check_interval_minutes=scheduler.current_check_interval_minutes,
                tasks_found=cycle_summary.get('total_found', 0),
                tasks_executed=cycle_summary.get('total_executed', 0),
                tasks_failed=cycle_summary.get('total_failed', 0),
                tasks_by_type=cycle_summary.get('tasks_found_by_type', {}),
                check_duration_seconds=check_duration,
                active_strategies_count=active_strategies,
                active_executions=active_executions,
                event_data={
                    'executed_by_type': cycle_summary.get('tasks_executed_by_type', {}),
                    'failed_by_type': cycle_summary.get('tasks_failed_by_type', {})
                }
            ))
            db.commit()
        except Exception as log_error:
            logger.warning(f"Failed to save check cycle event log: {log_error}")
            if db:
                db.rollback()

        # Timestamp read by the frontend when polling for changes.
        scheduler.stats['last_update'] = datetime.utcnow().isoformat()

    except Exception as exc:
        wrapped = DatabaseError(
            message=f"Error checking for due tasks: {str(exc)}",
            original_error=exc
        )
        scheduler.exception_handler.handle_exception(wrapped)
        logger.error(f"[Scheduler Check] ❌ Error in check cycle: {str(exc)}")
    finally:
        if db:
            db.close()

View File

@@ -0,0 +1,139 @@
"""
Interval Manager
Handles intelligent scheduling interval adjustment based on active strategies.
"""
from typing import TYPE_CHECKING
from datetime import datetime
from sqlalchemy.orm import Session
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("interval_manager")
async def determine_optimal_interval(
    scheduler: 'TaskScheduler',
    min_interval: int,
    max_interval: int
) -> int:
    """
    Pick the check interval to use, based on active strategies.

    Opens its own database session, counts active strategies that have tasks,
    stores that count in ``scheduler.stats['active_strategies_count']`` and
    returns ``min_interval`` when at least one exists, ``max_interval``
    otherwise. If no session is available or anything fails, the shorter
    ``min_interval`` is returned.

    Args:
        scheduler: TaskScheduler instance
        min_interval: Minimum check interval in minutes
        max_interval: Maximum check interval in minutes

    Returns:
        Optimal check interval in minutes
    """
    session = None
    try:
        session = get_db_session()
        if session:
            from services.active_strategy_service import ActiveStrategyService

            strategy_count = ActiveStrategyService(
                db_session=session
            ).count_active_strategies_with_tasks()
            scheduler.stats['active_strategies_count'] = strategy_count

            if strategy_count > 0:
                logger.info(f"Found {strategy_count} active strategies with tasks - using {min_interval}min interval")
                return min_interval

            logger.info(f"No active strategies with tasks - using {max_interval}min interval")
            return max_interval
    except Exception as exc:
        logger.warning(f"Error determining optimal interval: {exc}, using default {min_interval}min")
    finally:
        if session:
            session.close()

    # No session or an error: fall back to the shorter (safer) interval.
    return min_interval
async def adjust_check_interval_if_needed(
    scheduler: 'TaskScheduler',
    db: Session
):
    """
    Intelligently adjust check interval based on active strategies.

    If there are active strategies with tasks, the scheduler's minimum
    interval is used; otherwise its maximum interval. The ``check_due_tasks``
    job is only rescheduled when the chosen interval differs from the current
    one. Each adjustment is also written to ``SchedulerEventLog`` using a
    separate, short-lived session.

    All errors are logged and swallowed; this function never raises.

    Args:
        scheduler: TaskScheduler instance
        db: Database session used to count active strategies
    """
    try:
        from services.active_strategy_service import ActiveStrategyService

        active_strategy_service = ActiveStrategyService(db_session=db)
        active_count = active_strategy_service.count_active_strategies_with_tasks()
        scheduler.stats['active_strategies_count'] = active_count

        # Determine optimal interval
        if active_count > 0:
            optimal_interval = scheduler.min_check_interval_minutes
        else:
            optimal_interval = scheduler.max_check_interval_minutes

        # Only reschedule if interval needs to change
        previous_interval = scheduler.current_check_interval_minutes
        if optimal_interval == previous_interval:
            return

        interval_message = (
            f"[Scheduler] ⚙️ Adjusting Check Interval\n"
            f" ├─ Current: {previous_interval}min\n"
            f" ├─ Optimal: {optimal_interval}min\n"
            f" ├─ Active Strategies: {active_count}\n"
            f" └─ Reason: {'Active strategies detected' if active_count > 0 else 'No active strategies'}"
        )
        logger.warning(interval_message)

        # Reschedule the job with new interval
        scheduler.scheduler.modify_job(
            'check_due_tasks',
            trigger=scheduler._get_trigger_for_interval(optimal_interval)
        )

        # Record the new interval only after the job was actually modified
        scheduler.current_check_interval_minutes = optimal_interval
        scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()

        # Save interval adjustment event to database. The session is closed in
        # `finally` so a failing add()/commit() cannot leak it.
        event_db = None
        try:
            event_db = get_db_session()
            if event_db:
                event_log = SchedulerEventLog(
                    event_type='interval_adjustment',
                    event_date=datetime.utcnow(),
                    previous_interval_minutes=previous_interval,
                    new_interval_minutes=optimal_interval,
                    check_interval_minutes=optimal_interval,
                    active_strategies_count=active_count,
                    event_data={
                        'reason': 'intelligent_scheduling',
                        'min_interval': scheduler.min_check_interval_minutes,
                        'max_interval': scheduler.max_check_interval_minutes
                    }
                )
                event_db.add(event_log)
                event_db.commit()
        except Exception as e:
            logger.warning(f"Failed to save interval adjustment event log: {e}")
        finally:
            if event_db:
                event_db.close()

        logger.warning(f"[Scheduler] ✅ Interval adjusted to {optimal_interval}min")

    except Exception as e:
        logger.warning(f"Error adjusting check interval: {e}")

View File

@@ -0,0 +1,269 @@
"""
Job Restoration
Handles restoration of one-time jobs (e.g., persona generation) on scheduler startup.
Preserves original scheduled times from database to avoid rescheduling on server restarts.
"""
from typing import TYPE_CHECKING
from datetime import datetime, timezone, timedelta
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from models.scheduler_models import SchedulerEventLog
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("job_restoration")
async def _restore_single_persona_job(
    scheduler,
    db,
    now,
    user_id,
    job_id,
    label,
    task_func,
    schedule_func,
    grace_period_seconds=3600,
):
    """
    Restore one one-time persona generation job for one user.

    Shared by the research and Facebook restoration paths, which differ only
    in the job id, the wording of the log messages and the two callables.

    Decision order:
      1. Job already present in the scheduler -> skip.
      2. A ``job_completed`` / ``job_failed`` event exists -> skip.
      3. No usable ``job_scheduled`` event / no ``scheduled_for`` value ->
         schedule a new job 20 minutes from now via ``schedule_func``.
      4. Original time is in the past but within the grace period ->
         run ``task_func`` immediately.
      5. Original time is in the future -> re-register it at that time.
      6. Otherwise (too old) -> skip.

    Args:
        scheduler: TaskScheduler instance
        db: Database session used to query SchedulerEventLog
        now: Timezone-aware "current time" shared by the whole restoration run
        user_id: Owner of the job
        job_id: Scheduler job id to restore
        label: Persona kind as written mid-sentence in logs
            (e.g. ``"research"``, ``"Facebook"``)
        task_func: Coroutine function executed for a missed job
        schedule_func: Callable ``(user_id, delay_minutes=...)`` that
            schedules a brand new job
        grace_period_seconds: How long after its scheduled time a missed job
            is still executed

    Returns:
        ``(restored, skipped)`` increments, each 0 or 1
    """
    # Same label, capitalised for messages that start with it.
    title = label[0].upper() + label[1:]

    # Check if job already exists in scheduler (just started, so unlikely)
    if any(job.id == job_id for job in scheduler.scheduler.get_jobs()):
        logger.debug(f"{title} persona job {job_id} already exists in scheduler, skipping restoration")
        return 0, 1

    # Most recent record of when the job was originally scheduled
    original_scheduled_event = db.query(SchedulerEventLog).filter(
        SchedulerEventLog.event_type == 'job_scheduled',
        SchedulerEventLog.job_id == job_id,
        SchedulerEventLog.user_id == user_id
    ).order_by(SchedulerEventLog.event_date.desc()).first()

    # Most recent record of the job having finished (successfully or not)
    completed_event = db.query(SchedulerEventLog).filter(
        SchedulerEventLog.event_type.in_(['job_completed', 'job_failed']),
        SchedulerEventLog.job_id == job_id,
        SchedulerEventLog.user_id == user_id
    ).order_by(SchedulerEventLog.event_date.desc()).first()

    if completed_event:
        logger.debug(f"{title} persona job {job_id} already completed/failed, skipping restoration")
        return 0, 1

    if not (original_scheduled_event and original_scheduled_event.event_data):
        logger.warning(
            f"[Restoration] No previous scheduled event found for {label} persona job {job_id}, "
            f"scheduling NEW job with current time + 20 minutes"
        )
        schedule_func(user_id, delay_minutes=20)
        return 1, 0

    scheduled_for_str = original_scheduled_event.event_data.get('scheduled_for')
    if not scheduled_for_str:
        logger.warning(
            f"[Restoration] No original scheduled time found for {label} persona job {job_id}, "
            f"scheduling NEW job with current time + 20 minutes"
        )
        schedule_func(user_id, delay_minutes=20)
        return 1, 0

    try:
        original_time = datetime.fromisoformat(scheduled_for_str.replace('Z', '+00:00'))
        if original_time.tzinfo is None:
            # Stored without offset: treat as UTC so it can be compared to `now`
            original_time = original_time.replace(tzinfo=timezone.utc)

        time_since_scheduled = (now - original_time).total_seconds()
        if 0 < time_since_scheduled <= grace_period_seconds:
            # Missed while the server was down but still recent: run it now
            logger.warning(f"Restoring {label} persona job {job_id} - original time was {original_time}, executing now (missed)")
            try:
                await task_func(user_id)
            except Exception as exec_error:
                logger.error(f"Error executing missed {label} persona job {job_id}: {exec_error}")
            return 0, 0

        if original_time > now:
            # Still in the future: re-register at the original time
            time_until_run = (original_time - now).total_seconds() / 60  # minutes
            logger.warning(
                f"[Restoration] Restoring {label} persona job {job_id} with ORIGINAL scheduled time: "
                f"{original_time} (UTC) = {original_time.astimezone().strftime('%H:%M:%S %Z')} (local), "
                f"will run in {time_until_run:.1f} minutes"
            )
            scheduler.schedule_one_time_task(
                func=task_func,
                run_date=original_time,
                job_id=job_id,
                kwargs={'user_id': user_id},
                replace_existing=True
            )
            return 1, 0

        # Too old (beyond grace period), skip
        logger.debug(f"{title} persona job {job_id} scheduled time {original_time} is too old, skipping")
        return 0, 1
    except Exception as time_error:
        # Covers unparsable timestamps as well as failures while re-registering
        logger.warning(f"Error parsing original scheduled time for {job_id}: {time_error}, scheduling new job")
        schedule_func(user_id, delay_minutes=20)
        return 1, 0


async def restore_persona_jobs(scheduler: 'TaskScheduler'):
    """
    Restore one-time persona generation jobs for users who completed onboarding
    but don't have personas yet. This ensures jobs persist across server restarts.

    IMPORTANT: Preserves original scheduled times from SchedulerEventLog to avoid
    rescheduling jobs with new times on server restarts.

    For every onboarding session with ``progress == 100.0`` two jobs are
    considered: a research persona job (when no research persona is stored)
    and a Facebook persona job (when a core persona exists but no Facebook
    persona does). Failures for one user or one job type are logged and do
    not stop the remaining restorations; this function never raises.

    Args:
        scheduler: TaskScheduler instance
    """
    try:
        db = get_db_session()
        if not db:
            logger.warning("Could not get database session to restore persona jobs")
            return

        try:
            from models.onboarding import OnboardingSession
            from services.research.research_persona_scheduler import (
                schedule_research_persona_generation,
                generate_research_persona_task
            )
            from services.persona.facebook.facebook_persona_scheduler import (
                schedule_facebook_persona_generation,
                generate_facebook_persona_task
            )
            from services.research.research_persona_service import ResearchPersonaService
            from services.persona_data_service import PersonaDataService

            # Get all users who completed onboarding
            completed_sessions = db.query(OnboardingSession).filter(
                OnboardingSession.progress == 100.0
            ).all()

            restored_count = 0
            skipped_count = 0
            now = datetime.now(timezone.utc)

            for session in completed_sessions:
                user_id = session.user_id

                # Restore research persona job
                try:
                    research_service = ResearchPersonaService(db_session=db)
                    persona_data_record = research_service._get_persona_data_record(user_id)
                    research_persona_exists = bool(
                        persona_data_record
                        and getattr(persona_data_record, 'research_persona', None)
                    )

                    if not research_persona_exists:
                        # Note: Clerk user_id already includes "user_" prefix
                        restored, skipped = await _restore_single_persona_job(
                            scheduler,
                            db,
                            now,
                            user_id,
                            f"research_persona_{user_id}",
                            "research",
                            generate_research_persona_task,
                            schedule_research_persona_generation,
                        )
                        restored_count += restored
                        skipped_count += skipped
                except Exception as e:
                    logger.debug(f"Could not restore research persona for user {user_id}: {e}")

                # Restore Facebook persona job
                try:
                    persona_data_service = PersonaDataService(db_session=db)
                    persona_data = persona_data_service.get_user_persona_data(user_id)
                    platform_personas = persona_data.get('platform_personas', {}) if persona_data else {}
                    facebook_persona_exists = bool(platform_personas.get('facebook') if platform_personas else None)
                    has_core_persona = bool(persona_data.get('core_persona') if persona_data else False)

                    if not facebook_persona_exists and has_core_persona:
                        # Note: Clerk user_id already includes "user_" prefix
                        restored, skipped = await _restore_single_persona_job(
                            scheduler,
                            db,
                            now,
                            user_id,
                            f"facebook_persona_{user_id}",
                            "Facebook",
                            generate_facebook_persona_task,
                            schedule_facebook_persona_generation,
                        )
                        restored_count += restored
                        skipped_count += skipped
                except Exception as e:
                    logger.debug(f"Could not restore Facebook persona for user {user_id}: {e}")

            if restored_count > 0:
                logger.warning(f"[Scheduler] ✅ Restored {restored_count} persona generation job(s) on startup (preserved original scheduled times)")
            if skipped_count > 0:
                logger.debug(f"[Scheduler] Skipped {skipped_count} persona job(s) (already completed/failed or exist)")

        finally:
            db.close()

    except Exception as e:
        logger.warning(f"Error restoring persona jobs: {e}")

View File

@@ -0,0 +1,196 @@
"""
OAuth Token Monitoring Task Restoration
Automatically creates missing OAuth monitoring tasks for users who have connected platforms
but don't have monitoring tasks created yet.
"""
from datetime import datetime, timedelta
from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
from services.oauth_token_monitoring_service import get_connected_platforms, create_oauth_monitoring_tasks
# Use service logger for consistent logging (WARNING level visible in production)
logger = get_service_logger("oauth_task_restoration")
async def restore_oauth_monitoring_tasks(scheduler):
    """
    Restore/create missing OAuth token monitoring tasks for all users.

    Candidate users are those who already own at least one
    ``OAuthTokenMonitoringTask`` plus those whose onboarding is reported as
    completed. For each candidate, platforms returned by
    ``get_connected_platforms`` that have no monitoring task yet get one via
    ``create_oauth_monitoring_tasks``.

    Errors for a single user are logged and skipped; a failure to load the
    onboarding users falls back to users with existing tasks only. This
    function never raises.

    Args:
        scheduler: TaskScheduler instance (not used by this function)
    """
    try:
        logger.warning("[OAuth Task Restoration] Starting OAuth monitoring task restoration...")
        db = get_db_session()
        if not db:
            logger.warning("[OAuth Task Restoration] Could not get database session")
            return

        try:
            # Load all existing OAuth tasks and index them once:
            #   user_id  -> set of platforms that already have a task
            #   platform -> number of tasks (for the summary log)
            existing_tasks = db.query(OAuthTokenMonitoringTask).all()
            platforms_by_user = {}
            existing_by_platform = {}
            for task in existing_tasks:
                platforms_by_user.setdefault(task.user_id, set()).add(task.platform)
                existing_by_platform[task.platform] = existing_by_platform.get(task.platform, 0) + 1
            user_ids_with_tasks = set(platforms_by_user)

            platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(existing_by_platform.items())])
            logger.warning(
                f"[OAuth Task Restoration] Found {len(existing_tasks)} existing OAuth tasks "
                f"for {len(user_ids_with_tasks)} users. Platforms: {platform_summary}"
            )

            # Check users who already have at least one OAuth task
            users_to_check = list(platforms_by_user)

            # Also include users who completed onboarding, to catch users who
            # connected platforms but never got tasks created.
            # Completion is tracked by: current_step >= 6 OR progress >= 100.0
            try:
                from services.onboarding.progress_service import get_onboarding_progress_service
                from models.onboarding import OnboardingSession
                from sqlalchemy import or_

                progress_service = get_onboarding_progress_service()

                completed_sessions = db.query(OnboardingSession).filter(
                    or_(
                        OnboardingSession.current_step >= 6,
                        OnboardingSession.progress >= 100.0
                    )
                ).all()

                # Validate each candidate through the service for consistency
                onboarding_user_ids = set()
                for session in completed_sessions:
                    status = progress_service.get_onboarding_status(session.user_id)
                    if status.get('is_completed', False):
                        onboarding_user_ids.add(session.user_id)

                # Users from onboarding who do not have any task yet
                new_user_ids = onboarding_user_ids - user_ids_with_tasks
                users_to_check = users_to_check + list(new_user_ids)

                logger.warning(
                    f"[OAuth Task Restoration] Checking {len(users_to_check)} users "
                    f"({len(user_ids_with_tasks)} with existing tasks, "
                    f"{len(onboarding_user_ids)} from onboarding sessions, "
                    f"{len(new_user_ids)} new users to check)"
                )
            except Exception as e:
                # Fallback to users with existing tasks only
                logger.warning(f"[OAuth Task Restoration] Could not query onboarding users: {e}")

            total_created = 0
            for user_id in users_to_check:
                try:
                    # Get connected platforms for this user
                    connected_platforms = get_connected_platforms(user_id)

                    logger.warning(
                        f"[OAuth Task Restoration] User {user_id}: "
                        f"Connected platforms: {connected_platforms}"
                    )

                    if not connected_platforms:
                        logger.debug(
                            f"[OAuth Task Restoration] No connected platforms for user {user_id}, skipping"
                        )
                        continue

                    # Check which platforms are missing tasks
                    existing_platforms = platforms_by_user.get(user_id, set())
                    missing_platforms = [
                        platform
                        for platform in connected_platforms
                        if platform not in existing_platforms
                    ]

                    if missing_platforms:
                        logger.warning(
                            f"[OAuth Task Restoration] ⚠️ User {user_id} has connected platforms "
                            f"{connected_platforms} but missing tasks for: {missing_platforms}"
                        )

                        # Create missing tasks
                        created = create_oauth_monitoring_tasks(
                            user_id=user_id,
                            db=db,
                            platforms=missing_platforms
                        )

                        total_created += len(created)

                        logger.warning(
                            f"[OAuth Task Restoration] ✅ Created {len(created)} missing OAuth tasks "
                            f"for user {user_id}, platforms: {missing_platforms}"
                        )
                    else:
                        logger.warning(
                            f"[OAuth Task Restoration] ✅ User {user_id} has all required tasks "
                            f"for connected platforms: {connected_platforms}"
                        )

                except Exception as e:
                    logger.warning(
                        f"[OAuth Task Restoration] Error checking/creating tasks for user {user_id}: {e}",
                        exc_info=True
                    )
                    continue

            # Final summary log with platform breakdown (re-queried so that
            # tasks created above are included)
            final_existing_tasks = db.query(OAuthTokenMonitoringTask).all()
            final_by_platform = {}
            for task in final_existing_tasks:
                final_by_platform[task.platform] = final_by_platform.get(task.platform, 0) + 1

            final_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(final_by_platform.items())])

            if total_created > 0:
                logger.warning(
                    f"[OAuth Task Restoration] ✅ Created {total_created} missing OAuth monitoring tasks. "
                    f"Final platform breakdown: {final_platform_summary}"
                )
            else:
                logger.warning(
                    f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
                    f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks. "
                    f"Platform breakdown: {final_platform_summary}"
                )

        finally:
            db.close()

    except Exception as e:
        logger.error(
            f"[OAuth Task Restoration] Error restoring OAuth monitoring tasks: {e}",
            exc_info=True
        )

View File

@@ -10,6 +10,7 @@ from datetime import datetime
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.triggers.date import DateTrigger
from sqlalchemy.orm import Session
from .executor_interface import TaskExecutor, TaskExecutionResult
@@ -20,6 +21,13 @@ from .exception_handler import (
)
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from ..utils.user_job_store import get_user_job_store_name
from models.scheduler_models import SchedulerEventLog
from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
from .job_restoration import restore_persona_jobs
from .oauth_task_restoration import restore_oauth_monitoring_tasks
from .check_cycle_handler import check_and_execute_due_tasks
from .task_execution_handler import execute_task_async
logger = get_service_logger("task_scheduler")
@@ -34,6 +42,14 @@ class TaskScheduler:
- Database-backed task persistence
- Configurable check intervals
- Automatic retry logic
- User isolation: All tasks are filtered by user_id for isolation
- Per-user job store context: Logs show user's website root for debugging
User Isolation:
- Tasks are filtered by user_id in task loaders
- Execution logs include user_id for tracking
- Per-user statistics are maintained
- Job store names (based on website root) are logged for debugging
"""
def __init__(
@@ -63,7 +79,7 @@ class TaskScheduler:
job_defaults={
'coalesce': True,
'max_instances': 1,
'misfire_grace_time': 300 # 5 minutes grace period
'misfire_grace_time': 3600 # 1 hour grace period for missed jobs
}
)
@@ -89,6 +105,7 @@ class TaskScheduler:
'tasks_failed': 0,
'tasks_skipped': 0,
'last_check': None,
'last_update': datetime.utcnow().isoformat(), # Timestamp for frontend polling
'per_user_stats': {}, # Track metrics per user for user isolation
'active_strategies_count': 0, # Track active strategies with tasks
'last_interval_adjustment': None # Track when interval was last adjusted
@@ -141,7 +158,11 @@ class TaskScheduler:
try:
# Determine initial check interval based on active strategies
initial_interval = await self._determine_optimal_interval()
initial_interval = await determine_optimal_interval(
self,
self.min_check_interval_minutes,
self.max_check_interval_minutes
)
self.current_check_interval_minutes = initial_interval
# Add periodic job to check for due tasks
@@ -155,16 +176,228 @@ class TaskScheduler:
self.scheduler.start()
self._running = True
logger.info(
f"Task scheduler started | "
f"check_interval={initial_interval}min | "
f"registered_types={self.registry.get_registered_types()}"
)
# Check for and execute any missed jobs that are still within grace period
await self._execute_missed_jobs()
# Restore one-time persona generation jobs for users who completed onboarding
await restore_persona_jobs(self)
# Restore/create missing OAuth token monitoring tasks for connected platforms
await restore_oauth_monitoring_tasks(self)
# Get all scheduled APScheduler jobs (including one-time tasks)
all_jobs = self.scheduler.get_jobs()
registered_types = self.registry.get_registered_types()
active_strategies = self.stats.get('active_strategies_count', 0)
# Count OAuth token monitoring tasks from database (recurring weekly tasks)
oauth_tasks_count = 0
oauth_tasks_details = []
try:
db = get_db_session()
if db:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
# Count active tasks
oauth_tasks_count = db.query(OAuthTokenMonitoringTask).filter(
OAuthTokenMonitoringTask.status == 'active'
).count()
# Get all tasks (for detailed logging)
all_oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
total_oauth_tasks = len(all_oauth_tasks)
# Show platform breakdown for ALL tasks (active and inactive)
all_platforms = {}
active_platforms = {}
for task in all_oauth_tasks:
all_platforms[task.platform] = all_platforms.get(task.platform, 0) + 1
if task.status == 'active':
active_platforms[task.platform] = active_platforms.get(task.platform, 0) + 1
if total_oauth_tasks > 0:
# Log details about all tasks (not just active)
for task in all_oauth_tasks:
oauth_tasks_details.append(
f"user={task.user_id}, platform={task.platform}, status={task.status}"
)
if total_oauth_tasks > 0 and oauth_tasks_count == 0:
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
logger.warning(
f"[Scheduler] Found {total_oauth_tasks} OAuth monitoring tasks in database, "
f"but {oauth_tasks_count} are active. "
f"All platforms: {all_platform_summary}. "
f"Task details: {', '.join(oauth_tasks_details[:5])}" # Limit to first 5 for readability
)
elif oauth_tasks_count > 0:
# Show platform breakdown for active tasks
active_platform_summary = ", ".join([f"{platform}: {count}" for platform, count in sorted(active_platforms.items())])
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
# Check for missing platforms (expected: gsc, bing, wordpress, wix)
expected_platforms = ['gsc', 'bing', 'wordpress', 'wix']
missing_in_db = [p for p in expected_platforms if p not in all_platforms]
if missing_in_db:
logger.warning(
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
f"All platforms: {all_platform_summary}. "
f"⚠️ Missing platforms (not connected or no tasks): {', '.join(missing_in_db)}"
)
else:
logger.warning(
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
f"All platforms: {all_platform_summary}"
)
db.close()
except Exception as e:
logger.warning(
f"[Scheduler] Could not get OAuth token monitoring tasks count: {e}. "
f"This may indicate the oauth_token_monitoring_tasks table doesn't exist yet or "
f"tasks haven't been created. Error type: {type(e).__name__}"
)
# Calculate job counts
apscheduler_recurring = 1 # check_due_tasks
apscheduler_one_time = len(all_jobs) - 1
total_recurring = apscheduler_recurring + oauth_tasks_count
total_jobs = len(all_jobs) + oauth_tasks_count
# Build comprehensive startup log message
startup_lines = [
f"[Scheduler] ✅ Task Scheduler Started",
f" ├─ Check Interval: {initial_interval} minutes",
f" ├─ Registered Task Types: {len(registered_types)} ({', '.join(registered_types) if registered_types else 'none'})",
f" ├─ Active Strategies: {active_strategies}",
f" ├─ Total Scheduled Jobs: {total_jobs}",
f" ├─ Recurring Jobs: {total_recurring} (check_due_tasks: {apscheduler_recurring}, OAuth monitoring: {oauth_tasks_count})",
f" └─ One-Time Jobs: {apscheduler_one_time}"
]
# Add APScheduler job details
if all_jobs:
for idx, job in enumerate(all_jobs):
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0
prefix = " └─" if is_last else " ├─"
next_run = job.next_run_time
trigger_type = type(job.trigger).__name__
# Try to extract user_id from job ID or kwargs for context
user_context = ""
user_id_from_job = None
# First try to get from kwargs
if hasattr(job, 'kwargs') and job.kwargs and job.kwargs.get('user_id'):
user_id_from_job = job.kwargs.get('user_id')
# Otherwise, try to extract from job ID (e.g., "research_persona_user_123..." or "research_persona_user123")
elif job.id and ('research_persona_' in job.id or 'facebook_persona_' in job.id):
# Job ID format: research_persona_{user_id} or facebook_persona_{user_id}
# where user_id is Clerk format (e.g., "user_33Gz1FPI86VDXhRY8QN4ragRFGN")
if job.id.startswith('research_persona_'):
user_id_from_job = job.id.replace('research_persona_', '')
elif job.id.startswith('facebook_persona_'):
user_id_from_job = job.id.replace('facebook_persona_', '')
else:
# Fallback: try to extract from parts (old format with timestamp)
parts = job.id.split('_')
if len(parts) >= 3:
user_id_from_job = parts[2] # Extract user_id from job ID
if user_id_from_job:
try:
db = get_db_session()
if db:
user_job_store = get_user_job_store_name(user_id_from_job, db)
if user_job_store == 'default':
logger.debug(
f"[Scheduler] Job store extraction returned 'default' for user {user_id_from_job}. "
f"This may indicate no onboarding data or website URL not found."
)
user_context = f" | User: {user_id_from_job} | Store: {user_job_store}"
db.close()
except Exception as e:
logger.warning(
f"[Scheduler] Could not extract job store name for user {user_id_from_job}: {e}. "
f"Error type: {type(e).__name__}"
)
user_context = f" | User: {user_id_from_job}"
startup_lines.append(f"{prefix} Job: {job.id} | Trigger: {trigger_type} | Next Run: {next_run}{user_context}")
# Add OAuth token monitoring tasks details
# Show ALL OAuth tasks (active and inactive) for complete visibility
if total_oauth_tasks > 0:
try:
db = get_db_session()
if db:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
# Get ALL tasks, not just active ones
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
for idx, task in enumerate(oauth_tasks):
is_last = idx == len(oauth_tasks) - 1 and len(all_jobs) == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
if user_job_store == 'default':
logger.debug(
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
f"This may indicate no onboarding data or website URL not found."
)
except Exception as e:
logger.warning(
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
f"Using 'default'. Error type: {type(e).__name__}"
)
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
# Include status in the log line for visibility
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
)
db.close()
except Exception as e:
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
# Log comprehensive startup information in single message
logger.warning("\n".join(startup_lines))
# Save scheduler start event to database
try:
db = get_db_session()
if db:
event_log = SchedulerEventLog(
event_type='start',
event_date=datetime.utcnow(),
check_interval_minutes=initial_interval,
active_strategies_count=active_strategies,
event_data={
'registered_types': registered_types,
'total_jobs': total_jobs,
'recurring_jobs': total_recurring,
'one_time_jobs': apscheduler_one_time,
'oauth_monitoring_tasks': oauth_tasks_count
}
)
db.add(event_log)
db.commit()
db.close()
except Exception as e:
logger.warning(f"Failed to save scheduler start event log: {e}")
except Exception as e:
logger.error(f"Failed to start scheduler: {e}")
raise
async def stop(self):
"""Stop the scheduler gracefully."""
if not self._running:
@@ -182,11 +415,48 @@ class TaskScheduler:
timeout=30
)
# Get final job count before shutdown
all_jobs_before = self.scheduler.get_jobs()
# Shutdown scheduler
self.scheduler.shutdown(wait=True)
self._running = False
logger.info("Task scheduler stopped gracefully")
# Log comprehensive shutdown information (use WARNING level for visibility)
total_checks = self.stats.get('total_checks', 0)
total_executed = self.stats.get('tasks_executed', 0)
total_failed = self.stats.get('tasks_failed', 0)
shutdown_message = (
f"[Scheduler] 🛑 Task Scheduler Stopped\n"
f" ├─ Total Check Cycles: {total_checks}\n"
f" ├─ Total Tasks Executed: {total_executed}\n"
f" ├─ Total Tasks Failed: {total_failed}\n"
f" ├─ Jobs Cancelled: {len(all_jobs_before)}\n"
f" └─ Shutdown: Graceful"
)
logger.warning(shutdown_message)
# Save scheduler stop event to database
try:
db = get_db_session()
if db:
event_log = SchedulerEventLog(
event_type='stop',
event_date=datetime.utcnow(),
check_interval_minutes=self.current_check_interval_minutes,
event_data={
'total_checks': total_checks,
'total_executed': total_executed,
'total_failed': total_failed,
'jobs_cancelled': len(all_jobs_before)
}
)
db.add(event_log)
db.commit()
db.close()
except Exception as e:
logger.warning(f"Failed to save scheduler stop event log: {e}")
except Exception as e:
logger.error(f"Error stopping scheduler: {e}")
@@ -197,109 +467,50 @@ class TaskScheduler:
Main scheduler loop: check for due tasks and execute them.
This runs periodically with intelligent interval adjustment based on active strategies.
"""
self.stats['total_checks'] += 1
self.stats['last_check'] = datetime.utcnow().isoformat()
logger.debug("Checking for due tasks...")
db = None
try:
db = get_db_session()
if db is None:
logger.error("Failed to get database session")
return
# Check for active strategies and adjust interval intelligently
await self._adjust_check_interval_if_needed(db)
# Check each registered task type
for task_type in self.registry.get_registered_types():
await self._process_task_type(task_type, db)
except Exception as e:
error = DatabaseError(
message=f"Error checking for due tasks: {str(e)}",
original_error=e
)
self.exception_handler.handle_exception(error)
finally:
if db:
db.close()
async def _determine_optimal_interval(self) -> int:
    """
    Pick the check interval (in minutes) from the number of active strategies.

    Counts active strategies that have tasks, stores the count in
    ``self.stats['active_strategies_count']`` and returns the minimum interval
    when at least one exists, otherwise the maximum interval. If no database
    session is available or the lookup fails, the minimum interval is returned.

    Returns:
        Optimal check interval in minutes
    """
    session = None
    try:
        session = get_db_session()
        if session:
            from services.active_strategy_service import ActiveStrategyService

            strategy_service = ActiveStrategyService(db_session=session)
            active_count = strategy_service.count_active_strategies_with_tasks()
            self.stats['active_strategies_count'] = active_count

            # Guard clause: any active strategy means we poll at the short interval
            if active_count > 0:
                logger.info(f"Found {active_count} active strategies with tasks - using {self.min_check_interval_minutes}min interval")
                return self.min_check_interval_minutes

            logger.info(f"No active strategies with tasks - using {self.max_check_interval_minutes}min interval")
            return self.max_check_interval_minutes
    except Exception as e:
        logger.warning(f"Error determining optimal interval: {e}, using default {self.min_check_interval_minutes}min")
    finally:
        if session:
            session.close()

    # No session or lookup error: fall back to the shorter interval (safer)
    return self.min_check_interval_minutes
await check_and_execute_due_tasks(self)
async def _adjust_check_interval_if_needed(self, db: Session):
    """
    Re-evaluate the scheduler check interval using the given database session.

    Thin wrapper kept on the class: it forwards this scheduler instance and
    ``db`` to the ``adjust_check_interval_if_needed`` helper and awaits it.
    The intent is to check more frequently while active strategies with tasks
    exist and less frequently otherwise.

    Args:
        db: Database session
    """
    await adjust_check_interval_if_needed(self, db)
async def _execute_missed_jobs(self):
"""
Check for and execute any missed DateTrigger jobs that are still within grace period.
APScheduler marks jobs as 'missed' if they were scheduled to run while the scheduler wasn't running.

Scans every job returned by the scheduler, keeps the DateTrigger jobs whose
next_run_time is in the past by at most 3600 seconds, and awaits each job's
function directly with its stored args/kwargs. A failure in one job is logged
and the remaining jobs are still attempted; a failure in the scan itself is
logged as a warning and not re-raised.
"""
# NOTE(review): this span is rendered from a diff. The ActiveStrategyService /
# optimal_interval / modify_job lines below reference `db`, which this method does
# not define - they look like leftover lines of the previous interval-adjustment
# body rather than part of this method. Confirm against the actual file.
try:
from services.active_strategy_service import ActiveStrategyService
all_jobs = self.scheduler.get_jobs()
# NOTE(review): utcnow() is naive UTC and is tagged with the scheduler's timezone
# via replace(); this is only correct if the scheduler timezone is UTC - confirm.
now = datetime.utcnow().replace(tzinfo=self.scheduler.timezone)
active_strategy_service = ActiveStrategyService(db_session=db)
active_count = active_strategy_service.count_active_strategies_with_tasks()
self.stats['active_strategies_count'] = active_count
missed_jobs = []
for job in all_jobs:
# Only check DateTrigger jobs (one-time tasks)
if hasattr(job, 'trigger') and isinstance(job.trigger, DateTrigger):
if job.next_run_time and job.next_run_time < now:
# Job's scheduled time has passed
time_since_scheduled = (now - job.next_run_time).total_seconds()
# Check if still within grace period (1 hour = 3600 seconds)
if time_since_scheduled <= 3600:
missed_jobs.append(job)
# Determine optimal interval
if active_count > 0:
optimal_interval = self.min_check_interval_minutes
else:
optimal_interval = self.max_check_interval_minutes
# Only reschedule if interval needs to change
if optimal_interval != self.current_check_interval_minutes:
logger.info(
f"Adjusting scheduler interval: {self.current_check_interval_minutes}min → {optimal_interval}min | "
f"active_strategies={active_count}"
if missed_jobs:
logger.warning(
f"[Scheduler] Found {len(missed_jobs)} missed job(s) within grace period, executing now..."
)
# Reschedule the job with new interval
self.scheduler.modify_job(
'check_due_tasks',
trigger=self._get_trigger_for_interval(optimal_interval)
)
self.current_check_interval_minutes = optimal_interval
self.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()
logger.info(f"Scheduler interval adjusted to {optimal_interval}min")
for job in missed_jobs:
try:
# Execute the job immediately
# NOTE(review): the job function is awaited directly but the job is not removed
# from the scheduler here - confirm APScheduler will not also fire it within its
# misfire grace time, which would run the task twice.
logger.info(f"[Scheduler] Executing missed job: {job.id}")
await job.func(*job.args, **job.kwargs)
except Exception as e:
logger.error(f"[Scheduler] Error executing missed job {job.id}: {e}")
except Exception as e:
logger.warning(f"Error adjusting check interval: {e}")
logger.warning(f"[Scheduler] Error checking for missed jobs: {e}")
async def trigger_interval_adjustment(self):
"""
@@ -315,14 +526,22 @@ class TaskScheduler:
try:
db = get_db_session()
if db:
await self._adjust_check_interval_if_needed(db)
await adjust_check_interval_if_needed(self, db)
db.close()
else:
logger.warning("Could not get database session for interval adjustment")
except Exception as e:
logger.warning(f"Error triggering interval adjustment: {e}")
async def _process_task_type(self, task_type: str, db: Session):
"""Process due tasks for a specific task type."""
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
"""
Process due tasks for a specific task type.
Returns:
Summary dict with 'found', 'executed', 'failed' counts, or None if no tasks
"""
summary = {'found': 0, 'executed': 0, 'failed': 0}
try:
# Get task loader for this type
try:
@@ -334,7 +553,7 @@ class TaskScheduler:
original_error=e
)
self.exception_handler.handle_exception(error)
return
return None
# Load due tasks (with error handling)
try:
@@ -346,28 +565,30 @@ class TaskScheduler:
original_error=e
)
self.exception_handler.handle_exception(error)
return
return None
if not due_tasks:
return
return None
summary['found'] = len(due_tasks)
self.stats['tasks_found'] += len(due_tasks)
logger.info(f"Found {len(due_tasks)} due tasks for type: {task_type}")
# Execute tasks (with concurrency limit)
execution_tasks = []
skipped_count = 0
for task in due_tasks:
if len(self.active_executions) >= self.max_concurrent_executions:
skipped_count = len(due_tasks) - len(execution_tasks)
logger.warning(
f"Max concurrent executions reached ({self.max_concurrent_executions}), "
f"skipping {len(due_tasks) - len(execution_tasks)} tasks"
f"[Scheduler] ⚠️ Max concurrent executions reached ({self.max_concurrent_executions}), "
f"skipping {skipped_count} tasks for {task_type}"
)
break
# Execute task asynchronously
# Note: Each task gets its own database session to prevent concurrent access issues
execution_task = asyncio.create_task(
self._execute_task_async(task_type, task)
execute_task_async(self, task_type, task, summary)
)
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
@@ -379,6 +600,8 @@ class TaskScheduler:
if execution_tasks:
await asyncio.wait(execution_tasks, timeout=300)
return summary
except Exception as e:
error = TaskLoaderError(
message=f"Error processing task type {task_type}: {str(e)}",
@@ -386,169 +609,8 @@ class TaskScheduler:
original_error=e
)
self.exception_handler.handle_exception(error)
return summary
async def _execute_task_async(self, task_type: str, task: Any):
    """
    Run one task in its own database session and record the outcome.

    A fresh session is opened per task because SQLAlchemy sessions cannot be
    shared across concurrently running tasks. The owning user is resolved from
    the task's strategy (before or after the task is merged into the new
    session) so that per-user statistics can be updated.

    Args:
        task_type: Type of task
        task: Task instance from database (detached from original session)
    """
    task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
    session = None
    user_id = None

    def _count_failure():
        # Reads user_id at call time, so it always sees the latest resolved value
        self.stats['tasks_failed'] += 1
        self._update_user_stats(user_id, success=False)

    try:
        # Best-effort user lookup while the task is still detached
        try:
            if hasattr(task, 'strategy') and task.strategy:
                user_id = getattr(task.strategy, 'user_id', None)
            elif hasattr(task, 'strategy_id') and task.strategy_id:
                # user_id is queried later, once a session exists
                pass
        except Exception as e:
            logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")

        logger.info(f"Executing task: {task_id} | user_id: {user_id}")

        # Dedicated session for this task
        session = get_db_session()
        if session is None:
            no_session_error = DatabaseError(
                message=f"Failed to get database session for task {task_id}",
                user_id=user_id,
                task_id=getattr(task, 'id', None),
                task_type=task_type
            )
            self.exception_handler.handle_exception(no_session_error, log_level="error")
            _count_failure()
            return

        # Let the exception handler use this task's session
        self.exception_handler.db = session

        # A task loaded by another session is detached; attach it to ours
        from sqlalchemy.orm import object_session
        if object_session(task) is None:
            task = session.merge(task)

        # Second chance to resolve the user now that the task is attached
        if user_id is None and hasattr(task, 'strategy'):
            try:
                if task.strategy:
                    user_id = getattr(task.strategy, 'user_id', None)
                elif hasattr(task, 'strategy_id'):
                    # Relationship not loaded: look the strategy up by id
                    from models.enhanced_strategy_models import EnhancedContentStrategy
                    strategy_query = session.query(EnhancedContentStrategy).filter(
                        EnhancedContentStrategy.id == task.strategy_id
                    )
                    owning_strategy = strategy_query.first()
                    if owning_strategy:
                        user_id = owning_strategy.user_id
            except Exception as e:
                logger.debug(f"Could not extract user_id after merge for task {task_id}: {e}")

        # Resolve the executor registered for this task type
        try:
            runner = self.registry.get_executor(task_type)
        except Exception as e:
            from .exception_handler import SchedulerConfigError
            config_error = SchedulerConfigError(
                message=f"Failed to get executor for task type {task_type}: {str(e)}",
                user_id=user_id,
                context={
                    "task_id": getattr(task, 'id', None),
                    "task_type": task_type
                },
                original_error=e
            )
            self.exception_handler.handle_exception(config_error)
            _count_failure()
            return

        # Run the task and account for the outcome
        try:
            outcome = await runner.execute_task(task, session)

            if outcome.success:
                self.stats['tasks_executed'] += 1
                self._update_user_stats(user_id, success=True)
                logger.info(f"Task executed successfully: {task_id} | user_id: {user_id}")
            else:
                _count_failure()

                # Report the failed result through the structured handler
                failure = TaskExecutionError(
                    message=outcome.error_message or "Task execution failed",
                    user_id=user_id,
                    task_id=getattr(task, 'id', None),
                    task_type=task_type,
                    execution_time_ms=outcome.execution_time_ms,
                    context={"result_data": outcome.result_data}
                )
                self.exception_handler.handle_exception(failure, log_level="warning")

                # Retry only when enabled and the executor marked the result retryable
                if self.enable_retries and outcome.retryable:
                    await self._schedule_retry(task, outcome.retry_delay)

        except SchedulerException:
            # Scheduler exceptions are handled by the outer handler below
            raise
        except Exception as e:
            # Anything else is wrapped into a structured execution error
            wrapped = TaskExecutionError(
                message=f"Unexpected error during task execution: {str(e)}",
                user_id=user_id,
                task_id=getattr(task, 'id', None),
                task_type=task_type,
                original_error=e
            )
            self.exception_handler.handle_exception(wrapped)
            _count_failure()

    except SchedulerException as e:
        self.exception_handler.handle_exception(e)
        _count_failure()
    except Exception as e:
        wrapper_error = TaskExecutionError(
            message=f"Unexpected error in task execution wrapper: {str(e)}",
            user_id=user_id,
            task_id=getattr(task, 'id', None),
            task_type=task_type,
            original_error=e
        )
        self.exception_handler.handle_exception(wrapper_error)
        _count_failure()
    finally:
        # Always release the per-task session
        if session:
            try:
                session.close()
            except Exception as e:
                logger.error(f"Error closing database session for task {task_id}: {e}")

        # Drop the bookkeeping entry for this execution, if present
        if task_id in self.active_executions:
            del self.active_executions[task_id]
def _update_user_stats(self, user_id: Optional[int], success: bool):
"""
@@ -622,6 +684,117 @@ class TaskScheduler:
return base_stats
def schedule_one_time_task(
    self,
    func: Callable,
    run_date: datetime,
    job_id: str,
    args: tuple = (),
    kwargs: Optional[Dict[str, Any]] = None,
    replace_existing: bool = True
) -> str:
    """
    Schedule a one-time task to run at a specific datetime.

    The job is added with a ``DateTrigger`` and a one-hour misfire grace
    period. Afterwards a summary is logged and a ``job_scheduled`` event row
    is written; errors while resolving the user's job store name or writing
    the event row are logged and swallowed so they never fail the scheduling.
    Database sessions opened for that bookkeeping are always closed, including
    when the lookup or commit raises.

    Args:
        func: Async function to execute
        run_date: Datetime when the task should run; a naive value is tagged as UTC
        job_id: Unique identifier for this job
        args: Positional arguments to pass to func
        kwargs: Keyword arguments to pass to func (``user_id`` is used for logging if present)
        replace_existing: If True, replace existing job with same ID

    Returns:
        Job ID

    Raises:
        Exception: Any error raised while adding the job is logged and re-raised.
    """
    if not self._running:
        logger.warning(
            f"Scheduler not running, but scheduling job {job_id} anyway. "
            "APScheduler will start automatically when needed."
        )

    try:
        # Ensure run_date is timezone-aware (UTC)
        if run_date.tzinfo is None:
            from datetime import timezone
            run_date = run_date.replace(tzinfo=timezone.utc)
            logger.debug(f"Added UTC timezone to run_date: {run_date}")

        job_kwargs = kwargs or {}
        self.scheduler.add_job(
            func,
            trigger=DateTrigger(run_date=run_date),
            args=args,
            kwargs=job_kwargs,
            id=job_id,
            replace_existing=replace_existing,
            misfire_grace_time=3600  # 1 hour grace period for missed jobs
        )

        # Get updated job count
        all_jobs = self.scheduler.get_jobs()
        one_time_jobs = [j for j in all_jobs if j.id != 'check_due_tasks']

        # Extract user_id from kwargs if available for logging and job store
        user_id = job_kwargs.get('user_id')
        func_name = getattr(func, '__name__', str(func))

        # Get job store name for user (if user_id provided)
        job_store_name = 'default'
        if user_id:
            db = None
            try:
                db = get_db_session()
                if db:
                    job_store_name = get_user_job_store_name(user_id, db)
            except Exception as e:
                logger.warning(f"Could not determine job store for user {user_id}: {e}")
            finally:
                # Close even when the lookup raised, so the session is not leaked
                if db:
                    try:
                        db.close()
                    except Exception as e:
                        logger.debug(f"Error closing job store lookup session: {e}")

        # Note: APScheduler doesn't support dynamic job store creation
        # We use 'default' for all jobs but log the user's job store name for debugging
        # The actual user isolation is handled through task filtering by user_id

        # Log detailed one-time task scheduling information (use WARNING level for visibility)
        log_message = (
            f"[Scheduler] 📅 Scheduled One-Time Task\n"
            f" ├─ Job ID: {job_id}\n"
            f" ├─ Function: {func_name}\n"
            f" ├─ User ID: {user_id or 'system'}\n"
            f" ├─ Job Store: {job_store_name} (user context)\n"
            f" ├─ Scheduled For: {run_date}\n"
            f" ├─ Replace Existing: {replace_existing}\n"
            f" ├─ Total One-Time Jobs: {len(one_time_jobs)}\n"
            f" └─ Total Scheduled Jobs: {len(all_jobs)}"
        )
        logger.warning(log_message)

        # Log job scheduling to event log for dashboard (best effort)
        event_db = None
        try:
            event_db = get_db_session()
            if event_db:
                event_log = SchedulerEventLog(
                    event_type='job_scheduled',
                    event_date=datetime.utcnow(),
                    job_id=job_id,
                    job_type='one_time',
                    user_id=user_id,
                    event_data={
                        'function_name': func_name,
                        'job_store': job_store_name,
                        'scheduled_for': run_date.isoformat(),
                        'replace_existing': replace_existing
                    }
                )
                event_db.add(event_log)
                event_db.commit()
        except Exception as e:
            logger.debug(f"Failed to log job scheduling event: {e}")
        finally:
            # Close even when add/commit raised, so the session is not leaked
            if event_db:
                try:
                    event_db.close()
                except Exception as e:
                    logger.debug(f"Error closing event log session: {e}")

        return job_id
    except Exception as e:
        logger.error(f"Failed to schedule one-time task {job_id}: {e}")
        raise
def is_running(self) -> bool:
    """Return the scheduler's running flag (set on start, cleared on stop)."""
    running_flag = self._running
    return running_flag

View File

@@ -0,0 +1,197 @@
"""
Task Execution Handler
Handles asynchronous execution of individual tasks with proper session isolation.
"""
from typing import TYPE_CHECKING, Any, Dict, Optional
from sqlalchemy.orm import object_session
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from .exception_handler import (
SchedulerException, TaskExecutionError, DatabaseError, SchedulerConfigError
)
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("task_execution_handler")
def _record_task_failure(
    scheduler: 'TaskScheduler',
    user_id: Any,
    summary: Optional[Dict[str, Any]] = None
) -> None:
    """Count one failed execution in global stats, per-user stats and the cycle summary."""
    scheduler.stats['tasks_failed'] += 1
    scheduler._update_user_stats(user_id, success=False)
    if summary is not None:
        summary['failed'] = summary.get('failed', 0) + 1


async def execute_task_async(
    scheduler: 'TaskScheduler',
    task_type: str,
    task: Any,
    summary: Optional[Dict[str, Any]] = None
):
    """
    Execute a single task asynchronously with user isolation.

    Each task gets its own database session to prevent concurrent access issues,
    as SQLAlchemy sessions are not async-safe or concurrent-safe.

    User context is extracted and tracked for user isolation.

    Every failure path (no database session, executor lookup failure, failed
    result, unexpected exception) is counted both in ``scheduler.stats`` and in
    ``summary['failed']``, so the per-cycle summary agrees with the global
    counters. Exceptions are routed to ``scheduler.exception_handler`` and are
    not propagated to the caller.

    Args:
        scheduler: TaskScheduler instance
        task_type: Type of task
        task: Task instance from database (detached from original session)
        summary: Optional summary dict to update with execution results
            (its 'executed' / 'failed' counters are incremented in place)
    """
    # Cache the id once: after a failed session it may not be safe to re-read it
    task_db_id = getattr(task, 'id', None)
    task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
    db = None
    user_id = None

    try:
        # Extract user context if available (for user isolation tracking).
        # When only strategy_id is present, user_id is resolved after the merge below.
        try:
            if hasattr(task, 'strategy') and task.strategy:
                user_id = getattr(task.strategy, 'user_id', None)
        except Exception as e:
            logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")

        # Log task execution start (detailed for important tasks)
        if task_db_id:
            logger.debug(f"[Scheduler] ▶️ Executing {task_type} task {task_db_id} | user_id: {user_id}")

        # Create a new database session for this async task
        # SQLAlchemy sessions are not async-safe and cannot be shared across concurrent tasks
        db = get_db_session()
        if db is None:
            error = DatabaseError(
                message=f"Failed to get database session for task {task_id}",
                user_id=user_id,
                task_id=task_db_id,
                task_type=task_type
            )
            scheduler.exception_handler.handle_exception(error, log_level="error")
            _record_task_failure(scheduler, user_id, summary)
            return

        # Set database session for exception handler
        scheduler.exception_handler.db = db

        # Merge the detached task object into this session
        # The task object was loaded in a different session and is now detached
        if object_session(task) is None:
            task = db.merge(task)

        # Extract user_id after merge if not already available
        if user_id is None and hasattr(task, 'strategy'):
            try:
                if task.strategy:
                    user_id = getattr(task.strategy, 'user_id', None)
                elif hasattr(task, 'strategy_id'):
                    # Query strategy if relationship not loaded
                    from models.enhanced_strategy_models import EnhancedContentStrategy
                    strategy = db.query(EnhancedContentStrategy).filter(
                        EnhancedContentStrategy.id == task.strategy_id
                    ).first()
                    if strategy:
                        user_id = strategy.user_id
            except Exception as e:
                logger.debug(f"Could not extract user_id after merge for task {task_id}: {e}")

        # Get executor for this task type
        try:
            executor = scheduler.registry.get_executor(task_type)
        except Exception as e:
            error = SchedulerConfigError(
                message=f"Failed to get executor for task type {task_type}: {str(e)}",
                user_id=user_id,
                context={
                    "task_id": task_db_id,
                    "task_type": task_type
                },
                original_error=e
            )
            scheduler.exception_handler.handle_exception(error)
            _record_task_failure(scheduler, user_id, summary)
            return

        # Execute task with its own session (with error handling)
        try:
            result = await executor.execute_task(task, db)

            # Handle result and update statistics
            if result.success:
                scheduler.stats['tasks_executed'] += 1
                scheduler._update_user_stats(user_id, success=True)
                if summary is not None:
                    summary['executed'] = summary.get('executed', 0) + 1
                logger.debug(f"[Scheduler] ✅ Task {task_id} executed successfully | user_id: {user_id} | time: {result.execution_time_ms}ms")
            else:
                _record_task_failure(scheduler, user_id, summary)

                # Create structured error for failed execution
                error = TaskExecutionError(
                    message=result.error_message or "Task execution failed",
                    user_id=user_id,
                    task_id=task_db_id,
                    task_type=task_type,
                    execution_time_ms=result.execution_time_ms,
                    context={"result_data": result.result_data}
                )
                scheduler.exception_handler.handle_exception(error, log_level="warning")

                logger.warning(f"[Scheduler] ❌ Task {task_id} failed | user_id: {user_id} | error: {result.error_message}")

                # Retry logic if enabled
                if scheduler.enable_retries and result.retryable:
                    await scheduler._schedule_retry(task, result.retry_delay)

        except SchedulerException:
            # Let the outer handler below deal with scheduler exceptions
            raise
        except Exception as e:
            # Wrap unexpected exceptions
            error = TaskExecutionError(
                message=f"Unexpected error during task execution: {str(e)}",
                user_id=user_id,
                task_id=task_db_id,
                task_type=task_type,
                original_error=e
            )
            scheduler.exception_handler.handle_exception(error)
            _record_task_failure(scheduler, user_id, summary)

    except SchedulerException as e:
        # Handle scheduler exceptions
        scheduler.exception_handler.handle_exception(e)
        _record_task_failure(scheduler, user_id, summary)
    except Exception as e:
        # Handle any other unexpected errors
        error = TaskExecutionError(
            message=f"Unexpected error in task execution wrapper: {str(e)}",
            user_id=user_id,
            task_id=task_db_id,
            task_type=task_type,
            original_error=e
        )
        scheduler.exception_handler.handle_exception(error)
        _record_task_failure(scheduler, user_id, summary)
    finally:
        # Clean up database session
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session for task {task_id}: {e}")

        # Remove from active executions (no-op if it was never registered)
        scheduler.active_executions.pop(task_id, None)