Scheduled research persona generation

This commit is contained in:
ajaysi
2025-11-05 08:51:00 +05:30
parent 55087c4f37
commit d99c7c83a7
98 changed files with 14518 additions and 828 deletions

View File

@@ -0,0 +1,98 @@
"""
OAuth Token Monitoring Models
Database models for tracking OAuth token status and monitoring tasks.
"""
from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean, JSON, Index, ForeignKey
from sqlalchemy.orm import relationship
from datetime import datetime
# Import the same Base from enhanced_strategy_models
from models.enhanced_strategy_models import Base
class OAuthTokenMonitoringTask(Base):
    """
    Model for storing OAuth token monitoring tasks.

    Tracks per-user, per-platform token monitoring with weekly checks.
    Each task owns a collection of ``OAuthTokenExecutionLog`` rows through
    the ``execution_logs`` relationship; logs are deleted together with
    their task (``cascade="all, delete-orphan"``).
    """

    __tablename__ = "oauth_token_monitoring_tasks"

    id = Column(Integer, primary_key=True, index=True)

    # User and Platform Identification
    user_id = Column(String(255), nullable=False, index=True)  # Clerk user ID (string)
    platform = Column(String(50), nullable=False)  # 'gsc', 'bing', 'wordpress', 'wix'

    # Task Status
    status = Column(String(50), default='active')  # 'active', 'failed', 'paused'

    # Execution Tracking
    last_check = Column(DateTime, nullable=True)
    last_success = Column(DateTime, nullable=True)
    last_failure = Column(DateTime, nullable=True)
    failure_reason = Column(Text, nullable=True)

    # Scheduling
    # index=True already creates a dedicated index on this column, so no
    # additional explicit Index on next_check is declared in __table_args__.
    next_check = Column(DateTime, nullable=True, index=True)  # Next scheduled check time

    # Metadata
    # The callables are invoked by SQLAlchemy at INSERT/UPDATE time and
    # produce naive UTC timestamps.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Execution Logs Relationship
    execution_logs = relationship(
        "OAuthTokenExecutionLog",
        back_populates="task",
        cascade="all, delete-orphan",
    )

    # Indexes for efficient queries.
    # Index names share one namespace per schema on SQLite and PostgreSQL, so
    # generic names such as 'idx_status' clash with identically named indexes
    # on other tables registered on the same metadata and make table creation
    # fail. Every name here is therefore prefixed with the table it belongs to.
    __table_args__ = (
        Index('idx_oauth_monitoring_tasks_user_platform', 'user_id', 'platform'),
        Index('idx_oauth_monitoring_tasks_status', 'status'),
    )

    def __repr__(self):
        """Return a short debugging representation with the identifying fields."""
        return f"<OAuthTokenMonitoringTask(id={self.id}, user_id={self.user_id}, platform={self.platform}, status={self.status})>"
class OAuthTokenExecutionLog(Base):
    """
    Model for storing OAuth token monitoring execution logs.

    Tracks individual execution attempts with results and error details.
    Every log row references its parent task through ``task_id`` and is
    reachable from the task via the ``task`` / ``execution_logs``
    relationship pair.
    """

    __tablename__ = "oauth_token_execution_logs"

    id = Column(Integer, primary_key=True, index=True)

    # Task Reference
    task_id = Column(Integer, ForeignKey("oauth_token_monitoring_tasks.id"), nullable=False, index=True)

    # Execution Details
    execution_date = Column(DateTime, default=datetime.utcnow, nullable=False)
    status = Column(String(50), nullable=False)  # 'success', 'failed', 'skipped'

    # Results
    result_data = Column(JSON, nullable=True)  # Token status, expiration info, etc.
    error_message = Column(Text, nullable=True)
    execution_time_ms = Column(Integer, nullable=True)

    # Metadata
    created_at = Column(DateTime, default=datetime.utcnow)

    # Relationship to task
    task = relationship("OAuthTokenMonitoringTask", back_populates="execution_logs")

    # Indexes for efficient queries.
    # Index names share one namespace per schema on SQLite and PostgreSQL, so
    # generic names such as 'idx_status' clash with identically named indexes
    # on other tables registered on the same metadata and make table creation
    # fail. Every name here is therefore prefixed with the table it belongs to.
    __table_args__ = (
        Index('idx_oauth_execution_logs_task_date', 'task_id', 'execution_date'),
        Index('idx_oauth_execution_logs_status', 'status'),
    )

    def __repr__(self):
        """Return a short debugging representation with the identifying fields."""
        return f"<OAuthTokenExecutionLog(id={self.id}, task_id={self.task_id}, status={self.status}, execution_date={self.execution_date})>"

View File

@@ -157,12 +157,14 @@ class PersonaData(Base):
id = Column(Integer, primary_key=True, autoincrement=True)
session_id = Column(Integer, ForeignKey('onboarding_sessions.id', ondelete='CASCADE'), nullable=False)
# Persona generation results
# Persona generation results
core_persona = Column(JSON) # Core persona data (demographics, psychographics, etc.)
platform_personas = Column(JSON) # Platform-specific personas (LinkedIn, Twitter, etc.)
quality_metrics = Column(JSON) # Quality assessment metrics
selected_platforms = Column(JSON) # Array of selected platforms
research_persona = Column(JSON, nullable=True) # AI-generated research persona with personalized defaults
research_persona_generated_at = Column(DateTime, nullable=True) # Timestamp for 7-day TTL cache validation
# Metadata
created_at = Column(DateTime, default=func.now())
updated_at = Column(DateTime, default=func.now(), onupdate=func.now())
@@ -182,6 +184,8 @@ class PersonaData(Base):
'platform_personas': self.platform_personas,
'quality_metrics': self.quality_metrics,
'selected_platforms': self.selected_platforms,
'research_persona': self.research_persona,
'research_persona_generated_at': self.research_persona_generated_at.isoformat() if self.research_persona_generated_at else None,
'created_at': self.created_at.isoformat() if self.created_at else None,
'updated_at': self.updated_at.isoformat() if self.updated_at else None
}

View File

@@ -0,0 +1,110 @@
"""
Research Persona Models
Pydantic models for AI-generated research personas.
"""
from typing import Dict, Any, List, Optional
from pydantic import BaseModel, Field
from datetime import datetime
class ResearchPreset(BaseModel):
    """Research preset configuration."""

    # Required string fields; validation fails when any of them is missing.
    name: str
    keywords: str
    industry: str
    target_audience: str
    research_mode: str = Field(
        ...,
        description="basic, comprehensive, or targeted",
    )

    # default_factory gives every instance its own fresh dict.
    config: Dict[str, Any] = Field(
        default_factory=dict,
        description="Complete ResearchConfig object",
    )

    # Optional fields, each defaulting to None when omitted.
    description: Optional[str] = Field(default=None)
    icon: Optional[str] = Field(default=None)
    gradient: Optional[str] = Field(default=None)
class ResearchPersona(BaseModel):
    """AI-generated research persona providing personalized defaults and suggestions."""

    # Smart Defaults (all required)
    default_industry: str = Field(..., description="Default industry from onboarding data")
    default_target_audience: str = Field(..., description="Default target audience from onboarding data")
    default_research_mode: str = Field(..., description="basic, comprehensive, or targeted")
    default_provider: str = Field(..., description="google or exa")

    # Keyword Intelligence
    suggested_keywords: List[str] = Field(default_factory=list, description="8-12 relevant keywords")
    keyword_expansion_patterns: Dict[str, List[str]] = Field(
        default_factory=dict,
        description="Mapping of keywords to expanded, industry-specific terms"
    )

    # Domain & Source Intelligence
    suggested_exa_domains: List[str] = Field(
        default_factory=list,
        description="4-6 authoritative domains for the industry"
    )
    suggested_exa_category: Optional[str] = Field(
        None,
        description="Suggested Exa category based on industry"
    )

    # Query Enhancement Intelligence
    research_angles: List[str] = Field(
        default_factory=list,
        description="5-8 alternative research angles/focuses"
    )
    query_enhancement_rules: Dict[str, str] = Field(
        default_factory=dict,
        description="Templates for improving vague user queries"
    )

    # Research History Insights
    recommended_presets: List[ResearchPreset] = Field(
        default_factory=list,
        description="3-5 personalized research preset templates"
    )

    # Research Preferences
    research_preferences: Dict[str, Any] = Field(
        default_factory=dict,
        description="Structured research preferences from onboarding"
    )

    # Metadata
    generated_at: Optional[str] = Field(None, description="ISO timestamp of generation")
    # ge/le constrain the score to the closed interval [0.0, 1.0].
    confidence_score: Optional[float] = Field(None, ge=0.0, le=1.0, description="Confidence score 0-1")
    version: Optional[str] = Field(None, description="Schema version")

    # Pydantic v2 configuration. `json_schema_extra` is a v2 option, and v2
    # deprecates the nested `class Config` form in favour of `model_config`;
    # the example payload itself is unchanged and is merged into the
    # generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "default_industry": "Healthcare",
                "default_target_audience": "Medical professionals and healthcare administrators",
                "default_research_mode": "comprehensive",
                "default_provider": "exa",
                "suggested_keywords": ["telemedicine", "patient care", "healthcare technology"],
                "keyword_expansion_patterns": {
                    "AI": ["healthcare AI", "medical AI", "clinical AI"],
                    "tools": ["medical devices", "clinical tools"]
                },
                "suggested_exa_domains": ["pubmed.gov", "nejm.org", "thelancet.com"],
                "suggested_exa_category": "research paper",
                "research_angles": [
                    "Compare telemedicine platforms",
                    "Telemedicine ROI analysis",
                    "Latest telemedicine trends"
                ],
                "query_enhancement_rules": {
                    "vague_ai": "Research: AI applications in Healthcare for Medical professionals",
                    "vague_tools": "Compare top Healthcare tools"
                },
                "recommended_presets": [],
                "research_preferences": {
                    "research_depth": "comprehensive",
                    "content_types": ["blog", "article"]
                },
                "generated_at": "2024-01-01T00:00:00Z",
                "confidence_score": 0.85,
                "version": "1.0"
            }
        }
    }

View File

@@ -0,0 +1,48 @@
"""
Scheduler Event Models
Models for tracking scheduler-level events and history.
"""
from sqlalchemy import Column, Integer, String, Text, DateTime, JSON, Float
from datetime import datetime
# Import the same Base from enhanced_strategy_models
from models.enhanced_strategy_models import Base
class SchedulerEventLog(Base):
    """
    Model for storing scheduler-level events (check cycles, interval adjustments, etc.)

    One row is written per event. Which of the nullable detail columns are
    populated depends on ``event_type``, as noted next to each column group.
    """

    __tablename__ = "scheduler_event_logs"

    id = Column(Integer, primary_key=True, index=True)
    event_type = Column(String(50), nullable=False)  # 'check_cycle', 'interval_adjustment', 'start', 'stop', 'job_scheduled', 'job_cancelled'
    event_date = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)

    # Event details
    check_cycle_number = Column(Integer, nullable=True)  # For check_cycle events
    check_interval_minutes = Column(Integer, nullable=True)  # Interval at time of event
    previous_interval_minutes = Column(Integer, nullable=True)  # For interval_adjustment events
    new_interval_minutes = Column(Integer, nullable=True)  # For interval_adjustment events

    # Task execution summary for check cycles
    tasks_found = Column(Integer, nullable=True)
    tasks_executed = Column(Integer, nullable=True)
    tasks_failed = Column(Integer, nullable=True)
    tasks_by_type = Column(JSON, nullable=True)  # {'monitoring_task': 5, ...}

    # Job information
    job_id = Column(String(200), nullable=True)  # For job_scheduled/cancelled events
    job_type = Column(String(50), nullable=True)  # 'recurring', 'one_time'
    user_id = Column(String(200), nullable=True, index=True)  # For user isolation

    # Performance metrics
    check_duration_seconds = Column(Float, nullable=True)  # How long the check cycle took
    active_strategies_count = Column(Integer, nullable=True)
    active_executions = Column(Integer, nullable=True)

    # Additional context
    event_data = Column(JSON, nullable=True)  # Additional event-specific data
    error_message = Column(Text, nullable=True)  # For error events

    created_at = Column(DateTime, default=datetime.utcnow)

    def __repr__(self):
        """Return a short debugging representation with the identifying fields."""
        return f"<SchedulerEventLog(id={self.id}, event_type={self.event_type}, event_date={self.event_date})>"