AI platform insights monitoring and website analysis monitoring services added

This commit is contained in:
ajaysi
2025-11-11 15:57:45 +05:30
parent d99c7c83a7
commit 7191c7e7f0
81 changed files with 10860 additions and 1567 deletions

View File

@@ -1,5 +1,5 @@
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from typing import List, Optional, Dict, Any, Union
from enum import Enum
@@ -81,6 +81,7 @@ class ResearchProvider(str, Enum):
"""Research provider options."""
GOOGLE = "google" # Gemini native grounding
EXA = "exa" # Exa neural search
TAVILY = "tavily" # Tavily AI-powered search
class ResearchConfig(BaseModel):
@@ -100,6 +101,23 @@ class ResearchConfig(BaseModel):
exa_include_domains: List[str] = [] # Domain whitelist
exa_exclude_domains: List[str] = [] # Domain blacklist
exa_search_type: Optional[str] = "auto" # "auto", "keyword", "neural"
# Tavily-specific options
tavily_topic: Optional[str] = "general" # general, news, finance
tavily_search_depth: Optional[str] = "basic" # basic (1 credit), advanced (2 credits)
tavily_include_domains: List[str] = [] # Domain whitelist (max 300)
tavily_exclude_domains: List[str] = [] # Domain blacklist (max 150)
tavily_include_answer: Union[bool, str] = False # basic, advanced, true, false
tavily_include_raw_content: Union[bool, str] = False # markdown, text, true, false
tavily_include_images: bool = False
tavily_include_image_descriptions: bool = False
tavily_include_favicon: bool = False
tavily_time_range: Optional[str] = None # day, week, month, year, d, w, m, y
tavily_start_date: Optional[str] = None # YYYY-MM-DD
tavily_end_date: Optional[str] = None # YYYY-MM-DD
tavily_country: Optional[str] = None # Country code (only for general topic)
tavily_chunks_per_source: int = 3 # 1-3 (only for advanced search)
tavily_auto_parameters: bool = False # Auto-configure parameters based on query
class BlogResearchRequest(BaseModel):

View File

@@ -17,6 +17,7 @@ class OnboardingSession(Base):
website_analyses = relationship('WebsiteAnalysis', back_populates='session', cascade="all, delete-orphan")
research_preferences = relationship('ResearchPreferences', back_populates='session', cascade="all, delete-orphan", uselist=False)
persona_data = relationship('PersonaData', back_populates='session', cascade="all, delete-orphan", uselist=False)
competitor_analyses = relationship('CompetitorAnalysis', back_populates='session', cascade="all, delete-orphan")
def __repr__(self):
return f"<OnboardingSession(id={self.id}, user_id={self.user_id}, step={self.current_step}, progress={self.progress})>"
@@ -188,4 +189,46 @@ class PersonaData(Base):
'research_persona_generated_at': self.research_persona_generated_at.isoformat() if self.research_persona_generated_at else None,
'created_at': self.created_at.isoformat() if self.created_at else None,
'updated_at': self.updated_at.isoformat() if self.updated_at else None
}
class CompetitorAnalysis(Base):
    """Competitor website analysis result attached to an onboarding session.

    One row holds the analysis payload for a single competitor URL, together
    with its processing status and any error/warning text. Rows are produced
    by scheduled analysis tasks.
    """

    __tablename__ = 'competitor_analyses'

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Owning onboarding session; the FK is declared with ON DELETE CASCADE.
    session_id = Column(
        Integer,
        ForeignKey('onboarding_sessions.id', ondelete='CASCADE'),
        nullable=False,
    )
    competitor_url = Column(String(500), nullable=False)
    # Domain extracted from the URL, kept separately for easier queries.
    competitor_domain = Column(String(255), nullable=True)
    analysis_date = Column(DateTime, default=func.now())

    # Full analysis payload (same structure as WebsiteAnalysis): contains
    # style_analysis, crawl_result, style_patterns, style_guidelines.
    analysis_data = Column(JSON)

    # Processing metadata. Status values: completed, failed, in_progress.
    status = Column(String(50), default='completed')
    error_message = Column(Text, nullable=True)
    warning_message = Column(Text, nullable=True)
    created_at = Column(DateTime, default=func.now())
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now())

    # Back-reference to the parent session.
    session = relationship(
        'OnboardingSession',
        back_populates='competitor_analyses',
    )

    def __repr__(self):
        """Return a short debug representation (id, URL and status)."""
        return (
            f"<CompetitorAnalysis(id={self.id}, "
            f"url={self.competitor_url}, "
            f"status={self.status})>"
        )

    def to_dict(self):
        """Convert to dictionary for API responses.

        Datetime columns are rendered as ISO-8601 strings, or ``None`` when
        the column has no value.
        """

        def _iso(moment):
            # Unset timestamps are passed through as None.
            return moment.isoformat() if moment else None

        return {
            'id': self.id,
            'session_id': self.session_id,
            'competitor_url': self.competitor_url,
            'competitor_domain': self.competitor_domain,
            'analysis_date': _iso(self.analysis_date),
            'analysis_data': self.analysis_data,
            'status': self.status,
            'error_message': self.error_message,
            'warning_message': self.warning_message,
            'created_at': _iso(self.created_at),
            'updated_at': _iso(self.updated_at),
        }

View File

@@ -0,0 +1,100 @@
"""
Platform Insights Monitoring Models
Database models for tracking platform insights (GSC/Bing) fetch tasks.
"""
from sqlalchemy import Column, Integer, String, Text, DateTime, JSON, Index, ForeignKey
from sqlalchemy.orm import relationship
from datetime import datetime
# Import the same Base from enhanced_strategy_models
from models.enhanced_strategy_models import Base
class PlatformInsightsTask(Base):
    """
    Scheduled platform-insights fetch task.

    One row describes insights fetching for a given user and platform
    (weekly updates, per the original model description), along with the
    outcome of the most recent checks and the next scheduled run.
    """

    __tablename__ = "platform_insights_tasks"

    id = Column(Integer, primary_key=True, index=True)

    # --- Who / what -------------------------------------------------------
    # Clerk user ID (string).
    user_id = Column(String(255), nullable=False, index=True)
    # 'gsc' or 'bing'.
    platform = Column(String(50), nullable=False)
    # Optional: specific site URL.
    site_url = Column(String(500), nullable=True)

    # --- Task state -------------------------------------------------------
    # 'active', 'failed', 'paused'.
    status = Column(String(50), default='active')

    # --- Execution tracking -----------------------------------------------
    last_check = Column(DateTime, nullable=True)
    last_success = Column(DateTime, nullable=True)
    last_failure = Column(DateTime, nullable=True)
    failure_reason = Column(Text, nullable=True)

    # --- Scheduling -------------------------------------------------------
    # Next scheduled check time.
    # NOTE(review): this column is indexed here via index=True and again by
    # idx_platform_insights_next_check below -- confirm both are intended.
    next_check = Column(DateTime, nullable=True, index=True)

    # --- Bookkeeping ------------------------------------------------------
    # datetime.utcnow yields naive datetimes expressed in UTC.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Per-run logs; removed together with the task (delete-orphan cascade).
    execution_logs = relationship(
        "PlatformInsightsExecutionLog",
        back_populates="task",
        cascade="all, delete-orphan",
    )

    # Indexes for efficient queries.
    __table_args__ = (
        Index('idx_platform_insights_user_platform', 'user_id', 'platform'),
        Index('idx_platform_insights_next_check', 'next_check'),
        Index('idx_platform_insights_status', 'status'),
    )

    def __repr__(self):
        """Return a short debug representation of the task."""
        return (
            f"<PlatformInsightsTask(id={self.id}, "
            f"user_id={self.user_id}, "
            f"platform={self.platform}, "
            f"status={self.status})>"
        )
class PlatformInsightsExecutionLog(Base):
    """
    Log entry for one platform-insights fetch attempt.

    Each row records the status, optional result payload, optional error
    text and duration of a single execution of a PlatformInsightsTask.
    """

    __tablename__ = "platform_insights_execution_logs"

    id = Column(Integer, primary_key=True, index=True)

    # Task this log entry belongs to.
    task_id = Column(
        Integer,
        ForeignKey("platform_insights_tasks.id"),
        nullable=False,
        index=True,
    )

    # --- Execution details ------------------------------------------------
    execution_date = Column(DateTime, default=datetime.utcnow, nullable=False)
    # 'success', 'failed', 'skipped'.
    status = Column(String(50), nullable=False)

    # --- Results ----------------------------------------------------------
    # Insights data, metrics, etc.
    result_data = Column(JSON, nullable=True)
    error_message = Column(Text, nullable=True)
    execution_time_ms = Column(Integer, nullable=True)
    # 'cached', 'api', 'onboarding'.
    data_source = Column(String(50), nullable=True)

    # --- Bookkeeping ------------------------------------------------------
    created_at = Column(DateTime, default=datetime.utcnow)

    # Parent task (other side of PlatformInsightsTask.execution_logs).
    task = relationship(
        "PlatformInsightsTask",
        back_populates="execution_logs",
    )

    # Indexes for efficient queries.
    __table_args__ = (
        Index(
            'idx_platform_insights_log_task_execution_date',
            'task_id',
            'execution_date',
        ),
        Index('idx_platform_insights_log_status', 'status'),
    )

    def __repr__(self):
        """Return a short debug representation of the log entry."""
        return (
            f"<PlatformInsightsExecutionLog(id={self.id}, "
            f"task_id={self.task_id}, "
            f"status={self.status}, "
            f"execution_date={self.execution_date})>"
        )

View File

@@ -0,0 +1,48 @@
"""
Scheduler Cumulative Stats Model
Model for storing persistent cumulative scheduler metrics that survive restarts.
"""
from sqlalchemy import Column, Integer, DateTime, Index
from datetime import datetime
from models.enhanced_strategy_models import Base
class SchedulerCumulativeStats(Base):
    """Cumulative scheduler counters stored in a single row (id=1).

    Keeping the totals in the database lets them persist across restarts.
    """

    __tablename__ = "scheduler_cumulative_stats"

    # Always use id=1.
    # NOTE(review): the primary key also carries index=True and a separate
    # unique Index in __table_args__ -- confirm the extra indexes are needed.
    id = Column(Integer, primary_key=True, index=True, default=1)

    # --- Counters (all start at 0) ----------------------------------------
    total_check_cycles = Column(Integer, default=0, nullable=False)
    cumulative_tasks_found = Column(Integer, default=0, nullable=False)
    cumulative_tasks_executed = Column(Integer, default=0, nullable=False)
    cumulative_tasks_failed = Column(Integer, default=0, nullable=False)
    cumulative_tasks_skipped = Column(Integer, default=0, nullable=False)
    cumulative_job_completed = Column(Integer, default=0, nullable=False)
    cumulative_job_failed = Column(Integer, default=0, nullable=False)

    # --- Timestamps / references ------------------------------------------
    # NOTE(review): last_updated and updated_at are declared identically --
    # confirm whether both are required.
    last_updated = Column(
        DateTime,
        default=datetime.utcnow,
        nullable=False,
        onupdate=datetime.utcnow,
    )
    # Reference to last check_cycle event log ID.
    last_check_cycle_id = Column(Integer, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        nullable=False,
        onupdate=datetime.utcnow,
    )

    __table_args__ = (
        Index('idx_scheduler_cumulative_stats_single_row', 'id', unique=True),
    )

    @classmethod
    def get_or_create(cls, db_session):
        """
        Fetch the cumulative stats row (id=1), inserting it when absent.

        When the row does not exist, a new instance with id=1 is added to
        ``db_session`` and the session is committed.

        Args:
            db_session: session object used to query, add and commit.

        Returns:
            SchedulerCumulativeStats instance
        """
        existing = db_session.query(cls).filter(cls.id == 1).first()
        if existing:
            return existing

        # No row yet: create the singleton and persist it immediately.
        created = cls(id=1)
        db_session.add(created)
        db_session.commit()
        return created

View File

@@ -0,0 +1,105 @@
"""
Website Analysis Monitoring Models
Database models for tracking website analysis tasks and execution logs.
"""
from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean, JSON, Index, ForeignKey
from sqlalchemy.orm import relationship
from datetime import datetime
# Import the same Base from enhanced_strategy_models
from models.enhanced_strategy_models import Base
class WebsiteAnalysisTask(Base):
    """
    Recurring website-analysis monitoring task.

    One row describes the analysis of a single URL for a given user (either
    the user's own website or a competitor), along with the outcome of the
    most recent checks and the next scheduled run.
    """

    __tablename__ = "website_analysis_tasks"

    id = Column(Integer, primary_key=True, index=True)

    # --- Who / what -------------------------------------------------------
    # Clerk user ID (string).
    user_id = Column(String(255), nullable=False, index=True)
    # URL to analyze.
    website_url = Column(String(500), nullable=False)
    # 'user_website' or 'competitor'.
    task_type = Column(String(50), nullable=False)
    # For competitor tasks (domain or identifier).
    competitor_id = Column(String(255), nullable=True)

    # --- Task state -------------------------------------------------------
    # 'active', 'failed', 'paused'.
    status = Column(String(50), default='active')

    # --- Execution tracking -----------------------------------------------
    last_check = Column(DateTime, nullable=True)
    last_success = Column(DateTime, nullable=True)
    last_failure = Column(DateTime, nullable=True)
    failure_reason = Column(Text, nullable=True)

    # --- Scheduling -------------------------------------------------------
    # Next scheduled check time.
    # NOTE(review): this column is indexed here via index=True and again by
    # idx_website_analysis_tasks_next_check below -- confirm both are intended.
    next_check = Column(DateTime, nullable=True, index=True)
    # Recurring frequency in days.
    frequency_days = Column(Integer, default=10)

    # --- Bookkeeping ------------------------------------------------------
    # datetime.utcnow yields naive datetimes expressed in UTC.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Per-run logs; removed together with the task (delete-orphan cascade).
    execution_logs = relationship(
        "WebsiteAnalysisExecutionLog",
        back_populates="task",
        cascade="all, delete-orphan",
    )

    # Indexes for efficient queries.
    # Note: Index names match migration script to avoid conflicts.
    __table_args__ = (
        Index('idx_website_analysis_tasks_user_url', 'user_id', 'website_url'),
        Index(
            'idx_website_analysis_tasks_user_task_type',
            'user_id',
            'task_type',
        ),
        Index('idx_website_analysis_tasks_next_check', 'next_check'),
        Index('idx_website_analysis_tasks_status', 'status'),
        Index('idx_website_analysis_tasks_task_type', 'task_type'),
    )

    def __repr__(self):
        """Return a short debug representation of the task."""
        return (
            f"<WebsiteAnalysisTask(id={self.id}, "
            f"user_id={self.user_id}, "
            f"url={self.website_url}, "
            f"type={self.task_type}, "
            f"status={self.status})>"
        )
class WebsiteAnalysisExecutionLog(Base):
    """
    Log entry for one website-analysis execution attempt.

    Each row records the status, optional result payload, optional error
    text and duration of a single execution of a WebsiteAnalysisTask.
    """

    __tablename__ = "website_analysis_execution_logs"

    id = Column(Integer, primary_key=True, index=True)

    # Task this log entry belongs to.
    task_id = Column(
        Integer,
        ForeignKey("website_analysis_tasks.id"),
        nullable=False,
        index=True,
    )

    # --- Execution details ------------------------------------------------
    execution_date = Column(DateTime, default=datetime.utcnow, nullable=False)
    # 'success', 'failed', 'skipped', 'running'.
    status = Column(String(50), nullable=False)

    # --- Results ----------------------------------------------------------
    # Analysis results (style_analysis, crawl_result, etc.).
    result_data = Column(JSON, nullable=True)
    error_message = Column(Text, nullable=True)
    execution_time_ms = Column(Integer, nullable=True)

    # --- Bookkeeping ------------------------------------------------------
    created_at = Column(DateTime, default=datetime.utcnow)

    # Parent task (other side of WebsiteAnalysisTask.execution_logs).
    task = relationship(
        "WebsiteAnalysisTask",
        back_populates="execution_logs",
    )

    # Indexes for efficient queries.
    # Note: Index names match migration script to avoid conflicts.
    __table_args__ = (
        Index(
            'idx_website_analysis_execution_logs_task_execution_date',
            'task_id',
            'execution_date',
        ),
        Index('idx_website_analysis_execution_logs_status', 'status'),
    )

    def __repr__(self):
        """Return a short debug representation of the log entry."""
        return (
            f"<WebsiteAnalysisExecutionLog(id={self.id}, "
            f"task_id={self.task_id}, "
            f"status={self.status}, "
            f"execution_date={self.execution_date})>"
        )