"""Database models for SEO analysis data storage."""

from sqlalchemy import (
    Column,
    Integer,
    String,
    DateTime,
    Text,
    JSON,
    Float,
    Boolean,
    ForeignKey,
    func,
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from datetime import datetime
from typing import Dict, Any, List

Base = declarative_base()


class SEOActionType(Base):
    """Catalog of supported SEO action types (17 actions)."""

    __tablename__ = 'seo_action_types'

    id = Column(Integer, primary_key=True, index=True)
    code = Column(String(100), unique=True, nullable=False)  # e.g., analyze_page_speed
    name = Column(String(200), nullable=False)
    category = Column(String(50), nullable=True)  # content, technical, performance, etc.
    description = Column(Text, nullable=True)
    created_at = Column(DateTime, default=func.now())
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now())

    def __repr__(self):
        return f"<SEOActionType(id={self.id}, code={self.code!r})>"


class SEOAnalysisSession(Base):
    """Anchor session for a set of SEO actions and summary."""

    __tablename__ = 'seo_analysis_sessions'

    id = Column(Integer, primary_key=True, index=True)
    url = Column(String(500), nullable=False, index=True)
    triggered_by_user_id = Column(String(64), nullable=True)
    trigger_source = Column(String(32), nullable=True)  # manual, schedule, action_followup, system
    input_context = Column(JSON, nullable=True)
    status = Column(String(20), default='success')  # queued, running, success, failed, cancelled
    started_at = Column(DateTime, default=func.now(), nullable=False)
    completed_at = Column(DateTime, nullable=True)
    summary = Column(Text, nullable=True)
    overall_score = Column(Integer, nullable=True)
    health_label = Column(String(50), nullable=True)
    metrics = Column(JSON, nullable=True)
    issues_overview = Column(JSON, nullable=True)

    # Relationships
    action_runs = relationship("SEOActionRun", back_populates="session", cascade="all, delete-orphan")
    analyses = relationship("SEOAnalysis", back_populates="session", cascade="all, delete-orphan")

    def __repr__(self):
        return (
            f"<SEOAnalysisSession(id={self.id}, url={self.url!r}, "
            f"status={self.status!r})>"
        )


class SEOActionRun(Base):
    """Each execution of a specific action (one of the 17)."""

    __tablename__ = 'seo_action_runs'

    id = Column(Integer, primary_key=True, index=True)
    session_id = Column(Integer, ForeignKey('seo_analysis_sessions.id'), nullable=False)
    action_type_id = Column(Integer, ForeignKey('seo_action_types.id'), nullable=False)
    triggered_by_user_id = Column(String(64), nullable=True)
    input_params = Column(JSON, nullable=True)
    status = Column(String(20), default='success')
    started_at = Column(DateTime, default=func.now(), nullable=False)
    completed_at = Column(DateTime, nullable=True)
    result_summary = Column(Text, nullable=True)
    result = Column(JSON, nullable=True)
    diagnostics = Column(JSON, nullable=True)

    # Relationships
    session = relationship("SEOAnalysisSession", back_populates="action_runs")
    action_type = relationship("SEOActionType")

    def __repr__(self):
        return (
            f"<SEOActionRun(id={self.id}, session_id={self.session_id}, "
            f"action_type_id={self.action_type_id}, status={self.status!r})>"
        )


class SEOActionRunLink(Base):
    """Graph relations between action runs for narrative linkage."""

    __tablename__ = 'seo_action_run_links'

    id = Column(Integer, primary_key=True, index=True)
    from_action_run_id = Column(Integer, ForeignKey('seo_action_runs.id'), nullable=False)
    to_action_run_id = Column(Integer, ForeignKey('seo_action_runs.id'), nullable=False)
    relation = Column(String(50), nullable=False)  # followup_of, supports, caused_by
    created_at = Column(DateTime, default=func.now())

    def __repr__(self):
        return (
            f"<SEOActionRunLink(from={self.from_action_run_id}, "
            f"to={self.to_action_run_id}, relation={self.relation!r})>"
        )


class SEOAnalysis(Base):
    """Main SEO analysis record"""

    __tablename__ = 'seo_analyses'

    id = Column(Integer, primary_key=True, index=True)
    url = Column(String(500), nullable=False, index=True)
    overall_score = Column(Integer, nullable=False)
    health_status = Column(String(50), nullable=False)  # excellent, good, needs_improvement, poor, error
    timestamp = Column(DateTime, default=datetime.utcnow, nullable=False)
    analysis_data = Column(JSON, nullable=True)  # Store complete analysis data
    session_id = Column(Integer, ForeignKey('seo_analysis_sessions.id'), nullable=True)

    # Relationships
    critical_issues = relationship("SEOIssue", back_populates="analysis", cascade="all, delete-orphan")
    warnings = relationship("SEOWarning", back_populates="analysis", cascade="all, delete-orphan")
    recommendations = relationship("SEORecommendation", back_populates="analysis", cascade="all, delete-orphan")
    category_scores = relationship("SEOCategoryScore", back_populates="analysis", cascade="all, delete-orphan")
    session = relationship("SEOAnalysisSession", back_populates="analyses")

    def __repr__(self):
        return (
            f"<SEOAnalysis(id={self.id}, url={self.url!r}, "
            f"overall_score={self.overall_score}, health_status={self.health_status!r})>"
        )


class SEOIssue(Base):
    """Critical SEO issues"""

    __tablename__ = 'seo_issues'

    id = Column(Integer, primary_key=True, index=True)
    analysis_id = Column(Integer, ForeignKey('seo_analyses.id'), nullable=False)
    session_id = Column(Integer, ForeignKey('seo_analysis_sessions.id'), nullable=True)
    action_run_id = Column(Integer, ForeignKey('seo_action_runs.id'), nullable=True)
    issue_text = Column(Text, nullable=False)
    category = Column(String(100), nullable=True)  # url_structure, meta_data, content, etc.
    priority = Column(String(20), default='critical')  # critical, high, medium, low
    created_at = Column(DateTime, default=datetime.utcnow)

    # Relationship
    analysis = relationship("SEOAnalysis", back_populates="critical_issues")

    def __repr__(self):
        return (
            f"<SEOIssue(id={self.id}, category={self.category!r}, "
            f"priority={self.priority!r})>"
        )


class SEOWarning(Base):
    """SEO warnings"""

    __tablename__ = 'seo_warnings'

    id = Column(Integer, primary_key=True, index=True)
    analysis_id = Column(Integer, ForeignKey('seo_analyses.id'), nullable=False)
    session_id = Column(Integer, ForeignKey('seo_analysis_sessions.id'), nullable=True)
    action_run_id = Column(Integer, ForeignKey('seo_action_runs.id'), nullable=True)
    warning_text = Column(Text, nullable=False)
    category = Column(String(100), nullable=True)
    priority = Column(String(20), default='medium')
    created_at = Column(DateTime, default=datetime.utcnow)

    # Relationship
    analysis = relationship("SEOAnalysis", back_populates="warnings")

    def __repr__(self):
        return (
            f"<SEOWarning(id={self.id}, category={self.category!r}, "
            f"priority={self.priority!r})>"
        )


class SEORecommendation(Base):
    """SEO recommendations"""

    __tablename__ = 'seo_recommendations'

    id = Column(Integer, primary_key=True, index=True)
    analysis_id = Column(Integer, ForeignKey('seo_analyses.id'), nullable=False)
    session_id = Column(Integer, ForeignKey('seo_analysis_sessions.id'), nullable=True)
    action_run_id = Column(Integer, ForeignKey('seo_action_runs.id'), nullable=True)
    recommendation_text = Column(Text, nullable=False)
    category = Column(String(100), nullable=True)
    difficulty = Column(String(20), default='medium')  # easy, medium, hard
    estimated_impact = Column(String(20), default='medium')  # high, medium, low
    created_at = Column(DateTime, default=datetime.utcnow)

    # Relationship
    analysis = relationship("SEOAnalysis", back_populates="recommendations")

    def __repr__(self):
        return (
            f"<SEORecommendation(id={self.id}, category={self.category!r}, "
            f"difficulty={self.difficulty!r})>"
        )


class SEOCategoryScore(Base):
    """Individual category scores"""

    __tablename__ = 'seo_category_scores'

    id = Column(Integer, primary_key=True, index=True)
    analysis_id = Column(Integer, ForeignKey('seo_analyses.id'), nullable=False)
    category = Column(String(100), nullable=False)  # url_structure, meta_data, content, etc.
    score = Column(Integer, nullable=False)
    max_score = Column(Integer, default=100)
    details = Column(JSON, nullable=True)  # Store category-specific details

    # Relationship
    analysis = relationship("SEOAnalysis", back_populates="category_scores")

    def __repr__(self):
        return (
            f"<SEOCategoryScore(category={self.category!r}, "
            f"score={self.score}/{self.max_score})>"
        )


class SEOAnalysisHistory(Base):
    """Historical SEO analysis data for tracking improvements"""

    __tablename__ = 'seo_analysis_history'

    id = Column(Integer, primary_key=True, index=True)
    url = Column(String(500), nullable=False, index=True)
    analysis_date = Column(DateTime, default=datetime.utcnow, nullable=False)
    overall_score = Column(Integer, nullable=False)
    health_status = Column(String(50), nullable=False)
    score_change = Column(Integer, default=0)  # Change from previous analysis

    # Category scores for tracking
    url_structure_score = Column(Integer, nullable=True)
    meta_data_score = Column(Integer, nullable=True)
    content_score = Column(Integer, nullable=True)
    technical_score = Column(Integer, nullable=True)
    performance_score = Column(Integer, nullable=True)
    accessibility_score = Column(Integer, nullable=True)
    user_experience_score = Column(Integer, nullable=True)
    security_score = Column(Integer, nullable=True)

    # Issue counts
    critical_issues_count = Column(Integer, default=0)
    warnings_count = Column(Integer, default=0)
    recommendations_count = Column(Integer, default=0)

    def __repr__(self):
        return (
            f"<SEOAnalysisHistory(url={self.url!r}, "
            f"analysis_date={self.analysis_date}, overall_score={self.overall_score})>"
        )


class SEOKeywordAnalysis(Base):
    """Keyword analysis data"""

    __tablename__ = 'seo_keyword_analyses'

    id = Column(Integer, primary_key=True, index=True)
    analysis_id = Column(Integer, ForeignKey('seo_analyses.id'), nullable=False)
    keyword = Column(String(200), nullable=False)
    density = Column(Float, nullable=True)
    count = Column(Integer, default=0)
    in_title = Column(Boolean, default=False)
    in_headings = Column(Boolean, default=False)
    in_alt_text = Column(Boolean, default=False)
    in_meta_description = Column(Boolean, default=False)

    def __repr__(self):
        return (
            f"<SEOKeywordAnalysis(keyword={self.keyword!r}, "
            f"count={self.count}, density={self.density})>"
        )


class SEOTechnicalData(Base):
    """Technical SEO data"""

    __tablename__ = 'seo_technical_data'

    id = Column(Integer, primary_key=True, index=True)
    analysis_id = Column(Integer, ForeignKey('seo_analyses.id'), nullable=False)

    # Meta data
    title = Column(Text, nullable=True)
    title_length = Column(Integer, nullable=True)
    meta_description = Column(Text, nullable=True)
    meta_description_length = Column(Integer, nullable=True)

    # Technical elements
    has_canonical = Column(Boolean, default=False)
    canonical_url = Column(String(500), nullable=True)
    has_schema_markup = Column(Boolean, default=False)
    schema_types = Column(JSON, nullable=True)
    has_hreflang = Column(Boolean, default=False)
    hreflang_data = Column(JSON, nullable=True)

    # Social media
    og_tags_count = Column(Integer, default=0)
    twitter_tags_count = Column(Integer, default=0)

    # Technical files
    robots_txt_exists = Column(Boolean, default=False)
    sitemap_exists = Column(Boolean, default=False)

    def __repr__(self):
        return (
            f"<SEOTechnicalData(analysis_id={self.analysis_id}, "
            f"title={self.title!r})>"
        )


class SEOContentData(Base):
    """Content analysis data"""

    __tablename__ = 'seo_content_data'

    id = Column(Integer, primary_key=True, index=True)
    analysis_id = Column(Integer, ForeignKey('seo_analyses.id'), nullable=False)

    # Content metrics
    word_count = Column(Integer, default=0)
    char_count = Column(Integer, default=0)
    headings_count = Column(Integer, default=0)
    h1_count = Column(Integer, default=0)
    h2_count = Column(Integer, default=0)

    # Media
    images_count = Column(Integer, default=0)
    images_with_alt = Column(Integer, default=0)
    images_without_alt = Column(Integer, default=0)

    # Links
    internal_links_count = Column(Integer, default=0)
    external_links_count = Column(Integer, default=0)

    # Quality metrics
    readability_score = Column(Float, nullable=True)
    spelling_errors = Column(Integer, default=0)

    def __repr__(self):
        return (
            f"<SEOContentData(analysis_id={self.analysis_id}, "
            f"word_count={self.word_count})>"
        )


class SEOPerformanceData(Base):
    """Performance analysis data"""

    __tablename__ = 'seo_performance_data'

    id = Column(Integer, primary_key=True, index=True)
    analysis_id = Column(Integer, ForeignKey('seo_analyses.id'), nullable=False)

    # Load time
    load_time = Column(Float, nullable=True)

    # Compression
    is_compressed = Column(Boolean, default=False)
    compression_type = Column(String(50), nullable=True)  # gzip, br, etc.

    # Caching
    has_cache_headers = Column(Boolean, default=False)
    cache_control = Column(String(200), nullable=True)

    # HTTP headers
    content_encoding = Column(String(100), nullable=True)
    server_info = Column(String(200), nullable=True)

    def __repr__(self):
        return (
            f"<SEOPerformanceData(analysis_id={self.analysis_id}, "
            f"load_time={self.load_time})>"
        )


class SEOAccessibilityData(Base):
    """Accessibility analysis data"""

    __tablename__ = 'seo_accessibility_data'

    id = Column(Integer, primary_key=True, index=True)
    analysis_id = Column(Integer, ForeignKey('seo_analyses.id'), nullable=False)

    # Alt text
    images_with_alt = Column(Integer, default=0)
    images_without_alt = Column(Integer, default=0)
    alt_text_ratio = Column(Float, nullable=True)

    # Forms
    form_fields_count = Column(Integer, default=0)
    labeled_fields_count = Column(Integer, default=0)
    label_ratio = Column(Float, nullable=True)

    # ARIA
    aria_elements_count = Column(Integer, default=0)

    def __repr__(self):
        return (
            f"<SEOAccessibilityData(analysis_id={self.analysis_id}, "
            f"alt_text_ratio={self.alt_text_ratio})>"
        )


class SEOUserExperienceData(Base):
    """User experience analysis data"""

    __tablename__ = 'seo_user_experience_data'

    id = Column(Integer, primary_key=True, index=True)
    analysis_id = Column(Integer, ForeignKey('seo_analyses.id'), nullable=False)

    # Mobile
    is_mobile_friendly = Column(Boolean, default=False)
    has_viewport = Column(Boolean, default=False)

    # CTAs
    ctas_found = Column(JSON, nullable=True)  # List of found CTAs
    cta_count = Column(Integer, default=0)

    # Navigation
    has_navigation = Column(Boolean, default=False)
    nav_elements_count = Column(Integer, default=0)

    # Contact info
    has_contact_info = Column(Boolean, default=False)

    # Social media
    social_links_count = Column(Integer, default=0)
    social_links = Column(JSON, nullable=True)

    def __repr__(self):
        return (
            f"<SEOUserExperienceData(analysis_id={self.analysis_id}, "
            f"is_mobile_friendly={self.is_mobile_friendly})>"
        )


class SEOSecurityData(Base):
    """Security headers analysis data"""

    __tablename__ = 'seo_security_data'

    id = Column(Integer, primary_key=True, index=True)
    analysis_id = Column(Integer, ForeignKey('seo_analyses.id'), nullable=False)

    # Security headers
    has_x_frame_options = Column(Boolean, default=False)
    has_x_content_type_options = Column(Boolean, default=False)
    has_x_xss_protection = Column(Boolean, default=False)
    has_strict_transport_security = Column(Boolean, default=False)
    has_content_security_policy = Column(Boolean, default=False)
    has_referrer_policy = Column(Boolean, default=False)

    # HTTPS
    is_https = Column(Boolean, default=False)

    # Total security score
    security_score = Column(Integer, default=0)
    present_headers = Column(JSON, nullable=True)
    missing_headers = Column(JSON, nullable=True)

    def __repr__(self):
        return (
            f"<SEOSecurityData(analysis_id={self.analysis_id}, "
            f"is_https={self.is_https}, security_score={self.security_score})>"
        )


# Helper functions for data conversion

# Ordered (category, keywords) pairs used by extract_category_from_text.
# Order matters: the first category with any keyword contained in the text wins.
_CATEGORY_KEYWORDS = (
    ('meta_data', ('title', 'meta', 'description')),
    ('url_structure', ('https', 'url', 'security')),
    ('content_analysis', ('content', 'word', 'heading', 'image')),
    ('technical_seo', ('schema', 'canonical', 'technical')),
    ('performance', ('speed', 'load', 'performance')),
    ('accessibility', ('alt', 'accessibility', 'aria')),
    ('user_experience', ('mobile', 'cta', 'navigation')),
)


def _split_text_and_category(item):
    """Return ``(text, category)`` for one issue/warning/recommendation entry.

    Entries may be dictionaries (the text is read from ``'message'`` and the
    category from ``'category'``) or any other value, which is stringified.
    When a dictionary has no ``'category'`` key, or the entry is not a
    dictionary, the category is derived from the text.
    """
    if isinstance(item, dict):
        text = item.get('message', str(item))
        if 'category' in item:
            return text, item['category']
        return text, extract_category_from_text(text)

    text = str(item)
    return text, extract_category_from_text(text)


def create_analysis_from_result(result: 'SEOAnalysisResult') -> SEOAnalysis:
    """Create SEOAnalysis record from analysis result.

    Copies ``url``, ``overall_score``, ``health_status``, ``timestamp`` and
    ``data`` from ``result`` onto a new, unsaved ``SEOAnalysis`` instance.
    """
    return SEOAnalysis(
        url=result.url,
        overall_score=result.overall_score,
        health_status=result.health_status,
        timestamp=result.timestamp,
        analysis_data=result.data
    )


def create_issues_from_result(analysis_id: int, result: 'SEOAnalysisResult') -> List[SEOIssue]:
    """Create SEOIssue records from analysis result.

    Each entry of ``result.critical_issues`` may be a string or a dictionary;
    a missing (``None``) collection yields an empty list.
    """
    issues = []
    for issue_data in result.critical_issues or ():
        issue_text, category = _split_text_and_category(issue_data)
        issues.append(SEOIssue(
            analysis_id=analysis_id,
            issue_text=issue_text,
            category=category,
            priority='critical'
        ))
    return issues


def create_warnings_from_result(analysis_id: int, result: 'SEOAnalysisResult') -> List[SEOWarning]:
    """Create SEOWarning records from analysis result.

    Each entry of ``result.warnings`` may be a string or a dictionary;
    a missing (``None``) collection yields an empty list.
    """
    warnings = []
    for warning_data in result.warnings or ():
        warning_text, category = _split_text_and_category(warning_data)
        warnings.append(SEOWarning(
            analysis_id=analysis_id,
            warning_text=warning_text,
            category=category,
            priority='medium'
        ))
    return warnings


def create_recommendations_from_result(analysis_id: int, result: 'SEOAnalysisResult') -> List[SEORecommendation]:
    """Create SEORecommendation records from analysis result.

    Each entry of ``result.recommendations`` may be a string or a dictionary;
    a missing (``None``) collection yields an empty list.
    """
    recommendations = []
    for rec_data in result.recommendations or ():
        rec_text, category = _split_text_and_category(rec_data)
        recommendations.append(SEORecommendation(
            analysis_id=analysis_id,
            recommendation_text=rec_text,
            category=category,
            difficulty='medium',
            estimated_impact='medium'
        ))
    return recommendations


def create_category_scores_from_result(analysis_id: int, result: 'SEOAnalysisResult') -> List[SEOCategoryScore]:
    """Create SEOCategoryScore records from analysis result.

    Only entries of ``result.data`` whose value is a dictionary containing a
    ``'score'`` key produce a record; everything else is skipped. A missing
    (``None``) ``result.data`` yields an empty list.
    """
    return [
        SEOCategoryScore(
            analysis_id=analysis_id,
            category=category,
            score=data['score'],
            max_score=100,
            details=data
        )
        for category, data in (result.data or {}).items()
        if isinstance(data, dict) and 'score' in data
    ]


def extract_category_from_text(text: str) -> str:
    """Extract category from issue/warning/recommendation text.

    The text is lower-cased and checked for keyword substrings, category by
    category, in a fixed order; the first category with a matching keyword is
    returned. Empty or missing text, and text matching no keyword, map to
    ``'general'``.
    """
    if not text:
        return 'general'

    text_lower = str(text).lower()
    for category, keywords in _CATEGORY_KEYWORDS:
        if any(word in text_lower for word in keywords):
            return category
    return 'general'