106 lines
4.3 KiB
Python
106 lines
4.3 KiB
Python
"""
|
|
SEO Analyzer Utilities
|
|
Contains utility classes for HTML fetching and AI insight generation.
|
|
"""
|
|
|
|
import requests
|
|
from typing import Optional, Dict, List, Any
|
|
from loguru import logger
|
|
|
|
|
|
class HTMLFetcher:
    """Download page HTML through a single reusable ``requests`` session."""

    def __init__(self):
        # A desktop Chrome User-Agent is sent with every request made
        # through this session.
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        http_session = requests.Session()
        http_session.headers.update(browser_headers)
        self.session = http_session

    def fetch_html(self, url: str) -> Optional[str]:
        """Return the response body for ``url`` as text, or ``None`` on failure.

        The request is issued with ``timeout=30``. Any exception raised while
        requesting, checking the HTTP status, or reading the body is logged
        via ``logger.error`` and swallowed (best-effort fetch), in which case
        ``None`` is returned.
        """
        html: Optional[str] = None
        try:
            reply = self.session.get(url, timeout=30)
            # Non-2xx responses are treated as failures, not as content.
            reply.raise_for_status()
            html = reply.text
        except Exception as err:
            logger.error(f"Error fetching HTML from {url}: {err}")
        return html
|
|
|
|
|
|
class AIInsightGenerator:
    """Utility class for generating insights from SEO analysis data.

    The insights are produced by fixed threshold rules applied to the
    sections of the analysis dictionary; no model or external service is
    called from this class.
    """

    @staticmethod
    def _section(analysis_data: Dict[str, Any], name: str) -> Dict[str, Any]:
        """Return section ``name``, or an empty dict if it is missing or not a dict."""
        section = analysis_data.get(name)
        return section if isinstance(section, dict) else {}

    def generate_insights(self, analysis_data: Dict[str, Any], url: str) -> List[Dict[str, Any]]:
        """Generate insights based on analysis data.

        Args:
            analysis_data: Mapping of section name to that section's results.
                Sections read here are ``content_analysis``,
                ``technical_seo``, ``security_headers``, ``performance`` and
                ``url_structure``. Every dict-valued section may also carry
                an ``issues`` list, which is counted towards the overall
                issue total. Sections that are missing or not dicts are
                treated as empty, so their rules fall back to the defaults.
            url: The analysed URL. Currently unused; kept so existing
                callers keep working.

        Returns:
            A list of insight dicts, each with the keys ``type``,
            ``message``, ``priority``, ``action`` and ``description``, in the
            order: overall issues, content, technical SEO, security,
            performance, URL structure. The list is empty when no rule fires.
        """
        insights: List[Dict[str, Any]] = []

        # Overall health: count issues across every dict-valued section.
        total_issues = sum(
            len(section.get('issues', []))
            for section in analysis_data.values()
            if isinstance(section, dict)
        )
        if total_issues > 5:
            insights.append({
                'type': 'critical',
                'message': f'High number of critical issues ({total_issues}) detected',
                'priority': 'high',
                'action': 'fix_critical_issues',
                'description': 'Multiple critical SEO issues need immediate attention to improve search rankings.'
            })

        # Content quality: fewer than 300 words (or no count) is flagged as thin.
        content_data = self._section(analysis_data, 'content_analysis')
        if content_data.get('word_count', 0) < 300:
            insights.append({
                'type': 'warning',
                'message': 'Content is too thin for good SEO',
                'priority': 'medium',
                'action': 'expand_content',
                'description': 'Add more valuable, relevant content to improve search rankings and user engagement.'
            })

        # Technical SEO: an absent or falsy has_canonical flag is flagged.
        technical_data = self._section(analysis_data, 'technical_seo')
        if not technical_data.get('has_canonical', False):
            insights.append({
                'type': 'critical',
                'message': 'Missing canonical URL can cause duplicate content issues',
                'priority': 'high',
                'action': 'add_canonical',
                'description': 'Canonical URLs help prevent duplicate content penalties.'
            })

        # Security: fewer than three reported headers is flagged.
        security_data = self._section(analysis_data, 'security_headers')
        if security_data.get('total_headers', 0) < 3:
            insights.append({
                'type': 'warning',
                'message': 'Insufficient security headers',
                'priority': 'medium',
                'action': 'improve_security',
                'description': 'Security headers protect against common web vulnerabilities.'
            })

        # Performance: load_time above 3 is flagged (presumably seconds;
        # confirm against the producer of this section). A missing value
        # defaults to 0 and therefore never triggers this rule.
        performance_data = self._section(analysis_data, 'performance')
        if performance_data.get('load_time', 0) > 3:
            insights.append({
                'type': 'critical',
                'message': 'Page load time is too slow',
                'priority': 'high',
                'action': 'optimize_performance',
                'description': 'Slow loading pages negatively impact user experience and search rankings.'
            })

        # URL structure: an absent or falsy has_https flag is flagged.
        url_data = self._section(analysis_data, 'url_structure')
        if not url_data.get('has_https', False):
            insights.append({
                'type': 'critical',
                'message': 'Website is not using HTTPS',
                'priority': 'high',
                'action': 'enable_https',
                'description': 'HTTPS is required for security and is a ranking factor for search engines.'
            })

        return insights