# Files
# ALwrity/backend/services/seo_analyzer/utils.py
# 2025-08-06 12:48:02 +05:30
#
# 106 lines
# 4.3 KiB
# Python

"""
SEO Analyzer Utilities
Contains utility classes for HTML fetching and AI insight generation.
"""
import requests
from typing import Optional, Dict, List, Any
from loguru import logger
class HTMLFetcher:
    """Utility class for fetching HTML content from URLs.

    Every fetch made through one instance goes through the same
    ``requests.Session``, which is configured with a desktop-browser
    ``User-Agent`` header. The instance can be used as a context manager
    (or ``close()`` can be called explicitly) so the session is released
    when the fetcher is no longer needed.
    """

    # Seconds to wait on a request when the caller does not pass a timeout.
    DEFAULT_TIMEOUT = 30

    def __init__(self):
        """Create the underlying session and set its User-Agent header."""
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })

    def fetch_html(self, url: str, timeout: float = DEFAULT_TIMEOUT) -> Optional[str]:
        """Fetch HTML content with error handling.

        Args:
            url: Address to request with HTTP GET.
            timeout: Value forwarded to the session's ``get`` call as its
                ``timeout`` argument. Defaults to ``DEFAULT_TIMEOUT``.

        Returns:
            The response body as text, or ``None`` if the request raised
            or ``raise_for_status()`` reported an HTTP error status.
        """
        try:
            response = self.session.get(url, timeout=timeout)
            response.raise_for_status()
            return response.text
        except Exception as e:
            # Deliberately broad: this method is best-effort and callers
            # only distinguish "got text" from "got None". The failure is
            # logged rather than propagated.
            logger.error(f"Error fetching HTML from {url}: {e}")
            return None

    def close(self) -> None:
        """Close the underlying session."""
        self.session.close()

    def __enter__(self):
        """Return this fetcher so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the session on exit; exceptions are not suppressed."""
        self.close()
class AIInsightGenerator:
    """Utility class for generating AI-powered insights from analysis data.

    The insights are produced by fixed threshold checks over the sections
    of an analysis result; each insight is a dict with the keys ``type``,
    ``message``, ``priority``, ``action`` and ``description``.
    """

    @staticmethod
    def _section(analysis_data: Dict[str, Any], key: str) -> Dict[str, Any]:
        """Return ``analysis_data[key]`` if it is a dict, else an empty dict."""
        section = analysis_data.get(key)
        return section if isinstance(section, dict) else {}

    def generate_insights(self, analysis_data: Dict[str, Any], url: str) -> List[Dict[str, Any]]:
        """Generate AI-powered insights based on analysis data.

        Args:
            analysis_data: Mapping of section name to that section's result.
                The sections read here are ``content_analysis``,
                ``technical_seo``, ``security_headers``, ``performance``
                and ``url_structure``; every dict-valued section also
                contributes its ``issues`` list to the overall issue count.
                A section that is missing or is not a dict is treated as
                an empty dict, so its check runs against default values.
            url: Not used by the current checks; kept so the signature
                stays compatible with existing callers.

        Returns:
            A list of insight dicts, in the order the checks run: overall
            issue count, content, technical SEO, security, performance,
            URL structure. The list is empty when no check fires.
        """
        insights: List[Dict[str, Any]] = []

        # Analyze overall performance. ``or []`` keeps a section whose
        # 'issues' value is None from breaking len().
        total_issues = sum(
            len(section.get('issues') or [])
            for section in analysis_data.values()
            if isinstance(section, dict)
        )
        if total_issues > 5:
            insights.append({
                'type': 'critical',
                'message': f'High number of critical issues ({total_issues}) detected',
                'priority': 'high',
                'action': 'fix_critical_issues',
                'description': 'Multiple critical SEO issues need immediate attention to improve search rankings.'
            })

        # Content quality insights. A missing or None word count is
        # compared as 0, which triggers the thin-content warning.
        content_data = self._section(analysis_data, 'content_analysis')
        if (content_data.get('word_count') or 0) < 300:
            insights.append({
                'type': 'warning',
                'message': 'Content is too thin for good SEO',
                'priority': 'medium',
                'action': 'expand_content',
                'description': 'Add more valuable, relevant content to improve search rankings and user engagement.'
            })

        # Technical SEO insights
        technical_data = self._section(analysis_data, 'technical_seo')
        if not technical_data.get('has_canonical', False):
            insights.append({
                'type': 'critical',
                'message': 'Missing canonical URL can cause duplicate content issues',
                'priority': 'high',
                'action': 'add_canonical',
                'description': 'Canonical URLs help prevent duplicate content penalties.'
            })

        # Security insights. A missing or None header count is compared as 0.
        security_data = self._section(analysis_data, 'security_headers')
        if (security_data.get('total_headers') or 0) < 3:
            insights.append({
                'type': 'warning',
                'message': 'Insufficient security headers',
                'priority': 'medium',
                'action': 'improve_security',
                'description': 'Security headers protect against common web vulnerabilities.'
            })

        # Performance insights. A missing or None load time is compared
        # as 0, so this check does not fire without a measurement.
        performance_data = self._section(analysis_data, 'performance')
        if (performance_data.get('load_time') or 0) > 3:
            insights.append({
                'type': 'critical',
                'message': 'Page load time is too slow',
                'priority': 'high',
                'action': 'optimize_performance',
                'description': 'Slow loading pages negatively impact user experience and search rankings.'
            })

        # URL structure insights
        url_data = self._section(analysis_data, 'url_structure')
        if not url_data.get('has_https', False):
            insights.append({
                'type': 'critical',
                'message': 'Website is not using HTTPS',
                'priority': 'high',
                'action': 'enable_https',
                'description': 'HTTPS is required for security and is a ranking factor for search engines.'
            })

        return insights