Base code
796
backend/services/seo_analyzer/analyzers.py
Normal file
@@ -0,0 +1,796 @@
"""
SEO Analyzers Module

Contains all individual SEO analysis components.
"""

import re
import time
import requests
from urllib.parse import urlparse, urljoin
from typing import Dict, List, Any, Optional
from bs4 import BeautifulSoup
from loguru import logger

class BaseAnalyzer:
    """Base class for all SEO analyzers"""

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })

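# A hedged hardening sketch, not part of the original design: the shared
# requests.Session above has no retry policy, so a single transient 5xx can
# fail an entire analysis. requests' bundled urllib3 Retry can be mounted on
# the session if that matters. The helper name and retry parameters below are
# illustrative assumptions, not project API.
def _build_resilient_session() -> requests.Session:
    """Return a Session that retries transient failures with backoff."""
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    session = requests.Session()
    retry = Retry(total=3, backoff_factor=0.5,
                  status_forcelist=[429, 500, 502, 503, 504])
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('https://', adapter)
    session.mount('http://', adapter)
    return session
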
class URLStructureAnalyzer(BaseAnalyzer):
    """Analyzes URL structure and security"""

    def analyze(self, url: str) -> Dict[str, Any]:
        """Enhanced URL structure analysis with specific fixes"""
        parsed = urlparse(url)
        issues = []
        warnings = []
        recommendations = []

        # Check URL length
        if len(url) > 2000:
            issues.append({
                'type': 'critical',
                'message': f'URL is too long ({len(url)} characters)',
                'location': 'URL',
                'current_value': url,
                'fix': 'Shorten URL to under 2000 characters',
                'code_example': '<a href="/shorter-path">Link</a>',
                'action': 'shorten_url'
            })

        # Check for underscores used instead of hyphens as word separators
        if '_' in parsed.path and '-' not in parsed.path:
            issues.append({
                'type': 'critical',
                'message': 'URL uses underscores instead of hyphens',
                'location': 'URL',
                'current_value': parsed.path,
                'fix': 'Replace underscores with hyphens',
                'code_example': f'<a href="{parsed.path.replace("_", "-")}">Link</a>',
                'action': 'replace_underscores'
            })

        # Check for special characters (dots are allowed for file extensions)
        special_chars = re.findall(r'[^a-zA-Z0-9\-_/.]', parsed.path)
        if special_chars:
            warnings.append({
                'type': 'warning',
                'message': f'URL contains special characters: {", ".join(set(special_chars))}',
                'location': 'URL',
                'current_value': parsed.path,
                'fix': 'Remove special characters from URL',
                'code_example': '<a href="/clean-url">Link</a>',
                'action': 'remove_special_chars'
            })

        # Check for HTTPS
        if parsed.scheme != 'https':
            issues.append({
                'type': 'critical',
                'message': 'URL is not using HTTPS',
                'location': 'URL',
                'current_value': parsed.scheme,
                'fix': 'Redirect to HTTPS',
                'code_example': 'RewriteEngine On\nRewriteCond %{HTTPS} off\nRewriteRule ^(.*)$ https://%{HTTP_HOST}%{REQUEST_URI} [L,R=301]',
                'action': 'enable_https'
            })

        score = max(0, 100 - len(issues) * 25 - len(warnings) * 10)

        return {
            'score': score,
            'issues': issues,
            'warnings': warnings,
            'recommendations': recommendations,
            'url_length': len(url),
            'has_https': parsed.scheme == 'https',
            'has_hyphens': '-' in parsed.path,
            'special_chars_count': len(special_chars)
        }

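# Worked scoring example (illustrative, using the formula above): analyzing
# 'http://example.com/my_page' trips two criticals (no HTTPS, underscores with
# no hyphens) and no warnings, so score = max(0, 100 - 2*25 - 0*10) = 50.
# The demo function is a sketch, not part of the analyzer API.
def _demo_url_structure_score() -> None:
    """Sanity-check sketch for the URL scoring formula."""
    result = URLStructureAnalyzer().analyze('http://example.com/my_page')
    assert result['score'] == 50
    assert result['has_https'] is False
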
class MetaDataAnalyzer(BaseAnalyzer):
    """Analyzes meta data and technical SEO elements"""

    def analyze(self, html_content: str, url: str) -> Dict[str, Any]:
        """Enhanced meta data analysis with specific element locations"""
        soup = BeautifulSoup(html_content, 'html.parser')
        issues = []
        warnings = []
        recommendations = []

        # Title analysis
        title_tag = soup.find('title')
        if not title_tag:
            issues.append({
                'type': 'critical',
                'message': 'Missing title tag',
                'location': '<head>',
                'fix': 'Add title tag to head section',
                'code_example': '<title>Your Page Title</title>',
                'action': 'add_title_tag'
            })
        else:
            title_text = title_tag.get_text().strip()
            if len(title_text) < 30:
                warnings.append({
                    'type': 'warning',
                    'message': f'Title too short ({len(title_text)} characters)',
                    'location': '<title>',
                    'current_value': title_text,
                    'fix': 'Make title 30-60 characters',
                    'code_example': f'<title>{title_text} - Additional Context</title>',
                    'action': 'extend_title'
                })
            elif len(title_text) > 60:
                warnings.append({
                    'type': 'warning',
                    'message': f'Title too long ({len(title_text)} characters)',
                    'location': '<title>',
                    'current_value': title_text,
                    'fix': 'Shorten title to 30-60 characters',
                    'code_example': f'<title>{title_text[:55]}...</title>',
                    'action': 'shorten_title'
                })

        # Meta description analysis
        meta_desc = soup.find('meta', attrs={'name': 'description'})
        if not meta_desc:
            issues.append({
                'type': 'critical',
                'message': 'Missing meta description',
                'location': '<head>',
                'fix': 'Add meta description',
                'code_example': '<meta name="description" content="Your page description here">',
                'action': 'add_meta_description'
            })
        else:
            desc_content = meta_desc.get('content', '').strip()
            if len(desc_content) < 70:
                warnings.append({
                    'type': 'warning',
                    'message': f'Meta description too short ({len(desc_content)} characters)',
                    'location': '<meta name="description">',
                    'current_value': desc_content,
                    'fix': 'Extend description to 70-160 characters',
                    'code_example': f'<meta name="description" content="{desc_content} - Additional context about your page">',
                    'action': 'extend_meta_description'
                })
            elif len(desc_content) > 160:
                warnings.append({
                    'type': 'warning',
                    'message': f'Meta description too long ({len(desc_content)} characters)',
                    'location': '<meta name="description">',
                    'current_value': desc_content,
                    'fix': 'Shorten description to 70-160 characters',
                    'code_example': f'<meta name="description" content="{desc_content[:155]}...">',
                    'action': 'shorten_meta_description'
                })

        # Viewport meta tag
        viewport = soup.find('meta', attrs={'name': 'viewport'})
        if not viewport:
            issues.append({
                'type': 'critical',
                'message': 'Missing viewport meta tag',
                'location': '<head>',
                'fix': 'Add viewport meta tag for mobile optimization',
                'code_example': '<meta name="viewport" content="width=device-width, initial-scale=1.0">',
                'action': 'add_viewport_meta'
            })

        # Charset declaration
        charset = soup.find('meta', attrs={'charset': True}) or soup.find('meta', attrs={'http-equiv': 'Content-Type'})
        if not charset:
            warnings.append({
                'type': 'warning',
                'message': 'Missing charset declaration',
                'location': '<head>',
                'fix': 'Add charset meta tag',
                'code_example': '<meta charset="UTF-8">',
                'action': 'add_charset_meta'
            })

        score = max(0, 100 - len(issues) * 25 - len(warnings) * 10)

        return {
            'score': score,
            'issues': issues,
            'warnings': warnings,
            'recommendations': recommendations,
            'title_length': len(title_tag.get_text().strip()) if title_tag else 0,
            'description_length': len(meta_desc.get('content', '')) if meta_desc else 0,
            'has_viewport': bool(viewport),
            'has_charset': bool(charset)
        }

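# Worked example (illustrative): a bare page is missing the title, meta
# description and viewport (three criticals) plus the charset declaration
# (one warning), so score = max(0, 100 - 3*25 - 1*10) = 15. The demo function
# is a sketch, not part of the analyzer API.
def _demo_metadata_score() -> None:
    """Sanity-check sketch for the meta data scoring formula."""
    result = MetaDataAnalyzer().analyze('<html><head></head><body></body></html>',
                                        'https://example.com')
    assert result['score'] == 15
    assert result['has_viewport'] is False
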
class ContentAnalyzer(BaseAnalyzer):
    """Analyzes content quality and structure"""

    def analyze(self, html_content: str, url: str) -> Dict[str, Any]:
        """Enhanced content analysis with specific text locations"""
        soup = BeautifulSoup(html_content, 'html.parser')
        issues = []
        warnings = []
        recommendations = []

        # Get all text content
        text_content = soup.get_text()
        words = text_content.split()
        word_count = len(words)

        # Check word count
        if word_count < 300:
            issues.append({
                'type': 'critical',
                'message': f'Content too short ({word_count} words)',
                'location': 'Page content',
                'current_value': f'{word_count} words',
                'fix': 'Add more valuable content (minimum 300 words)',
                'code_example': 'Add relevant paragraphs with useful information',
                'action': 'add_more_content'
            })

        # Check for H1 tags
        h1_tags = soup.find_all('h1')
        if len(h1_tags) == 0:
            issues.append({
                'type': 'critical',
                'message': 'Missing H1 tag',
                'location': 'Page structure',
                'fix': 'Add one H1 tag per page',
                'code_example': '<h1>Your Main Page Title</h1>',
                'action': 'add_h1_tag'
            })
        elif len(h1_tags) > 1:
            warnings.append({
                'type': 'warning',
                'message': f'Multiple H1 tags found ({len(h1_tags)})',
                'location': 'Page structure',
                'current_value': f'{len(h1_tags)} H1 tags',
                'fix': 'Use only one H1 tag per page',
                'code_example': 'Keep only the main H1, change others to H2',
                'action': 'reduce_h1_tags'
            })

        # Check for images without alt text
        images = soup.find_all('img')
        images_without_alt = [img for img in images if not img.get('alt')]
        if images_without_alt:
            warnings.append({
                'type': 'warning',
                'message': f'Images without alt text ({len(images_without_alt)} found)',
                'location': 'Images',
                'current_value': f'{len(images_without_alt)} images without alt',
                'fix': 'Add descriptive alt text to all images',
                'code_example': '<img src="image.jpg" alt="Descriptive text about the image">',
                'action': 'add_alt_text'
            })

        # Check for internal links (anchors whose href is not an absolute http(s) URL)
        internal_links = [a for a in soup.find_all('a', href=True)
                          if not a['href'].startswith(('http://', 'https://'))]
        if len(internal_links) < 3:
            warnings.append({
                'type': 'warning',
                'message': f'Few internal links ({len(internal_links)} found)',
                'location': 'Page content',
                'current_value': f'{len(internal_links)} internal links',
                'fix': 'Add more internal links to improve site structure',
                'code_example': '<a href="/related-page">Related content</a>',
                'action': 'add_internal_links'
            })

        # Check for spelling errors with a crude length heuristic; a proper
        # dictionary-based checker is sketched after this class
        common_words = ['the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by']
        potential_errors = []
        for word in words[:100]:  # Check first 100 words
            if len(word) > 3 and word.lower() not in common_words:
                # Very long unbroken letter runs are often typos or concatenation artifacts
                if re.search(r'[a-z]{15,}', word.lower()):
                    potential_errors.append(word)

        if potential_errors:
            issues.append({
                'type': 'critical',
                'message': f'Potential spelling errors found: {", ".join(potential_errors[:5])}',
                'location': 'Page content',
                'current_value': f'{len(potential_errors)} potential errors',
                'fix': 'Review and correct spelling errors',
                'code_example': 'Use spell checker or proofread content',
                'action': 'fix_spelling'
            })

        score = max(0, 100 - len(issues) * 25 - len(warnings) * 10)

        return {
            'score': score,
            'issues': issues,
            'warnings': warnings,
            'recommendations': recommendations,
            'word_count': word_count,
            'h1_count': len(h1_tags),
            'images_count': len(images),
            'images_without_alt': len(images_without_alt),
            'internal_links_count': len(internal_links),
            'potential_spelling_errors': len(potential_errors)
        }

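# A hedged replacement sketch for the long-word spelling heuristic above. It
# assumes the optional third-party package `pyspellchecker` (import name
# `spellchecker`) is installed; it is not a dependency of this module, and the
# helper name is an illustrative assumption.
def _find_misspelled(words: List[str], limit: int = 100) -> List[str]:
    """Return words that a dictionary-based checker does not recognize."""
    from spellchecker import SpellChecker  # pip install pyspellchecker

    checker = SpellChecker()
    candidates = [w.strip('.,;:!?"()').lower() for w in words[:limit]]
    candidates = [w for w in candidates if w.isalpha()]
    return sorted(checker.unknown(candidates))
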
class TechnicalSEOAnalyzer(BaseAnalyzer):
    """Analyzes technical SEO elements"""

    def analyze(self, html_content: str, url: str) -> Dict[str, Any]:
        """Enhanced technical SEO analysis with specific fixes"""
        soup = BeautifulSoup(html_content, 'html.parser')
        issues = []
        warnings = []
        recommendations = []

        # Check for robots.txt
        robots_url = urljoin(url, '/robots.txt')
        try:
            robots_response = self.session.get(robots_url, timeout=5)
            if robots_response.status_code != 200:
                warnings.append({
                    'type': 'warning',
                    'message': 'Robots.txt not accessible',
                    'location': 'Server',
                    'fix': 'Create robots.txt file',
                    'code_example': 'User-agent: *\nAllow: /',
                    'action': 'create_robots_txt'
                })
        except requests.RequestException:
            warnings.append({
                'type': 'warning',
                'message': 'Robots.txt not found',
                'location': 'Server',
                'fix': 'Create robots.txt file',
                'code_example': 'User-agent: *\nAllow: /',
                'action': 'create_robots_txt'
            })

        # Check for sitemap
        sitemap_url = urljoin(url, '/sitemap.xml')
        try:
            sitemap_response = self.session.get(sitemap_url, timeout=5)
            if sitemap_response.status_code != 200:
                warnings.append({
                    'type': 'warning',
                    'message': 'Sitemap not accessible',
                    'location': 'Server',
                    'fix': 'Create XML sitemap',
                    'code_example': '<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n<url>\n<loc>https://example.com/</loc>\n</url>\n</urlset>',
                    'action': 'create_sitemap'
                })
        except requests.RequestException:
            warnings.append({
                'type': 'warning',
                'message': 'Sitemap not found',
                'location': 'Server',
                'fix': 'Create XML sitemap',
                'code_example': '<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n<url>\n<loc>https://example.com/</loc>\n</url>\n</urlset>',
                'action': 'create_sitemap'
            })

        # Check for structured data
        structured_data = soup.find_all('script', type='application/ld+json')
        if not structured_data:
            warnings.append({
                'type': 'warning',
                'message': 'No structured data found',
                'location': '<head> or <body>',
                'fix': 'Add structured data markup',
                'code_example': '<script type="application/ld+json">{"@context":"https://schema.org","@type":"WebPage","name":"Page Title"}</script>',
                'action': 'add_structured_data'
            })

        # Check for canonical URL
        canonical = soup.find('link', rel='canonical')
        if not canonical:
            issues.append({
                'type': 'critical',
                'message': 'Missing canonical URL',
                'location': '<head>',
                'fix': 'Add canonical URL',
                'code_example': '<link rel="canonical" href="https://example.com/page">',
                'action': 'add_canonical_url'
            })

        score = max(0, 100 - len(issues) * 25 - len(warnings) * 10)

        return {
            'score': score,
            'issues': issues,
            'warnings': warnings,
            'recommendations': recommendations,
            # Case-insensitive scans: the warning messages above are capitalized
            'has_robots_txt': len([w for w in warnings if 'robots.txt' in w['message'].lower()]) == 0,
            'has_sitemap': len([w for w in warnings if 'sitemap' in w['message'].lower()]) == 0,
            'has_structured_data': bool(structured_data),
            'has_canonical': bool(canonical)
        }

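# A hedged extension sketch: sitemaps do not have to live at /sitemap.xml; a
# site may declare them in robots.txt via "Sitemap:" lines instead, so a
# fallback lookup could parse those before reporting a missing sitemap. The
# helper name is an illustrative assumption.
def _sitemaps_declared_in_robots(robots_body: str) -> List[str]:
    """Collect sitemap URLs declared in a robots.txt body."""
    return [line.split(':', 1)[1].strip()
            for line in robots_body.splitlines()
            if line.lower().startswith('sitemap:')]
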
class PerformanceAnalyzer(BaseAnalyzer):
    """Analyzes page performance"""

    def analyze(self, url: str) -> Dict[str, Any]:
        """Enhanced performance analysis with specific fixes"""
        try:
            start_time = time.time()
            response = self.session.get(url, timeout=20)
            load_time = time.time() - start_time

            issues = []
            warnings = []
            recommendations = []

            # Check load time
            if load_time > 3:
                issues.append({
                    'type': 'critical',
                    'message': f'Page load time too slow ({load_time:.2f}s)',
                    'location': 'Page performance',
                    'current_value': f'{load_time:.2f}s',
                    'fix': 'Optimize page speed (target < 3 seconds)',
                    'code_example': 'Optimize images, minify CSS/JS, use CDN',
                    'action': 'optimize_page_speed'
                })
            elif load_time > 2:
                warnings.append({
                    'type': 'warning',
                    'message': f'Page load time could be improved ({load_time:.2f}s)',
                    'location': 'Page performance',
                    'current_value': f'{load_time:.2f}s',
                    'fix': 'Optimize for faster loading',
                    'code_example': 'Compress images, enable caching',
                    'action': 'improve_page_speed'
                })

            # Check for compression
            content_encoding = response.headers.get('Content-Encoding')
            if not content_encoding:
                warnings.append({
                    'type': 'warning',
                    'message': 'No compression detected',
                    'location': 'Server configuration',
                    'fix': 'Enable GZIP compression',
                    'code_example': 'Add to .htaccess: SetOutputFilter DEFLATE',
                    'action': 'enable_compression'
                })

            # Check for caching headers
            cache_headers = ['Cache-Control', 'Expires', 'ETag']
            has_cache = any(response.headers.get(header) for header in cache_headers)
            if not has_cache:
                warnings.append({
                    'type': 'warning',
                    'message': 'No caching headers found',
                    'location': 'Server configuration',
                    'fix': 'Add caching headers',
                    'code_example': 'Cache-Control: max-age=31536000',
                    'action': 'add_caching_headers'
                })

            score = max(0, 100 - len(issues) * 25 - len(warnings) * 10)

            return {
                'score': score,
                'load_time': load_time,
                'is_compressed': bool(content_encoding),
                'has_cache': has_cache,
                'issues': issues,
                'warnings': warnings,
                'recommendations': recommendations
            }
        except Exception as e:
            logger.warning(f"Performance analysis failed for {url}: {e}")
            return {
                'score': 0, 'error': f'Performance analysis failed: {str(e)}',
                'load_time': 0, 'is_compressed': False, 'has_cache': False,
                'issues': [{'type': 'critical', 'message': 'Performance analysis failed', 'location': 'Page', 'fix': 'Check page speed manually', 'action': 'manual_check'}],
                'warnings': [{'type': 'warning', 'message': 'Could not analyze performance', 'location': 'Page', 'fix': 'Use PageSpeed Insights', 'action': 'manual_check'}],
                'recommendations': [{'type': 'recommendation', 'message': 'Check page speed manually', 'priority': 'medium', 'action': 'manual_check'}]
            }

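# A note in sketch form: load_time above is wall-clock time for the full body
# download from this client, so it only approximates end-user experience.
# requests also exposes Response.elapsed (time until the response headers
# arrived), which can separate server latency from transfer time. The helper
# name is an illustrative assumption.
def _split_latency(response: requests.Response, total_seconds: float) -> Dict[str, float]:
    """Illustrative breakdown of a timed request into TTFB and body transfer."""
    ttfb = response.elapsed.total_seconds()  # roughly time-to-first-byte
    return {'time_to_first_byte': ttfb,
            'body_transfer': max(0.0, total_seconds - ttfb)}
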
class AccessibilityAnalyzer(BaseAnalyzer):
    """Analyzes accessibility features"""

    def analyze(self, html_content: str) -> Dict[str, Any]:
        """Enhanced accessibility analysis with specific fixes"""
        soup = BeautifulSoup(html_content, 'html.parser')
        issues = []
        warnings = []
        recommendations = []

        # Check for alt text on images
        images = soup.find_all('img')
        images_without_alt = [img for img in images if not img.get('alt')]
        if images_without_alt:
            issues.append({
                'type': 'critical',
                'message': f'Images without alt text ({len(images_without_alt)} found)',
                'location': 'Images',
                'current_value': f'{len(images_without_alt)} images without alt',
                'fix': 'Add descriptive alt text to all images',
                'code_example': '<img src="image.jpg" alt="Descriptive text about the image">',
                'action': 'add_alt_text'
            })

        # Check for form labels
        forms = soup.find_all('form')
        for form in forms:
            inputs = form.find_all(['input', 'textarea', 'select'])
            for input_elem in inputs:
                if input_elem.get('type') not in ['hidden', 'submit', 'button']:
                    input_id = input_elem.get('id')
                    if input_id:
                        label = soup.find('label', attrs={'for': input_id})
                        if not label:
                            warnings.append({
                                'type': 'warning',
                                'message': f'Input without label (ID: {input_id})',
                                'location': 'Form',
                                'current_value': f'Input ID: {input_id}',
                                'fix': 'Add label for input field',
                                'code_example': f'<label for="{input_id}">Field Label</label>',
                                'action': 'add_form_label'
                            })

        # Check for heading hierarchy
        headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
        if headings:
            h1_count = len([h for h in headings if h.name == 'h1'])
            if h1_count == 0:
                issues.append({
                    'type': 'critical',
                    'message': 'No H1 heading found',
                    'location': 'Page structure',
                    'fix': 'Add H1 heading for main content',
                    'code_example': '<h1>Main Page Heading</h1>',
                    'action': 'add_h1_heading'
                })

        # Check for color contrast (basic check)
        style_tags = soup.find_all('style')
        inline_styles = soup.find_all(style=True)
        if style_tags or inline_styles:
            warnings.append({
                'type': 'warning',
                'message': 'Custom styles found - check color contrast',
                'location': 'CSS',
                'fix': 'Ensure sufficient color contrast (4.5:1 for normal text)',
                'code_example': 'Use tools like WebAIM Contrast Checker',
                'action': 'check_color_contrast'
            })

        score = max(0, 100 - len(issues) * 25 - len(warnings) * 10)

        return {
            'score': score,
            'issues': issues,
            'warnings': warnings,
            'recommendations': recommendations,
            'images_count': len(images),
            'images_without_alt': len(images_without_alt),
            'forms_count': len(forms),
            'headings_count': len(headings)
        }

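# A hedged refinement sketch: the label check above only recognizes
# <label for="...">, but inputs can also get an accessible name from a
# wrapping <label> or from ARIA attributes. The helper name is an
# illustrative assumption, not project API.
def _has_accessible_name(soup: BeautifulSoup, input_elem) -> bool:
    """True if an input is named by a label, a wrapping label, or ARIA."""
    input_id = input_elem.get('id')
    if input_id and soup.find('label', attrs={'for': input_id}):
        return True
    return bool(input_elem.get('aria-label')
                or input_elem.get('aria-labelledby')
                or input_elem.find_parent('label'))
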
class UserExperienceAnalyzer(BaseAnalyzer):
    """Analyzes user experience elements"""

    def analyze(self, html_content: str, url: str) -> Dict[str, Any]:
        """Enhanced user experience analysis with specific fixes"""
        soup = BeautifulSoup(html_content, 'html.parser')
        issues = []
        warnings = []
        recommendations = []

        # Check for mobile responsiveness indicators
        viewport = soup.find('meta', attrs={'name': 'viewport'})
        if not viewport:
            issues.append({
                'type': 'critical',
                'message': 'Missing viewport meta tag for mobile',
                'location': '<head>',
                'fix': 'Add viewport meta tag',
                'code_example': '<meta name="viewport" content="width=device-width, initial-scale=1.0">',
                'action': 'add_viewport_meta'
            })

        # Check for navigation menu
        nav_elements = soup.find_all(['nav', 'ul', 'ol'])
        if not nav_elements:
            warnings.append({
                'type': 'warning',
                'message': 'No navigation menu found',
                'location': 'Page structure',
                'fix': 'Add navigation menu',
                'code_example': '<nav><ul><li><a href="/">Home</a></li></ul></nav>',
                'action': 'add_navigation'
            })

        # Check for contact information
        contact_patterns = ['contact', 'phone', 'email', '@', 'tel:']
        page_text = soup.get_text().lower()
        has_contact = any(pattern in page_text for pattern in contact_patterns)
        if not has_contact:
            warnings.append({
                'type': 'warning',
                'message': 'No contact information found',
                'location': 'Page content',
                'fix': 'Add contact information',
                'code_example': '<p>Contact us: <a href="mailto:info@example.com">info@example.com</a></p>',
                'action': 'add_contact_info'
            })

        # Check for social media links
        social_patterns = ['facebook', 'twitter', 'linkedin', 'instagram']
        has_social = any(pattern in page_text for pattern in social_patterns)
        if not has_social:
            recommendations.append({
                'type': 'recommendation',
                'message': 'No social media links found',
                'location': 'Page content',
                'fix': 'Add social media links',
                'code_example': '<a href="https://facebook.com/yourpage">Facebook</a>',
                'action': 'add_social_links',
                'priority': 'low'
            })

        score = max(0, 100 - len(issues) * 25 - len(warnings) * 10)

        return {
            'score': score,
            'issues': issues,
            'warnings': warnings,
            'recommendations': recommendations,
            'has_viewport': bool(viewport),
            'has_navigation': bool(nav_elements),
            'has_contact': has_contact,
            'has_social': has_social
        }

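# Worked example (illustrative): a page with a viewport tag, a <nav> menu and
# a visible email address trips no criticals or warnings here, so it scores
# 100; the missing social links only add a low-priority recommendation, which
# does not affect the score. The demo function is a sketch, not project API.
def _demo_user_experience_score() -> None:
    """Sanity-check sketch for the UX scoring formula."""
    html = ('<html><head><meta name="viewport" '
            'content="width=device-width, initial-scale=1.0"></head>'
            '<body><nav><ul><li><a href="/">Home</a></li></ul></nav>'
            '<p>Contact us: info@example.com</p></body></html>')
    result = UserExperienceAnalyzer().analyze(html, 'https://example.com')
    assert result['score'] == 100
    assert result['has_social'] is False
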
class SecurityHeadersAnalyzer(BaseAnalyzer):
    """Analyzes security headers"""

    def analyze(self, url: str) -> Dict[str, Any]:
        """Enhanced security headers analysis with specific fixes"""
        try:
            response = self.session.get(url, timeout=15, allow_redirects=True)
            security_headers = {
                'X-Frame-Options': response.headers.get('X-Frame-Options'),
                'X-Content-Type-Options': response.headers.get('X-Content-Type-Options'),
                'X-XSS-Protection': response.headers.get('X-XSS-Protection'),
                'Strict-Transport-Security': response.headers.get('Strict-Transport-Security'),
                'Content-Security-Policy': response.headers.get('Content-Security-Policy'),
                'Referrer-Policy': response.headers.get('Referrer-Policy')
            }

            # Example values used in fix suggestions (max-age only applies to HSTS)
            header_examples = {
                'X-Frame-Options': 'X-Frame-Options: DENY',
                'X-Content-Type-Options': 'X-Content-Type-Options: nosniff',
                'X-XSS-Protection': 'X-XSS-Protection: 1; mode=block',
                'Strict-Transport-Security': 'Strict-Transport-Security: max-age=31536000',
                'Content-Security-Policy': "Content-Security-Policy: default-src 'self'",
                'Referrer-Policy': 'Referrer-Policy: strict-origin-when-cross-origin'
            }

            issues = []
            warnings = []
            recommendations = []
            present_headers = []
            missing_headers = []

            for header_name, header_value in security_headers.items():
                if header_value:
                    present_headers.append(header_name)
                else:
                    missing_headers.append(header_name)
                    if header_name in ['X-Frame-Options', 'X-Content-Type-Options']:
                        issues.append({
                            'type': 'critical',
                            'message': f'Missing {header_name} header',
                            'location': 'Server configuration',
                            'fix': f'Add {header_name} header',
                            'code_example': header_examples[header_name],
                            'action': f'add_{header_name.lower().replace("-", "_")}_header'
                        })
                    else:
                        warnings.append({
                            'type': 'warning',
                            'message': f'Missing {header_name} header',
                            'location': 'Server configuration',
                            'fix': f'Add {header_name} header for better security',
                            'code_example': header_examples[header_name],
                            'action': f'add_{header_name.lower().replace("-", "_")}_header'
                        })

            # Score proportionally so that all six headers present yields 100
            score = round(100 * len(present_headers) / len(security_headers))

            return {
                'score': score,
                'present_headers': present_headers,
                'missing_headers': missing_headers,
                'total_headers': len(present_headers),
                'issues': issues,
                'warnings': warnings,
                'recommendations': recommendations
            }
        except Exception as e:
            logger.warning(f"Security headers analysis failed for {url}: {e}")
            return {
                'score': 0, 'error': f'Error analyzing headers: {str(e)}',
                'present_headers': [], 'missing_headers': ['All security headers'],
                'total_headers': 0, 'issues': [{'type': 'critical', 'message': 'Could not analyze security headers', 'location': 'Server', 'fix': 'Check security headers manually', 'action': 'manual_check'}],
                'warnings': [{'type': 'warning', 'message': 'Security headers analysis failed', 'location': 'Server', 'fix': 'Verify security headers manually', 'action': 'manual_check'}],
                'recommendations': [{'type': 'recommendation', 'message': 'Check security headers manually', 'priority': 'medium', 'action': 'manual_check'}]
            }

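# A hedged server-side sketch: one common way to add the headers this analyzer
# checks is at the web server. The nginx directives below are illustrative
# values, not a recommendation tuned to any particular site.
#
#     add_header X-Frame-Options "DENY" always;
#     add_header X-Content-Type-Options "nosniff" always;
#     add_header Strict-Transport-Security "max-age=31536000" always;
#     add_header Referrer-Policy "strict-origin-when-cross-origin" always;
#     add_header Content-Security-Policy "default-src 'self'" always;
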
class KeywordAnalyzer(BaseAnalyzer):
    """Analyzes keyword usage and optimization"""

    def analyze(self, html_content: str, target_keywords: Optional[List[str]] = None) -> Dict[str, Any]:
        """Enhanced keyword analysis with specific locations"""
        if not target_keywords:
            # Keep the return shape consistent with the full analysis below
            return {'score': 0, 'issues': [], 'warnings': [], 'recommendations': [],
                    'target_keywords': [], 'keywords_found': []}

        soup = BeautifulSoup(html_content, 'html.parser')
        issues = []
        warnings = []
        recommendations = []

        page_text = soup.get_text().lower()
        title_tag = soup.find('title')
        title_text = title_tag.get_text().lower() if title_tag else ""

        for keyword in target_keywords:
            keyword_lower = keyword.lower()

            # Check if keyword is in title
            if keyword_lower not in title_text:
                issues.append({
                    'type': 'critical',
                    'message': f'Target keyword "{keyword}" not in title',
                    'location': '<title>',
                    'current_value': title_text,
                    'fix': f'Include keyword "{keyword}" in title',
                    'code_example': f'<title>{keyword} - Your Page Title</title>',
                    'action': 'add_keyword_to_title'
                })

            # Check keyword frequency (substring count over the page text)
            keyword_count = page_text.count(keyword_lower)
            if keyword_count == 0:
                issues.append({
                    'type': 'critical',
                    'message': f'Target keyword "{keyword}" not found in content',
                    'location': 'Page content',
                    'current_value': '0 occurrences',
                    'fix': f'Include keyword "{keyword}" naturally in content',
                    'code_example': f'Add "{keyword}" to your page content',
                    'action': 'add_keyword_to_content'
                })
            elif keyword_count < 2:
                warnings.append({
                    'type': 'warning',
                    'message': f'Target keyword "{keyword}" appears only {keyword_count} time(s)',
                    'location': 'Page content',
                    'current_value': f'{keyword_count} occurrence(s)',
                    'fix': f'Include keyword "{keyword}" more naturally',
                    'code_example': f'Add more instances of "{keyword}" to content',
                    'action': 'increase_keyword_density'
                })

        score = max(0, 100 - len(issues) * 25 - len(warnings) * 10)

        return {
            'score': score,
            'issues': issues,
            'warnings': warnings,
            'recommendations': recommendations,
            'target_keywords': target_keywords,
            'keywords_found': [kw for kw in target_keywords if kw.lower() in page_text]
        }

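# A hedged smoke-test driver, assuming this module is run directly (e.g.
# `python -m backend.services.seo_analyzer.analyzers https://example.com`).
# It fetches the page once and feeds the same HTML to every analyzer; the
# argument handling is illustrative, not project CLI.
if __name__ == '__main__':
    import sys

    target = sys.argv[1] if len(sys.argv) > 1 else 'https://example.com'
    html = requests.get(target, timeout=20).text

    reports = {
        'url_structure': URLStructureAnalyzer().analyze(target),
        'meta_data': MetaDataAnalyzer().analyze(html, target),
        'content': ContentAnalyzer().analyze(html, target),
        'technical': TechnicalSEOAnalyzer().analyze(html, target),
        'performance': PerformanceAnalyzer().analyze(target),
        'accessibility': AccessibilityAnalyzer().analyze(html),
        'user_experience': UserExperienceAnalyzer().analyze(html, target),
        'security_headers': SecurityHeadersAnalyzer().analyze(target),
    }
    for name, report in reports.items():
        logger.info(f"{name}: score={report['score']}")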