Files
ALwrity/lib/ai_seo_tools/content_gap_analysis/website_analyzer.py

291 lines
12 KiB
Python

"""Website analyzer module for content gap analysis."""
import streamlit as st
from loguru import logger
from typing import Dict, Any, List, Optional
import asyncio
import sys
import os
import json
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer as BaseWebsiteAnalyzer
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
# Configure logging for this module: drop the existing handler(s), then attach
# a file sink at DEBUG level and a console sink at INFO level.
# NOTE(review): loguru's `logger` is presumably shared process-wide, so this
# import-time reset may also affect sinks configured by other modules — confirm
# that is intended.
logger.remove() # Remove default handler
# File sink: DEBUG and above, with rotation="50 MB" and retention="10 days".
logger.add(
"logs/content_gap_website_analyzer.log",
rotation="50 MB",
retention="10 days",
level="DEBUG",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
# Console sink: INFO and above, written to stdout with colour markup tags in
# the format string.
logger.add(
sys.stdout,
level="INFO",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
# Ensure logs directory exists
# NOTE(review): this runs *after* the file sink above has been added;
# presumably loguru creates the parent directory itself — confirm, or move this
# call ahead of the first logger.add().
os.makedirs("logs", exist_ok=True)
class WebsiteAnalyzer(BaseWebsiteAnalyzer):
    """Extended website analyzer for content gap analysis.

    Adds an AI-assisted competitor comparison (``analyze_content_gaps``), a
    progress-tracked single-site analysis (``analyze``) and private helpers
    that flatten sections of a website-analysis dictionary into metric
    dictionaries.

    ``self.analyze_website`` and ``self.progress`` are not defined in this
    class; they are presumably provided by ``BaseWebsiteAnalyzer``.
    """

    def __init__(self):
        """Initialize the base analyzer and log that this subclass is ready."""
        super().__init__()
        logger.info("ContentGapWebsiteAnalyzer initialized")

    @staticmethod
    def _gap_failure(message: str) -> Dict[str, Any]:
        """Build the error-shaped result returned by ``analyze_content_gaps``."""
        return {
            'error': message,
            'gaps': [],
            'recommendations': []
        }

    def analyze_content_gaps(self, url: str, competitor_urls: List[str]) -> Dict[str, Any]:
        """
        Analyze content gaps between the target website and competitors.

        The target and every competitor are run through ``analyze_website``;
        the successful results are embedded as JSON in a prompt sent to
        ``llm_text_gen``.

        Args:
            url: The target URL to analyze
            competitor_urls: List of competitor URLs to compare against

        Returns:
            On success, a dictionary with ``'gaps'`` and ``'recommendations'``
            (each defaulting to an empty list when the model omits it).
            On failure, the same two keys as empty lists plus an ``'error'``
            message. Exceptions are logged and reported through ``'error'``
            rather than propagated.
        """
        try:
            # Analyze target website
            target_analysis = self.analyze_website(url)
            if not target_analysis.get('success', False):
                return self._gap_failure(
                    target_analysis.get('error', 'Unknown error in target analysis')
                )

            # Analyze competitor websites; failed ones are skipped, but logged
            # so that a thin comparison can be explained afterwards.
            competitor_analyses = []
            for competitor_url in competitor_urls:
                competitor_analysis = self.analyze_website(competitor_url)
                if competitor_analysis.get('success', False):
                    competitor_analyses.append(competitor_analysis['data'])
                else:
                    logger.warning(
                        "Skipping competitor {}: {}",
                        competitor_url,
                        competitor_analysis.get('error', 'analysis failed'),
                    )

            # Generate content gap analysis using AI
            prompt = (
                "Analyze content gaps between the target website and competitors:\n"
                "Target Website:\n"
                f"{json.dumps(target_analysis['data'], indent=2)}\n"
                "Competitor Websites:\n"
                f"{json.dumps(competitor_analyses, indent=2)}\n"
                "Identify:\n"
                "1. Missing content topics\n"
                "2. Content depth differences\n"
                "3. Keyword gaps\n"
                "4. Content structure improvements\n"
                "5. Content quality recommendations\n"
                "Format the response as JSON with 'gaps' and 'recommendations' keys."
            )

            # Get AI analysis
            llm_result = llm_text_gen(
                prompt=prompt,
                system_prompt="You are an SEO expert specializing in content gap analysis.",
                response_format="json_object"
            )
            if not llm_result:
                return self._gap_failure('Failed to generate content gap analysis')

            # The return type of llm_text_gen is not visible from this module:
            # accept an already-parsed mapping as well as raw JSON text.
            if isinstance(llm_result, (str, bytes, bytearray)):
                llm_result = json.loads(llm_result)
            if not isinstance(llm_result, dict):
                return self._gap_failure(
                    'Content gap analysis response was not a JSON object'
                )

            return {
                'gaps': llm_result.get('gaps', []),
                'recommendations': llm_result.get('recommendations', [])
            }
        except Exception as e:
            error_msg = f"Error analyzing content gaps: {str(e)}"
            # logger.exception attaches the active traceback; loguru would
            # treat an `exc_info=True` keyword as a str.format() argument.
            logger.exception(error_msg)
            return self._gap_failure(error_msg)

    def analyze(self, url: str) -> Dict[str, Any]:
        """
        Analyze a website for content gaps and SEO opportunities.

        Progress is published by passing a status dictionary to
        ``self.progress.update`` at the start, on failure and on completion.

        Args:
            url: The URL to analyze

        Returns:
            On success: ``{'success': True, 'data': {...}, 'progress': {...}}``
            where ``data`` holds ``seo_metrics``, ``performance_metrics`` and
            the raw ``website_analysis`` data.
            On failure: ``{'error': ..., 'error_details': ..., 'progress': ...}``.
        """
        # Built outside the try block so the except handler can always use it.
        progress = {
            'status': 'in_progress',
            'current_stage': 'content_analysis',
            'current_step': 'Initializing analysis',
            'progress': 0,
            'details': 'Starting website analysis...'
        }
        try:
            self.progress.update(progress)

            # Get base website analysis
            logger.info("Starting base website analysis")
            website_analysis = self.analyze_website(url)
            if not website_analysis.get('success', False):
                error_msg = website_analysis.get('error', 'Unknown error in website analysis')
                logger.error(f"Error in website analysis: {error_msg}")
                progress['status'] = 'error'
                progress['details'] = error_msg
                self.progress.update(progress)
                return {
                    'error': error_msg,
                    'error_details': website_analysis.get('error_details', {}),
                    'progress': progress
                }

            analysis_data = website_analysis['data']
            seo_metrics = self._extract_seo_metrics(analysis_data)
            performance_metrics = self._extract_performance_metrics(analysis_data)

            # Update progress
            progress['status'] = 'completed'
            progress['progress'] = 100
            progress['details'] = 'Analysis completed successfully'
            self.progress.update(progress)
            return {
                'success': True,
                'data': {
                    'seo_metrics': seo_metrics,
                    'performance_metrics': performance_metrics,
                    'website_analysis': analysis_data
                },
                'progress': progress
            }
        except Exception as e:
            # Local import keeps this block self-contained.
            import traceback

            error_msg = f"Error in content gap analysis: {str(e)}"
            logger.exception(error_msg)
            progress['status'] = 'error'
            progress['details'] = error_msg
            self.progress.update(progress)
            return {
                'error': error_msg,
                'error_details': {
                    'type': type(e).__name__,
                    # Formatted stack trace; str() of a traceback object only
                    # yields its repr ("<traceback object at 0x...>").
                    'traceback': traceback.format_exc()
                },
                'progress': progress
            }

    def _extract_seo_metrics(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Extract SEO-related metrics from website analysis.

        Reads ``website_analysis['analysis']['seo_info']``; every missing
        level falls back to an empty dict, ``0`` for scores and counts.
        Returns ``{}`` if extraction raises.
        """
        try:
            seo_info = website_analysis.get('analysis', {}).get('seo_info', {})
            meta_tags = seo_info.get('meta_tags', {})
            content = seo_info.get('content', {})
            return {
                'overall_score': seo_info.get('overall_score', 0),
                'meta_tags': {
                    'title': meta_tags.get('title', {}),
                    'description': meta_tags.get('description', {}),
                    'keywords': meta_tags.get('keywords', {})
                },
                'content': {
                    'word_count': content.get('word_count', 0),
                    'readability_score': content.get('readability_score', 0),
                    'content_quality_score': content.get('content_quality_score', 0)
                }
            }
        except Exception as e:
            logger.exception(f"Error extracting SEO metrics: {str(e)}")
            return {}

    def _extract_performance_metrics(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Extract performance metrics from website analysis.

        Reads ``website_analysis['analysis']['performance']``; missing values
        default to ``0``. Returns ``{}`` if extraction raises.
        """
        try:
            performance_info = website_analysis.get('analysis', {}).get('performance', {})
            return {
                'load_time': performance_info.get('load_time', 0),
                'page_size': performance_info.get('page_size', 0),
                'resource_count': performance_info.get('resource_count', 0),
                'performance_score': performance_info.get('performance_score', 0)
            }
        except Exception as e:
            logger.exception(f"Error extracting performance metrics: {str(e)}")
            return {}

    def _extract_content_metrics(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Extract content-related metrics from website analysis.

        Reads ``website_analysis['analysis']['content_info']``. Like the other
        extractors in this class, a missing section yields the default values
        (``0`` for counts, ``False`` for flags) instead of an error.
        Returns ``{}`` if extraction raises.
        """
        try:
            content_info = website_analysis.get('analysis', {}).get('content_info', {})
            return {
                'word_count': content_info.get('word_count', 0),
                'heading_count': content_info.get('heading_count', 0),
                'image_count': content_info.get('image_count', 0),
                'link_count': content_info.get('link_count', 0),
                'has_meta_description': content_info.get('has_meta_description', False),
                'has_robots_txt': content_info.get('has_robots_txt', False),
                'has_sitemap': content_info.get('has_sitemap', False)
            }
        except Exception as e:
            logger.exception(f"Error extracting content metrics: {str(e)}")
            return {}

    def _extract_technical_info(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Extract technical information from website analysis.

        Reads ``website_analysis['analysis']['basic_info']``; missing values
        default to ``''`` or ``{}``. Returns ``{}`` if extraction raises.
        """
        try:
            basic_info = website_analysis.get('analysis', {}).get('basic_info', {})
            return {
                'title': basic_info.get('title', ''),
                'meta_description': basic_info.get('meta_description', ''),
                'headers': basic_info.get('headers', {}),
                'robots_txt': basic_info.get('robots_txt', ''),
                'sitemap': basic_info.get('sitemap', ''),
                'server_info': basic_info.get('server_info', {}),
                'security_info': basic_info.get('security_info', {})
            }
        except Exception as e:
            logger.exception(f"Error extracting technical info: {str(e)}")
            return {}

    def _generate_insights(self, content_metrics: Dict[str, Any], seo_metrics: Dict[str, Any]) -> List[str]:
        """Generate content insights based on analysis results.

        Args:
            content_metrics: Metrics shaped like the result of
                ``_extract_content_metrics``. Absent keys are treated as
                ``0`` / ``False`` so that one missing metric does not discard
                every other insight.
            seo_metrics: Metrics carrying an ``'overall_score'`` entry
                (treated as ``0`` when absent).

        Returns:
            Human-readable insight strings in a fixed order; ``[]`` if
            generation raises.
        """
        try:
            insights = []

            # Content insights
            word_count = content_metrics.get('word_count', 0)
            if word_count < 300:
                insights.append("Content length is below recommended minimum (300 words)")
            elif word_count > 2000:
                insights.append("Content length is above recommended maximum (2000 words)")
            if content_metrics.get('heading_count', 0) < 2:
                insights.append("Content structure could be improved with more headings")
            if content_metrics.get('image_count', 0) == 0:
                insights.append("Consider adding images to improve content engagement")

            # SEO insights
            seo_score = seo_metrics.get('overall_score', 0)
            if seo_score < 60:
                insights.append("SEO optimization needs significant improvement")
            elif seo_score < 80:
                insights.append("SEO optimization has room for improvement")
            if not content_metrics.get('has_meta_description', False):
                insights.append("Missing meta description - important for SEO")
            if not content_metrics.get('has_robots_txt', False):
                insights.append("Missing robots.txt - important for search engine crawling")
            if not content_metrics.get('has_sitemap', False):
                insights.append("Missing sitemap.xml - important for search engine indexing")
            return insights
        except Exception as e:
            logger.exception(f"Error generating insights: {str(e)}")
            return []