Files
ALwrity/backend/services/content_gap_analyzer/website_analyzer.py
2025-08-06 12:48:02 +05:30

558 lines
20 KiB
Python

"""
Website Analyzer Service
Converted from website_analyzer.py for FastAPI integration.
"""
from typing import Dict, Any, List, Optional
from sqlalchemy.orm import Session
from loguru import logger
from datetime import datetime
import asyncio
import json
from collections import Counter, defaultdict
# Import existing modules (will be updated to use FastAPI services)
from services.database import get_db_session
from .ai_engine_service import AIEngineService
class WebsiteAnalyzer:
"""Analyzes website content structure and performance."""
def __init__(self):
"""Initialize the website analyzer."""
self.ai_engine = AIEngineService()
logger.info("WebsiteAnalyzer initialized")
async def analyze_website(self, url: str, industry: str = "general") -> Dict[str, Any]:
"""
Analyze website content and structure.
Args:
url: Website URL to analyze
industry: Industry category
Returns:
Website analysis results
"""
try:
logger.info(f"Starting website analysis for {url}")
results = {
'website_url': url,
'industry': industry,
'content_analysis': {},
'structure_analysis': {},
'performance_analysis': {},
'seo_analysis': {},
'ai_insights': {},
'analysis_timestamp': datetime.utcnow().isoformat()
}
# Analyze content structure
content_analysis = await self._analyze_content_structure(url)
results['content_analysis'] = content_analysis
# Analyze website structure
structure_analysis = await self._analyze_website_structure(url)
results['structure_analysis'] = structure_analysis
# Analyze performance metrics
performance_analysis = await self._analyze_performance_metrics(url)
results['performance_analysis'] = performance_analysis
# Analyze SEO aspects
seo_analysis = await self._analyze_seo_aspects(url)
results['seo_analysis'] = seo_analysis
# Generate AI insights
ai_insights = await self._generate_ai_insights(results)
results['ai_insights'] = ai_insights
logger.info(f"Website analysis completed for {url}")
return results
except Exception as e:
logger.error(f"Error in website analysis: {str(e)}")
return {}
async def _analyze_content_structure(self, url: str) -> Dict[str, Any]:
"""
Analyze content structure of the website.
Args:
url: Website URL
Returns:
Content structure analysis results
"""
try:
logger.info(f"Analyzing content structure for {url}")
# TODO: Integrate with actual content analysis service
# This will crawl and analyze website content
# Simulate content structure analysis
content_analysis = {
'total_pages': 150,
'content_types': {
'blog_posts': 80,
'product_pages': 30,
'landing_pages': 20,
'guides': 20
},
'content_topics': [
'Industry trends',
'Best practices',
'Case studies',
'Tutorials',
'Expert insights',
'Product information',
'Company news',
'Customer testimonials'
],
'content_depth': {
'shallow': 20,
'medium': 60,
'deep': 70
},
'content_quality_score': 8.5,
'content_freshness': {
'recent': 40,
'moderate': 50,
'outdated': 10
},
'content_engagement': {
'avg_time_on_page': 180,
'bounce_rate': 0.35,
'pages_per_session': 2.5,
'social_shares': 45
}
}
logger.info("Content structure analysis completed")
return content_analysis
except Exception as e:
logger.error(f"Error in content structure analysis: {str(e)}")
return {}
async def _analyze_website_structure(self, url: str) -> Dict[str, Any]:
"""
Analyze website structure and navigation.
Args:
url: Website URL
Returns:
Website structure analysis results
"""
try:
logger.info(f"Analyzing website structure for {url}")
# TODO: Integrate with actual structure analysis service
# This will analyze website architecture and navigation
# Simulate website structure analysis
structure_analysis = {
'navigation_structure': {
'main_menu_items': 8,
'footer_links': 15,
'breadcrumb_usage': True,
'sitemap_available': True
},
'url_structure': {
'avg_url_length': 45,
'seo_friendly_urls': True,
'url_depth': 3,
'canonical_urls': True
},
'internal_linking': {
'avg_internal_links_per_page': 8,
'link_anchor_text_optimization': 75,
'broken_links': 2,
'orphaned_pages': 5
},
'mobile_friendliness': {
'responsive_design': True,
'mobile_optimized': True,
'touch_friendly': True,
'mobile_speed': 85
},
'page_speed': {
'desktop_speed': 85,
'mobile_speed': 75,
'first_contentful_paint': 1.2,
'largest_contentful_paint': 2.5
}
}
logger.info("Website structure analysis completed")
return structure_analysis
except Exception as e:
logger.error(f"Error in website structure analysis: {str(e)}")
return {}
async def _analyze_performance_metrics(self, url: str) -> Dict[str, Any]:
"""
Analyze website performance metrics.
Args:
url: Website URL
Returns:
Performance metrics analysis results
"""
try:
logger.info(f"Analyzing performance metrics for {url}")
# TODO: Integrate with actual performance analysis service
# This will analyze website performance metrics
# Simulate performance metrics analysis
performance_analysis = {
'traffic_metrics': {
'monthly_visitors': '50K+',
'page_views': '150K+',
'unique_visitors': '35K+',
'traffic_growth': '15%'
},
'engagement_metrics': {
'avg_session_duration': '3:45',
'bounce_rate': '35%',
'pages_per_session': 2.5,
'return_visitor_rate': '25%'
},
'conversion_metrics': {
'conversion_rate': '3.5%',
'lead_generation': '500+ monthly',
'sales_conversion': '2.1%',
'email_signups': '200+ monthly'
},
'social_metrics': {
'social_shares': 45,
'social_comments': 12,
'social_engagement_rate': '8.5%',
'social_reach': '10K+'
},
'technical_metrics': {
'page_load_time': 2.1,
'server_response_time': 0.8,
'time_to_interactive': 3.2,
'cumulative_layout_shift': 0.1
}
}
logger.info("Performance metrics analysis completed")
return performance_analysis
except Exception as e:
logger.error(f"Error in performance metrics analysis: {str(e)}")
return {}
async def _analyze_seo_aspects(self, url: str) -> Dict[str, Any]:
"""
Analyze SEO aspects of the website.
Args:
url: Website URL
Returns:
SEO analysis results
"""
try:
logger.info(f"Analyzing SEO aspects for {url}")
# TODO: Integrate with actual SEO analysis service
# This will analyze SEO aspects of the website
# Simulate SEO analysis
seo_analysis = {
'technical_seo': {
'title_tag_optimization': 85,
'meta_description_optimization': 80,
'h1_usage': 95,
'image_alt_text': 70,
'schema_markup': True,
'ssl_certificate': True
},
'on_page_seo': {
'keyword_density': 2.5,
'internal_linking': 8,
'external_linking': 3,
'content_length': 1200,
'readability_score': 75
},
'off_page_seo': {
'domain_authority': 65,
'backlinks': 2500,
'referring_domains': 150,
'social_signals': 45
},
'keyword_rankings': {
'ranking_keywords': 85,
'top_10_rankings': 25,
'top_3_rankings': 8,
'featured_snippets': 3
},
'mobile_seo': {
'mobile_friendly': True,
'mobile_speed': 75,
'mobile_usability': 90,
'amp_pages': 0
},
'local_seo': {
'google_my_business': True,
'local_citations': 45,
'local_keywords': 12,
'local_rankings': 8
}
}
logger.info("SEO analysis completed")
return seo_analysis
except Exception as e:
logger.error(f"Error in SEO analysis: {str(e)}")
return {}
async def _generate_ai_insights(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]:
"""
Generate AI-powered insights for website analysis.
Args:
analysis_results: Complete website analysis results
Returns:
AI-generated insights
"""
try:
logger.info("🤖 Generating AI-powered website insights")
# Prepare analysis summary for AI
analysis_summary = {
'url': analysis_results.get('website_url', ''),
'industry': analysis_results.get('industry', ''),
'content_count': analysis_results.get('content_analysis', {}).get('total_pages', 0),
'content_quality': analysis_results.get('content_analysis', {}).get('content_quality_score', 0),
'performance_score': analysis_results.get('performance_analysis', {}).get('traffic_metrics', {}).get('monthly_visitors', ''),
'seo_score': analysis_results.get('seo_analysis', {}).get('technical_seo', {}).get('title_tag_optimization', 0)
}
# Generate comprehensive AI insights using AI engine
ai_insights = await self.ai_engine.analyze_website_performance(analysis_summary)
if ai_insights:
logger.info("✅ Generated comprehensive AI website insights")
return ai_insights
else:
logger.warning("⚠️ Could not generate AI website insights")
return {}
except Exception as e:
logger.error(f"Error generating AI website insights: {str(e)}")
return {}
async def analyze_content_quality(self, url: str) -> Dict[str, Any]:
"""
Analyze content quality of the website.
Args:
url: Website URL
Returns:
Content quality analysis results
"""
try:
logger.info(f"Analyzing content quality for {url}")
# TODO: Integrate with actual content quality analysis service
# This will analyze content quality metrics
# Simulate content quality analysis
quality_analysis = {
'overall_quality_score': 8.5,
'quality_dimensions': {
'readability': 8.0,
'comprehensiveness': 9.0,
'accuracy': 8.5,
'engagement': 7.5,
'seo_optimization': 8.0
},
'content_strengths': [
'Comprehensive topic coverage',
'Expert-level insights',
'Clear structure and organization',
'Accurate information',
'Good readability'
],
'content_weaknesses': [
'Limited visual content',
'Missing interactive elements',
'Outdated information in some areas',
'Inconsistent content depth'
],
'improvement_areas': [
{
'area': 'Visual Content',
'current_score': 6.0,
'target_score': 9.0,
'improvement_suggestions': [
'Add more images and infographics',
'Include video content',
'Create visual guides',
'Add interactive elements'
]
},
{
'area': 'Content Freshness',
'current_score': 7.0,
'target_score': 9.0,
'improvement_suggestions': [
'Update outdated content',
'Add recent industry insights',
'Include current trends',
'Regular content audits'
]
}
]
}
logger.info("Content quality analysis completed")
return quality_analysis
except Exception as e:
logger.error(f"Error in content quality analysis: {str(e)}")
return {}
async def analyze_user_experience(self, url: str) -> Dict[str, Any]:
"""
Analyze user experience aspects of the website.
Args:
url: Website URL
Returns:
User experience analysis results
"""
try:
logger.info(f"Analyzing user experience for {url}")
# TODO: Integrate with actual UX analysis service
# This will analyze user experience metrics
# Simulate UX analysis
ux_analysis = {
'navigation_experience': {
'menu_clarity': 8.5,
'search_functionality': 7.0,
'breadcrumb_navigation': 9.0,
'mobile_navigation': 8.0
},
'content_accessibility': {
'font_readability': 8.5,
'color_contrast': 9.0,
'alt_text_usage': 7.5,
'keyboard_navigation': 8.0
},
'page_speed_experience': {
'loading_perception': 7.5,
'interactive_elements': 8.0,
'smooth_scrolling': 8.5,
'mobile_performance': 7.0
},
'content_engagement': {
'content_clarity': 8.5,
'call_to_action_visibility': 7.5,
'content_scannability': 8.0,
'information_architecture': 8.5
},
'overall_ux_score': 8.2,
'improvement_suggestions': [
'Improve search functionality',
'Add more visual content',
'Optimize mobile experience',
'Enhance call-to-action visibility'
]
}
logger.info("User experience analysis completed")
return ux_analysis
except Exception as e:
logger.error(f"Error in user experience analysis: {str(e)}")
return {}
async def get_website_summary(self, analysis_id: str) -> Dict[str, Any]:
"""
Get a summary of website analysis.
Args:
analysis_id: Analysis identifier
Returns:
Website analysis summary
"""
try:
logger.info(f"Getting website analysis summary for {analysis_id}")
# TODO: Retrieve analysis from database
# This will be implemented when database integration is complete
summary = {
'analysis_id': analysis_id,
'pages_analyzed': 25,
'content_score': 8.5,
'seo_score': 7.8,
'user_experience_score': 8.2,
'improvement_areas': [
'Content depth and comprehensiveness',
'SEO optimization',
'Mobile responsiveness'
],
'timestamp': datetime.utcnow().isoformat()
}
return summary
except Exception as e:
logger.error(f"Error getting website summary: {str(e)}")
return {}
async def health_check(self) -> Dict[str, Any]:
"""
Health check for the website analyzer service.
Returns:
Health status information
"""
try:
logger.info("Performing health check for WebsiteAnalyzer")
health_status = {
'service': 'WebsiteAnalyzer',
'status': 'healthy',
'dependencies': {
'ai_engine': 'operational'
},
'capabilities': {
'content_analysis': 'operational',
'structure_analysis': 'operational',
'performance_analysis': 'operational',
'seo_analysis': 'operational'
},
'timestamp': datetime.utcnow().isoformat()
}
logger.info("WebsiteAnalyzer health check passed")
return health_status
except Exception as e:
logger.error(f"WebsiteAnalyzer health check failed: {str(e)}")
return {
'service': 'WebsiteAnalyzer',
'status': 'unhealthy',
'error': str(e),
'timestamp': datetime.utcnow().isoformat()
}