558 lines
20 KiB
Python
558 lines
20 KiB
Python
"""
|
|
Website Analyzer Service
|
|
Converted from website_analyzer.py for FastAPI integration.
|
|
"""
|
|
|
|
from typing import Dict, Any, List, Optional
|
|
from sqlalchemy.orm import Session
|
|
from loguru import logger
|
|
from datetime import datetime
|
|
import asyncio
|
|
import json
|
|
from collections import Counter, defaultdict
|
|
|
|
# Import existing modules (will be updated to use FastAPI services)
|
|
from services.database import get_db_session
|
|
from .ai_engine_service import AIEngineService
|
|
|
|
class WebsiteAnalyzer:
|
|
"""Analyzes website content structure and performance."""
|
|
|
|
def __init__(self):
|
|
"""Initialize the website analyzer."""
|
|
self.ai_engine = AIEngineService()
|
|
|
|
logger.info("WebsiteAnalyzer initialized")
|
|
|
|
async def analyze_website(self, url: str, industry: str = "general") -> Dict[str, Any]:
|
|
"""
|
|
Analyze website content and structure.
|
|
|
|
Args:
|
|
url: Website URL to analyze
|
|
industry: Industry category
|
|
|
|
Returns:
|
|
Website analysis results
|
|
"""
|
|
try:
|
|
logger.info(f"Starting website analysis for {url}")
|
|
|
|
results = {
|
|
'website_url': url,
|
|
'industry': industry,
|
|
'content_analysis': {},
|
|
'structure_analysis': {},
|
|
'performance_analysis': {},
|
|
'seo_analysis': {},
|
|
'ai_insights': {},
|
|
'analysis_timestamp': datetime.utcnow().isoformat()
|
|
}
|
|
|
|
# Analyze content structure
|
|
content_analysis = await self._analyze_content_structure(url)
|
|
results['content_analysis'] = content_analysis
|
|
|
|
# Analyze website structure
|
|
structure_analysis = await self._analyze_website_structure(url)
|
|
results['structure_analysis'] = structure_analysis
|
|
|
|
# Analyze performance metrics
|
|
performance_analysis = await self._analyze_performance_metrics(url)
|
|
results['performance_analysis'] = performance_analysis
|
|
|
|
# Analyze SEO aspects
|
|
seo_analysis = await self._analyze_seo_aspects(url)
|
|
results['seo_analysis'] = seo_analysis
|
|
|
|
# Generate AI insights
|
|
ai_insights = await self._generate_ai_insights(results)
|
|
results['ai_insights'] = ai_insights
|
|
|
|
logger.info(f"Website analysis completed for {url}")
|
|
return results
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error in website analysis: {str(e)}")
|
|
return {}
|
|
|
|
async def _analyze_content_structure(self, url: str) -> Dict[str, Any]:
|
|
"""
|
|
Analyze content structure of the website.
|
|
|
|
Args:
|
|
url: Website URL
|
|
|
|
Returns:
|
|
Content structure analysis results
|
|
"""
|
|
try:
|
|
logger.info(f"Analyzing content structure for {url}")
|
|
|
|
# TODO: Integrate with actual content analysis service
|
|
# This will crawl and analyze website content
|
|
|
|
# Simulate content structure analysis
|
|
content_analysis = {
|
|
'total_pages': 150,
|
|
'content_types': {
|
|
'blog_posts': 80,
|
|
'product_pages': 30,
|
|
'landing_pages': 20,
|
|
'guides': 20
|
|
},
|
|
'content_topics': [
|
|
'Industry trends',
|
|
'Best practices',
|
|
'Case studies',
|
|
'Tutorials',
|
|
'Expert insights',
|
|
'Product information',
|
|
'Company news',
|
|
'Customer testimonials'
|
|
],
|
|
'content_depth': {
|
|
'shallow': 20,
|
|
'medium': 60,
|
|
'deep': 70
|
|
},
|
|
'content_quality_score': 8.5,
|
|
'content_freshness': {
|
|
'recent': 40,
|
|
'moderate': 50,
|
|
'outdated': 10
|
|
},
|
|
'content_engagement': {
|
|
'avg_time_on_page': 180,
|
|
'bounce_rate': 0.35,
|
|
'pages_per_session': 2.5,
|
|
'social_shares': 45
|
|
}
|
|
}
|
|
|
|
logger.info("Content structure analysis completed")
|
|
return content_analysis
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error in content structure analysis: {str(e)}")
|
|
return {}
|
|
|
|
async def _analyze_website_structure(self, url: str) -> Dict[str, Any]:
|
|
"""
|
|
Analyze website structure and navigation.
|
|
|
|
Args:
|
|
url: Website URL
|
|
|
|
Returns:
|
|
Website structure analysis results
|
|
"""
|
|
try:
|
|
logger.info(f"Analyzing website structure for {url}")
|
|
|
|
# TODO: Integrate with actual structure analysis service
|
|
# This will analyze website architecture and navigation
|
|
|
|
# Simulate website structure analysis
|
|
structure_analysis = {
|
|
'navigation_structure': {
|
|
'main_menu_items': 8,
|
|
'footer_links': 15,
|
|
'breadcrumb_usage': True,
|
|
'sitemap_available': True
|
|
},
|
|
'url_structure': {
|
|
'avg_url_length': 45,
|
|
'seo_friendly_urls': True,
|
|
'url_depth': 3,
|
|
'canonical_urls': True
|
|
},
|
|
'internal_linking': {
|
|
'avg_internal_links_per_page': 8,
|
|
'link_anchor_text_optimization': 75,
|
|
'broken_links': 2,
|
|
'orphaned_pages': 5
|
|
},
|
|
'mobile_friendliness': {
|
|
'responsive_design': True,
|
|
'mobile_optimized': True,
|
|
'touch_friendly': True,
|
|
'mobile_speed': 85
|
|
},
|
|
'page_speed': {
|
|
'desktop_speed': 85,
|
|
'mobile_speed': 75,
|
|
'first_contentful_paint': 1.2,
|
|
'largest_contentful_paint': 2.5
|
|
}
|
|
}
|
|
|
|
logger.info("Website structure analysis completed")
|
|
return structure_analysis
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error in website structure analysis: {str(e)}")
|
|
return {}
|
|
|
|
async def _analyze_performance_metrics(self, url: str) -> Dict[str, Any]:
|
|
"""
|
|
Analyze website performance metrics.
|
|
|
|
Args:
|
|
url: Website URL
|
|
|
|
Returns:
|
|
Performance metrics analysis results
|
|
"""
|
|
try:
|
|
logger.info(f"Analyzing performance metrics for {url}")
|
|
|
|
# TODO: Integrate with actual performance analysis service
|
|
# This will analyze website performance metrics
|
|
|
|
# Simulate performance metrics analysis
|
|
performance_analysis = {
|
|
'traffic_metrics': {
|
|
'monthly_visitors': '50K+',
|
|
'page_views': '150K+',
|
|
'unique_visitors': '35K+',
|
|
'traffic_growth': '15%'
|
|
},
|
|
'engagement_metrics': {
|
|
'avg_session_duration': '3:45',
|
|
'bounce_rate': '35%',
|
|
'pages_per_session': 2.5,
|
|
'return_visitor_rate': '25%'
|
|
},
|
|
'conversion_metrics': {
|
|
'conversion_rate': '3.5%',
|
|
'lead_generation': '500+ monthly',
|
|
'sales_conversion': '2.1%',
|
|
'email_signups': '200+ monthly'
|
|
},
|
|
'social_metrics': {
|
|
'social_shares': 45,
|
|
'social_comments': 12,
|
|
'social_engagement_rate': '8.5%',
|
|
'social_reach': '10K+'
|
|
},
|
|
'technical_metrics': {
|
|
'page_load_time': 2.1,
|
|
'server_response_time': 0.8,
|
|
'time_to_interactive': 3.2,
|
|
'cumulative_layout_shift': 0.1
|
|
}
|
|
}
|
|
|
|
logger.info("Performance metrics analysis completed")
|
|
return performance_analysis
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error in performance metrics analysis: {str(e)}")
|
|
return {}
|
|
|
|
async def _analyze_seo_aspects(self, url: str) -> Dict[str, Any]:
|
|
"""
|
|
Analyze SEO aspects of the website.
|
|
|
|
Args:
|
|
url: Website URL
|
|
|
|
Returns:
|
|
SEO analysis results
|
|
"""
|
|
try:
|
|
logger.info(f"Analyzing SEO aspects for {url}")
|
|
|
|
# TODO: Integrate with actual SEO analysis service
|
|
# This will analyze SEO aspects of the website
|
|
|
|
# Simulate SEO analysis
|
|
seo_analysis = {
|
|
'technical_seo': {
|
|
'title_tag_optimization': 85,
|
|
'meta_description_optimization': 80,
|
|
'h1_usage': 95,
|
|
'image_alt_text': 70,
|
|
'schema_markup': True,
|
|
'ssl_certificate': True
|
|
},
|
|
'on_page_seo': {
|
|
'keyword_density': 2.5,
|
|
'internal_linking': 8,
|
|
'external_linking': 3,
|
|
'content_length': 1200,
|
|
'readability_score': 75
|
|
},
|
|
'off_page_seo': {
|
|
'domain_authority': 65,
|
|
'backlinks': 2500,
|
|
'referring_domains': 150,
|
|
'social_signals': 45
|
|
},
|
|
'keyword_rankings': {
|
|
'ranking_keywords': 85,
|
|
'top_10_rankings': 25,
|
|
'top_3_rankings': 8,
|
|
'featured_snippets': 3
|
|
},
|
|
'mobile_seo': {
|
|
'mobile_friendly': True,
|
|
'mobile_speed': 75,
|
|
'mobile_usability': 90,
|
|
'amp_pages': 0
|
|
},
|
|
'local_seo': {
|
|
'google_my_business': True,
|
|
'local_citations': 45,
|
|
'local_keywords': 12,
|
|
'local_rankings': 8
|
|
}
|
|
}
|
|
|
|
logger.info("SEO analysis completed")
|
|
return seo_analysis
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error in SEO analysis: {str(e)}")
|
|
return {}
|
|
|
|
async def _generate_ai_insights(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Generate AI-powered insights for website analysis.
|
|
|
|
Args:
|
|
analysis_results: Complete website analysis results
|
|
|
|
Returns:
|
|
AI-generated insights
|
|
"""
|
|
try:
|
|
logger.info("🤖 Generating AI-powered website insights")
|
|
|
|
# Prepare analysis summary for AI
|
|
analysis_summary = {
|
|
'url': analysis_results.get('website_url', ''),
|
|
'industry': analysis_results.get('industry', ''),
|
|
'content_count': analysis_results.get('content_analysis', {}).get('total_pages', 0),
|
|
'content_quality': analysis_results.get('content_analysis', {}).get('content_quality_score', 0),
|
|
'performance_score': analysis_results.get('performance_analysis', {}).get('traffic_metrics', {}).get('monthly_visitors', ''),
|
|
'seo_score': analysis_results.get('seo_analysis', {}).get('technical_seo', {}).get('title_tag_optimization', 0)
|
|
}
|
|
|
|
# Generate comprehensive AI insights using AI engine
|
|
ai_insights = await self.ai_engine.analyze_website_performance(analysis_summary)
|
|
|
|
if ai_insights:
|
|
logger.info("✅ Generated comprehensive AI website insights")
|
|
return ai_insights
|
|
else:
|
|
logger.warning("⚠️ Could not generate AI website insights")
|
|
return {}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error generating AI website insights: {str(e)}")
|
|
return {}
|
|
|
|
async def analyze_content_quality(self, url: str) -> Dict[str, Any]:
|
|
"""
|
|
Analyze content quality of the website.
|
|
|
|
Args:
|
|
url: Website URL
|
|
|
|
Returns:
|
|
Content quality analysis results
|
|
"""
|
|
try:
|
|
logger.info(f"Analyzing content quality for {url}")
|
|
|
|
# TODO: Integrate with actual content quality analysis service
|
|
# This will analyze content quality metrics
|
|
|
|
# Simulate content quality analysis
|
|
quality_analysis = {
|
|
'overall_quality_score': 8.5,
|
|
'quality_dimensions': {
|
|
'readability': 8.0,
|
|
'comprehensiveness': 9.0,
|
|
'accuracy': 8.5,
|
|
'engagement': 7.5,
|
|
'seo_optimization': 8.0
|
|
},
|
|
'content_strengths': [
|
|
'Comprehensive topic coverage',
|
|
'Expert-level insights',
|
|
'Clear structure and organization',
|
|
'Accurate information',
|
|
'Good readability'
|
|
],
|
|
'content_weaknesses': [
|
|
'Limited visual content',
|
|
'Missing interactive elements',
|
|
'Outdated information in some areas',
|
|
'Inconsistent content depth'
|
|
],
|
|
'improvement_areas': [
|
|
{
|
|
'area': 'Visual Content',
|
|
'current_score': 6.0,
|
|
'target_score': 9.0,
|
|
'improvement_suggestions': [
|
|
'Add more images and infographics',
|
|
'Include video content',
|
|
'Create visual guides',
|
|
'Add interactive elements'
|
|
]
|
|
},
|
|
{
|
|
'area': 'Content Freshness',
|
|
'current_score': 7.0,
|
|
'target_score': 9.0,
|
|
'improvement_suggestions': [
|
|
'Update outdated content',
|
|
'Add recent industry insights',
|
|
'Include current trends',
|
|
'Regular content audits'
|
|
]
|
|
}
|
|
]
|
|
}
|
|
|
|
logger.info("Content quality analysis completed")
|
|
return quality_analysis
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error in content quality analysis: {str(e)}")
|
|
return {}
|
|
|
|
async def analyze_user_experience(self, url: str) -> Dict[str, Any]:
|
|
"""
|
|
Analyze user experience aspects of the website.
|
|
|
|
Args:
|
|
url: Website URL
|
|
|
|
Returns:
|
|
User experience analysis results
|
|
"""
|
|
try:
|
|
logger.info(f"Analyzing user experience for {url}")
|
|
|
|
# TODO: Integrate with actual UX analysis service
|
|
# This will analyze user experience metrics
|
|
|
|
# Simulate UX analysis
|
|
ux_analysis = {
|
|
'navigation_experience': {
|
|
'menu_clarity': 8.5,
|
|
'search_functionality': 7.0,
|
|
'breadcrumb_navigation': 9.0,
|
|
'mobile_navigation': 8.0
|
|
},
|
|
'content_accessibility': {
|
|
'font_readability': 8.5,
|
|
'color_contrast': 9.0,
|
|
'alt_text_usage': 7.5,
|
|
'keyboard_navigation': 8.0
|
|
},
|
|
'page_speed_experience': {
|
|
'loading_perception': 7.5,
|
|
'interactive_elements': 8.0,
|
|
'smooth_scrolling': 8.5,
|
|
'mobile_performance': 7.0
|
|
},
|
|
'content_engagement': {
|
|
'content_clarity': 8.5,
|
|
'call_to_action_visibility': 7.5,
|
|
'content_scannability': 8.0,
|
|
'information_architecture': 8.5
|
|
},
|
|
'overall_ux_score': 8.2,
|
|
'improvement_suggestions': [
|
|
'Improve search functionality',
|
|
'Add more visual content',
|
|
'Optimize mobile experience',
|
|
'Enhance call-to-action visibility'
|
|
]
|
|
}
|
|
|
|
logger.info("User experience analysis completed")
|
|
return ux_analysis
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error in user experience analysis: {str(e)}")
|
|
return {}
|
|
|
|
async def get_website_summary(self, analysis_id: str) -> Dict[str, Any]:
|
|
"""
|
|
Get a summary of website analysis.
|
|
|
|
Args:
|
|
analysis_id: Analysis identifier
|
|
|
|
Returns:
|
|
Website analysis summary
|
|
"""
|
|
try:
|
|
logger.info(f"Getting website analysis summary for {analysis_id}")
|
|
|
|
# TODO: Retrieve analysis from database
|
|
# This will be implemented when database integration is complete
|
|
|
|
summary = {
|
|
'analysis_id': analysis_id,
|
|
'pages_analyzed': 25,
|
|
'content_score': 8.5,
|
|
'seo_score': 7.8,
|
|
'user_experience_score': 8.2,
|
|
'improvement_areas': [
|
|
'Content depth and comprehensiveness',
|
|
'SEO optimization',
|
|
'Mobile responsiveness'
|
|
],
|
|
'timestamp': datetime.utcnow().isoformat()
|
|
}
|
|
|
|
return summary
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting website summary: {str(e)}")
|
|
return {}
|
|
|
|
async def health_check(self) -> Dict[str, Any]:
|
|
"""
|
|
Health check for the website analyzer service.
|
|
|
|
Returns:
|
|
Health status information
|
|
"""
|
|
try:
|
|
logger.info("Performing health check for WebsiteAnalyzer")
|
|
|
|
health_status = {
|
|
'service': 'WebsiteAnalyzer',
|
|
'status': 'healthy',
|
|
'dependencies': {
|
|
'ai_engine': 'operational'
|
|
},
|
|
'capabilities': {
|
|
'content_analysis': 'operational',
|
|
'structure_analysis': 'operational',
|
|
'performance_analysis': 'operational',
|
|
'seo_analysis': 'operational'
|
|
},
|
|
'timestamp': datetime.utcnow().isoformat()
|
|
}
|
|
|
|
logger.info("WebsiteAnalyzer health check passed")
|
|
return health_status
|
|
|
|
except Exception as e:
|
|
logger.error(f"WebsiteAnalyzer health check failed: {str(e)}")
|
|
return {
|
|
'service': 'WebsiteAnalyzer',
|
|
'status': 'unhealthy',
|
|
'error': str(e),
|
|
'timestamp': datetime.utcnow().isoformat()
|
|
} |