Files
ALwrity/ToBeMigrated/utils/website_analyzer/content_gap_analyzer.py
2025-08-06 16:29:49 +05:30

134 lines
5.4 KiB
Python

import json
import logging
import traceback
from typing import Dict
# Module-level logger: the class logs through the name ``logger``, which the
# visible file never defined (every call would have raised NameError).
logger = logging.getLogger(__name__)


class ContentGapAnalyzer:
    """Derive content gaps and recommendations from a base website analysis.

    The wrapped ``analyzer`` must provide ``analyze_website(url)`` returning a
    dict with a ``"success"`` flag and, on success, a ``data["analysis"]``
    mapping holding the ``content_info``, ``basic_info`` and ``performance``
    sections.

    NOTE(review): ``_analyze_content_gaps`` and ``_generate_recommendations``
    are called by :meth:`analyze` but are not defined in the visible part of
    this class; presumably they are supplied elsewhere (e.g. a subclass) --
    confirm. Until then the call fails and is reported as a ``gap_analysis``
    stage error.
    """

    def __init__(self, analyzer):
        """Keep a reference to the base website analyzer used by ``analyze``."""
        self.analyzer = analyzer

    @staticmethod
    def _failure(stage: str, error_msg: str, error_details: Dict) -> Dict:
        """Build the uniform failure payload returned by ``analyze``."""
        return {
            "success": False,
            "error": error_msg,
            "error_details": error_details,
            "stage": stage,
        }

    @staticmethod
    def _exception_details(exc: BaseException) -> Dict:
        """Describe ``exc`` with its type name and a formatted traceback."""
        return {
            "type": type(exc).__name__,
            # str(exc.__traceback__) only yields "<traceback object at 0x...>";
            # format the real stack so the payload is actually useful.
            "traceback": "".join(
                traceback.format_exception(type(exc), exc, exc.__traceback__)
            ),
        }

    def analyze(self, url: str) -> Dict:
        """
        Analyze content gaps for a given URL.

        Args:
            url (str): The URL to analyze

        Returns:
            Dict: On success, ``{"success": True, "data": {...}}`` where
            ``data`` holds ``content_gaps``, ``recommendations`` and
            ``metrics``. On failure, ``{"success": False, "error": ...,
            "error_details": ..., "stage": ...}`` where ``stage`` is one of
            ``base_analysis``, ``section_validation``, ``gap_analysis``,
            ``recommendation_generation`` or ``general``.
        """
        try:
            # Get base analysis
            logger.info(f"Starting content gap analysis for URL: {url}")
            base_analysis = self.analyzer.analyze_website(url)

            # Check for errors in base analysis
            if not base_analysis.get("success", False):
                error_msg = base_analysis.get("error", "Unknown error in website analysis")
                error_details = base_analysis.get("error_details", {})
                logger.error(f"Base analysis failed: {error_msg}")
                # default=str keeps logging from raising on non-JSON values,
                # which would otherwise mask the real error as a "general" one.
                logger.error(
                    f"Error details: {json.dumps(error_details, indent=2, default=str)}"
                )
                return self._failure("base_analysis", error_msg, error_details)

            # Extract required sections; an explicit None for "data" or
            # "analysis" is treated the same as the key being absent.
            analysis_data = (base_analysis.get("data") or {}).get("analysis") or {}
            required_sections = ["content_info", "basic_info", "performance"]
            missing_sections = [
                section for section in required_sections if section not in analysis_data
            ]
            if missing_sections:
                available_sections = list(analysis_data.keys())
                error_msg = f"Missing required analysis sections: {', '.join(missing_sections)}"
                logger.error(error_msg)
                logger.error(f"Available sections: {available_sections}")
                return self._failure(
                    "section_validation",
                    error_msg,
                    {
                        "missing_sections": missing_sections,
                        "available_sections": available_sections,
                    },
                )

            # All three keys were just validated, so plain indexing is safe.
            content_info = analysis_data["content_info"]
            basic_info = analysis_data["basic_info"]
            performance = analysis_data["performance"]

            # Analyze content gaps
            try:
                gaps = self._analyze_content_gaps(content_info, basic_info, performance)
            except Exception as e:
                error_msg = f"Error analyzing content gaps: {str(e)}"
                logger.error(error_msg, exc_info=True)
                return self._failure("gap_analysis", error_msg, self._exception_details(e))

            # Generate recommendations
            try:
                recommendations = self._generate_recommendations(gaps)
            except Exception as e:
                error_msg = f"Error generating recommendations: {str(e)}"
                logger.error(error_msg, exc_info=True)
                return self._failure(
                    "recommendation_generation", error_msg, self._exception_details(e)
                )

            return {
                "success": True,
                "data": {
                    "content_gaps": gaps,
                    "recommendations": recommendations,
                    "metrics": {
                        "word_count": content_info.get("word_count", 0),
                        "heading_count": content_info.get("heading_count", 0),
                        "image_count": content_info.get("image_count", 0),
                        "link_count": content_info.get("link_count", 0),
                        "paragraph_count": content_info.get("paragraph_count", 0),
                        "load_time": performance.get("load_time", 0),
                        "response_time": performance.get("response_time", 0),
                    },
                },
            }
        except Exception as e:
            # Top-level boundary: any unexpected failure is reported to the
            # caller as a structured error instead of propagating.
            error_msg = f"Error in content gap analysis: {str(e)}"
            logger.error(error_msg, exc_info=True)
            return self._failure("general", error_msg, self._exception_details(e))