Recovered state: integrated TrendSurferAgent, restored frontend/backend files, and cleaned up recovery scripts

This commit is contained in:
ajaysi
2026-02-08 13:56:57 +05:30
parent 1db10ccd0f
commit e404a86502
333 changed files with 42223 additions and 10875 deletions

View File

@@ -5,9 +5,13 @@ Comprehensive on-page SEO analyzer with AI-enhanced insights
for content optimization and technical improvements.
"""
import aiohttp
from bs4 import BeautifulSoup
from typing import Dict, Any, List, Optional
from datetime import datetime
from loguru import logger
import re
from urllib.parse import urlparse
class OnPageSEOService:
"""Service for comprehensive on-page SEO analysis"""
@@ -17,6 +21,155 @@ class OnPageSEOService:
self.service_name = "on_page_seo_analyzer"
logger.info(f"Initialized {self.service_name}")
async def _fetch_page(self, url: str) -> tuple[Optional[str], int]:
    """Fetch the body of ``url`` over HTTP.

    Args:
        url: Absolute URL to request.

    Returns:
        ``(body, 200)`` when the server answers with status 200,
        ``(None, status)`` for any other HTTP status, and ``(None, 500)``
        when the request itself fails (the error is logged, not raised).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (compatible; ALwritySEO/1.0; +https://alwrity.com)'
    }
    # aiohttp's documented timeout type is ClientTimeout; a bare number is
    # only accepted for backward compatibility.
    timeout = aiohttp.ClientTimeout(total=10)
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers, timeout=timeout) as response:
                if response.status == 200:
                    return await response.text(), 200
                return None, response.status
    except Exception as e:
        # Deliberate best-effort boundary: callers only get a status code.
        # The exception type is logged because some errors (e.g. timeouts)
        # have an empty string representation.
        logger.error(f"Error fetching {url}: {type(e).__name__}: {e}")
        return None, 500
def _analyze_meta_tags(self, soup: BeautifulSoup) -> Dict[str, Any]:
    """Score the page's title, meta description, viewport and social tags.

    Args:
        soup: Parsed document to inspect; it is only read, never modified.

    Returns:
        A dict with a display string for each inspected tag, a ``score``
        (starts at 100, 10-20 points off per issue, clamped at 0) and the
        list of human-readable ``issues``.
    """

    def content_of(tag) -> Optional[str]:
        # Indexing tag['content'] raises KeyError when the attribute is
        # absent, which would abort the whole analysis; read it defensively.
        if tag is None:
            return None
        value = tag.get('content')
        if not isinstance(value, str):
            return None
        return value.strip() or None

    # .string is None when <title> is empty or contains nested markup.
    raw_title = soup.title.string if soup.title else None
    # Strip so that indentation/newlines inside the tag do not inflate length.
    title = raw_title.strip() if raw_title else None

    desc_content = content_of(soup.find('meta', attrs={'name': 'description'}))
    viewport = soup.find('meta', attrs={'name': 'viewport'})
    robots_content = content_of(soup.find('meta', attrs={'name': 'robots'}))
    charset = soup.find('meta', attrs={'charset': True})

    # Social tags
    og_tags = (
        ('Title', soup.find('meta', property='og:title')),
        ('Desc', soup.find('meta', property='og:description')),
        ('Image', soup.find('meta', property='og:image')),
    )
    twitter_content = content_of(soup.find('meta', attrs={'name': 'twitter:card'}))

    issues = []
    score = 100

    # Title analysis
    if not title:
        issues.append("Missing title tag")
        score -= 20
    elif len(title) < 30 or len(title) > 60:
        issues.append(f"Title length ({len(title)} chars) should be 30-60 chars")
        score -= 10

    # Description analysis
    if not desc_content:
        issues.append("Missing meta description")
        score -= 20
    elif len(desc_content) < 70 or len(desc_content) > 160:
        issues.append(f"Description length ({len(desc_content)} chars) should be 70-160 chars")
        score -= 10

    # Viewport
    if not viewport:
        issues.append("Missing viewport meta tag")
        score -= 20

    og_found = [label for label, tag in og_tags if tag]

    return {
        "title_length": f"{len(title)} chars" if title else "Missing",
        "meta_description_length": f"{len(desc_content)} chars" if desc_content else "Missing",
        "has_viewport": bool(viewport),
        "charset": charset['charset'] if charset else "Missing",
        "robots_meta": robots_content or "Missing (Default: index, follow)",
        "og_tags": f"Found: {', '.join(og_found)}" if og_found else "None",
        "twitter_card": twitter_content or "Missing",
        "score": max(0, score),
        "issues": issues
    }
def _analyze_technical(self, soup: BeautifulSoup, url: str) -> Dict[str, Any]:
    """Check the canonical link, JSON-LD schema blocks and H1 usage.

    Args:
        soup: Parsed document to inspect.
        url: URL of the page; not used by the current checks.

    Returns:
        A dict with the canonical href (or ``"Missing"``), a schema count
        string, the number of H1 tags, a ``score`` (starts at 100, clamped
        at 0) and the list of ``issues``.
    """
    canonical = soup.find('link', attrs={'rel': 'canonical'})
    # A canonical <link> without an href carries no information; .get avoids
    # the KeyError that canonical['href'] would raise in that case.
    canonical_href = canonical.get('href') if canonical is not None else None
    schema = soup.find_all('script', type='application/ld+json')

    issues = []
    score = 100

    if not canonical_href:
        issues.append("Missing canonical tag")
        score -= 10

    # Check H1
    h1_tags = soup.find_all('h1')
    if not h1_tags:
        issues.append("Missing H1 tag")
        score -= 20
    elif len(h1_tags) > 1:
        issues.append(f"Multiple H1 tags found ({len(h1_tags)})")
        score -= 10

    return {
        "canonical_tag": canonical_href or "Missing",
        "schema_markup": f"Found {len(schema)} schema objects",
        "h1_count": len(h1_tags),
        "score": max(0, score),
        "issues": issues
    }
def _analyze_content(self, soup: BeautifulSoup) -> Dict[str, Any]:
    """Measure visible word count and image alt-text coverage.

    Args:
        soup: Parsed document to inspect; it is left unmodified.

    Returns:
        A dict with ``word_count``, ``total_images``, ``images_without_alt``,
        a coarse ``readability`` label, a ``score`` (starts at 100, clamped
        at 0) and the list of ``issues``.
    """
    import copy

    # Strip scripts and styles from a copy so the caller's tree (which other
    # analyses also read, e.g. for ld+json scripts) is not altered.
    visible = copy.copy(soup)
    for node in visible(["script", "style"]):
        node.extract()

    # Without a separator, text from adjacent elements is concatenated
    # ("<li>one</li><li>two</li>" -> "onetwo") and words are undercounted.
    text = visible.get_text(separator=' ')
    words = len(re.findall(r'\w+', text))

    images = soup.find_all('img')
    images_without_alt = sum(1 for img in images if not img.get('alt'))

    issues = []
    score = 100

    if words < 300:
        issues.append(f"Low word count ({words} words)")
        score -= 20

    if images_without_alt > 0:
        issues.append(f"{images_without_alt} images missing alt text")
        score -= 10

    return {
        "word_count": words,
        "total_images": len(images),
        "images_without_alt": images_without_alt,
        # Placeholder for a real readability algorithm; uses the same
        # 300-word threshold as the low-word-count issue above.
        "readability": "Good" if words >= 300 else "Needs Improvement",
        "score": max(0, score),
        "issues": issues
    }
def _analyze_url_structure(self, url: str) -> Dict[str, Any]:
parsed = urlparse(url)
return {
"protocol": parsed.scheme,
"domain": parsed.netloc,
"path_depth": len(parsed.path.strip('/').split('/')) if parsed.path else 0,
"is_https": parsed.scheme == 'https'
}
def _calculate_overall_score(self, *analyses) -> int:
total = sum(a.get('score', 0) for a in analyses)
return round(total / len(analyses))
def _generate_summary(self, *analyses) -> Dict[str, Any]:
critical_issues = []
for a in analyses:
for issue in a.get('issues', []):
critical_issues.append({"message": issue, "severity": "critical", "category": "SEO"})
return {"critical_issues": critical_issues}
# Entry point for a full on-page audit of one URL: fetches the page, runs the
# meta-tag, technical, content and URL-structure analyses, and returns them in
# a single report dict.
# NOTE(review): the "@@" line inside the signature is a diff hunk header, so
# the parameters between `url` and `analyze_content_quality` are not visible
# in this listing - confirm the full signature against the real file.
async def analyze_on_page_seo(
self,
url: str,
@@ -25,18 +178,53 @@ class OnPageSEOService:
analyze_content_quality: bool = True
) -> Dict[str, Any]:
"""Analyze on-page SEO factors"""
# NOTE(review): the hard-coded return below references target_keywords and
# analyze_images and sits before the try block; it looks like the placeholder
# body this commit removes - confirm it is absent from the real file, since
# otherwise the code after it would never run.
# Placeholder implementation
return {
"url": url,
"overall_score": 75,
"title_analysis": {"score": 80, "issues": [], "recommendations": []},
"meta_description": {"score": 70, "issues": [], "recommendations": []},
"heading_structure": {"score": 85, "issues": [], "recommendations": []},
"content_analysis": {"score": 75, "word_count": 1500, "readability": "Good"},
"keyword_analysis": {"target_keywords": target_keywords or [], "optimization": "Moderate"},
"image_analysis": {"total_images": 10, "missing_alt": 2} if analyze_images else {},
"recommendations": ["Optimize meta description", "Add more target keywords"]
}
try:
# Add protocol if missing
# (scheme-less input such as "example.com" is assumed to be HTTPS)
if not url.startswith(('http://', 'https://')):
url = 'https://' + url
html_content, status_code = await self._fetch_page(url)
# An empty body is treated the same as a failed request, so a response with
# no content is also reported through this error structure.
if not html_content:
# Return error structure
return {
"url": url,
"overall_score": 0,
"summary": {"critical_issues": [{"message": f"Failed to fetch URL (Status: {status_code})", "severity": "critical", "category": "Connectivity"}]},
"meta": {}, "technical": {}, "content_health": {}, "url_structure": {}, "performance": {}, "accessibility": {}, "ux": {}
}
soup = BeautifulSoup(html_content, 'html.parser')
# Run Analyses
meta_analysis = self._analyze_meta_tags(soup)
technical_analysis = self._analyze_technical(soup, url)
content_analysis = self._analyze_content(soup)
url_analysis = self._analyze_url_structure(url)
# overall_score and summary are built only from the meta, technical and content
# analyses; the URL-structure analysis is reported but not scored.
# "performance" is a static placeholder string (no load time is measured here),
# and "mobile_friendly" is derived solely from the presence of a viewport tag.
result = {
"url": url,
"overall_score": self._calculate_overall_score(meta_analysis, technical_analysis, content_analysis),
"meta": meta_analysis,
"technical": technical_analysis,
"content_health": content_analysis,
"url_structure": url_analysis,
"performance": {"load_time": "Real-time check pending"},
"accessibility": {"images_without_alt": content_analysis["images_without_alt"]},
"ux": {"viewport": meta_analysis["has_viewport"], "mobile_friendly": bool(meta_analysis["has_viewport"])},
"summary": self._generate_summary(meta_analysis, technical_analysis, content_analysis)
}
return result
# Any exception raised inside the try block is logged and returned as an
# error-shaped report (overall_score 0, empty sections) instead of propagating.
except Exception as e:
logger.error(f"Error analyzing {url}: {str(e)}")
return {
"url": url,
"overall_score": 0,
"summary": {"critical_issues": [{"message": str(e), "severity": "critical", "category": "System"}]},
"meta": {}, "technical": {}, "content_health": {}, "url_structure": {}, "performance": {}, "accessibility": {}, "ux": {}
}
async def health_check(self) -> Dict[str, Any]:
"""Health check for the on-page SEO service"""
@@ -44,4 +232,4 @@ class OnPageSEOService:
"status": "operational",
"service": self.service_name,
"last_check": datetime.utcnow().isoformat()
}
}