Recovered state: integrated TrendSurferAgent, restored frontend/backend files, and cleaned up recovery scripts

This commit is contained in:
ajaysi
2026-02-08 13:56:57 +05:30
parent 1db10ccd0f
commit e404a86502
333 changed files with 42223 additions and 10875 deletions

View File

@@ -5,8 +5,12 @@ Comprehensive technical SEO crawler and analyzer with AI-enhanced
insights for website optimization and search engine compatibility.
"""
import aiohttp
import asyncio
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
import time
from typing import Dict, Any, List, Optional
from datetime import datetime
from loguru import logger
class TechnicalSEOService:
@@ -16,6 +20,9 @@ class TechnicalSEOService:
"""Initialize the technical SEO service"""
self.service_name = "technical_seo_analyzer"
logger.info(f"Initialized {self.service_name}")
self.headers = {
'User-Agent': 'Mozilla/5.0 (compatible; ALwritySEO/1.0; +http://alwrity.com/bot)'
}
async def analyze_technical_seo(
self,
@@ -25,20 +32,115 @@ class TechnicalSEOService:
analyze_performance: bool = True
) -> Dict[str, Any]:
"""Analyze technical SEO factors"""
# Placeholder implementation
return {
"url": url,
"pages_crawled": 25,
"crawl_depth": crawl_depth,
"technical_issues": [
{"type": "Missing robots.txt", "severity": "Medium", "pages_affected": 1},
{"type": "Slow loading pages", "severity": "High", "pages_affected": 3}
],
"site_structure": {"internal_links": 150, "external_links": 25 if include_external_links else 0},
"performance_metrics": {"avg_load_time": 2.5, "largest_contentful_paint": 1.8} if analyze_performance else {},
"recommendations": ["Implement robots.txt", "Optimize page load speed"],
"crawl_summary": {"successful": 23, "errors": 2, "redirects": 5}
}
try:
start_time = time.time()
async with aiohttp.ClientSession(headers=self.headers) as session:
async with session.get(url, timeout=30) as response:
load_time = time.time() - start_time
status_code = response.status
content = await response.text()
headers = response.headers
# Basic parsing
soup = BeautifulSoup(content, 'html.parser')
# 1. Meta Tags Analysis
title = soup.title.string if soup.title else None
meta_desc = soup.find('meta', attrs={'name': 'description'})
meta_desc_content = meta_desc['content'] if meta_desc else None
# 2. Heading Structure
h1_tags = soup.find_all('h1')
h2_tags = soup.find_all('h2')
h3_tags = soup.find_all('h3')
# 3. Image Analysis
images = soup.find_all('img')
images_without_alt = [img['src'] for img in images if not img.get('alt')]
# 4. Link Analysis
links = soup.find_all('a')
internal_links = []
external_links = []
domain = urlparse(url).netloc
for link in links:
href = link.get('href')
if not href:
continue
if href.startswith('http'):
if domain in href:
internal_links.append(href)
else:
external_links.append(href)
elif href.startswith('/'):
internal_links.append(urljoin(url, href))
# 5. Technical Issues Detection
issues = []
# Status Code Issues
if status_code != 200:
issues.append({"type": f"Status Code {status_code}", "severity": "High", "pages_affected": 1})
# Performance Issues
if load_time > 2.0:
issues.append({"type": "Slow Server Response", "severity": "Medium", "pages_affected": 1})
# Meta Issues
if not title:
issues.append({"type": "Missing Title Tag", "severity": "High", "pages_affected": 1})
elif len(title) > 60:
issues.append({"type": "Title Tag Too Long", "severity": "Low", "pages_affected": 1})
if not meta_desc_content:
issues.append({"type": "Missing Meta Description", "severity": "High", "pages_affected": 1})
# Content Structure Issues
if not h1_tags:
issues.append({"type": "Missing H1 Tag", "severity": "High", "pages_affected": 1})
elif len(h1_tags) > 1:
issues.append({"type": "Multiple H1 Tags", "severity": "Medium", "pages_affected": 1})
# Image Issues
if images_without_alt:
issues.append({"type": "Images Missing Alt Text", "severity": "Medium", "pages_affected": len(images_without_alt)})
# Security Issues
if url.startswith('http:'):
issues.append({"type": "Insecure Protocol (HTTP)", "severity": "High", "pages_affected": 1})
return {
"url": url,
"pages_crawled": 1, # Currently single page
"crawl_depth": 1,
"technical_issues": issues,
"site_structure": {
"internal_links": len(internal_links),
"external_links": len(external_links) if include_external_links else 0,
"h1_count": len(h1_tags),
"h2_count": len(h2_tags),
"h3_count": len(h3_tags)
},
"performance_metrics": {
"response_time": round(load_time, 3),
"content_size": len(content)
} if analyze_performance else {},
"recommendations": [issue['type'] for issue in issues],
"crawl_summary": {
"successful": 1 if status_code == 200 else 0,
"errors": 1 if status_code >= 400 else 0,
"redirects": 1 if 300 <= status_code < 400 else 0
}
}
except Exception as e:
logger.error(f"Error in technical SEO analysis: {e}")
return {
"url": url,
"error": str(e),
"technical_issues": [{"type": "Crawl Failed", "severity": "High", "pages_affected": 1}]
}
async def health_check(self) -> Dict[str, Any]:
"""Health check for the technical SEO service"""