Recovered state: integrated TrendSurferAgent, restored frontend/backend files, and cleaned up recovery scripts

2026-02-08 13:56:57 +05:30
parent 1db10ccd0f
commit e404a86502
333 changed files with 42223 additions and 10875 deletions
--- a/backend/services/seo_tools/technical_seo_service.py
+++ b/backend/services/seo_tools/technical_seo_service.py
@@ -5,8 +5,12 @@ Comprehensive technical SEO crawler and analyzer with AI-enhanced
 insights for website optimization and search engine compatibility.
 """

+import aiohttp
+import asyncio
+from bs4 import BeautifulSoup
+from urllib.parse import urlparse, urljoin
+import time
 from typing import Dict, Any, List, Optional
-from datetime import datetime
 from loguru import logger

 class TechnicalSEOService:
@@ -16,6 +20,9 @@ class TechnicalSEOService:
        """Initialize the technical SEO service"""
        self.service_name = "technical_seo_analyzer"
        logger.info(f"Initialized {self.service_name}")
+        self.headers = {
+            'User-Agent': 'Mozilla/5.0 (compatible; ALwritySEO/1.0; +http://alwrity.com/bot)'
+        }
    
    async def analyze_technical_seo(
        self,
@@ -25,20 +32,115 @@ class TechnicalSEOService:
        analyze_performance: bool = True
    ) -> Dict[str, Any]:
        """Analyze technical SEO factors"""
-        # Placeholder implementation
-        return {
-            "url": url,
-            "pages_crawled": 25,
-            "crawl_depth": crawl_depth,
-            "technical_issues": [
-                {"type": "Missing robots.txt", "severity": "Medium", "pages_affected": 1},
-                {"type": "Slow loading pages", "severity": "High", "pages_affected": 3}
-            ],
-            "site_structure": {"internal_links": 150, "external_links": 25 if include_external_links else 0},
-            "performance_metrics": {"avg_load_time": 2.5, "largest_contentful_paint": 1.8} if analyze_performance else {},
-            "recommendations": ["Implement robots.txt", "Optimize page load speed"],
-            "crawl_summary": {"successful": 23, "errors": 2, "redirects": 5}
-        }
+        try:
+            start_time = time.time()
+            async with aiohttp.ClientSession(headers=self.headers) as session:
+                async with session.get(url, timeout=30) as response:
+                    load_time = time.time() - start_time
+                    status_code = response.status
+                    content = await response.text()
+                    headers = response.headers
+
+                    # Basic parsing
+                    soup = BeautifulSoup(content, 'html.parser')
+                    
+                    # 1. Meta Tags Analysis
+                    title = soup.title.string if soup.title else None
+                    meta_desc = soup.find('meta', attrs={'name': 'description'})
+                    meta_desc_content = meta_desc['content'] if meta_desc else None
+                    
+                    # 2. Heading Structure
+                    h1_tags = soup.find_all('h1')
+                    h2_tags = soup.find_all('h2')
+                    h3_tags = soup.find_all('h3')
+                    
+                    # 3. Image Analysis
+                    images = soup.find_all('img')
+                    images_without_alt = [img['src'] for img in images if not img.get('alt')]
+                    
+                    # 4. Link Analysis
+                    links = soup.find_all('a')
+                    internal_links = []
+                    external_links = []
+                    domain = urlparse(url).netloc
+                    
+                    for link in links:
+                        href = link.get('href')
+                        if not href:
+                            continue
+                        if href.startswith('http'):
+                            if domain in href:
+                                internal_links.append(href)
+                            else:
+                                external_links.append(href)
+                        elif href.startswith('/'):
+                             internal_links.append(urljoin(url, href))
+
+                    # 5. Technical Issues Detection
+                    issues = []
+                    
+                    # Status Code Issues
+                    if status_code != 200:
+                        issues.append({"type": f"Status Code {status_code}", "severity": "High", "pages_affected": 1})
+                    
+                    # Performance Issues
+                    if load_time > 2.0:
+                        issues.append({"type": "Slow Server Response", "severity": "Medium", "pages_affected": 1})
+                    
+                    # Meta Issues
+                    if not title:
+                        issues.append({"type": "Missing Title Tag", "severity": "High", "pages_affected": 1})
+                    elif len(title) > 60:
+                        issues.append({"type": "Title Tag Too Long", "severity": "Low", "pages_affected": 1})
+                        
+                    if not meta_desc_content:
+                        issues.append({"type": "Missing Meta Description", "severity": "High", "pages_affected": 1})
+                    
+                    # Content Structure Issues
+                    if not h1_tags:
+                        issues.append({"type": "Missing H1 Tag", "severity": "High", "pages_affected": 1})
+                    elif len(h1_tags) > 1:
+                        issues.append({"type": "Multiple H1 Tags", "severity": "Medium", "pages_affected": 1})
+                        
+                    # Image Issues
+                    if images_without_alt:
+                        issues.append({"type": "Images Missing Alt Text", "severity": "Medium", "pages_affected": len(images_without_alt)})
+                    
+                    # Security Issues
+                    if url.startswith('http:'):
+                        issues.append({"type": "Insecure Protocol (HTTP)", "severity": "High", "pages_affected": 1})
+                    
+                    return {
+                        "url": url,
+                        "pages_crawled": 1, # Currently single page
+                        "crawl_depth": 1,
+                        "technical_issues": issues,
+                        "site_structure": {
+                            "internal_links": len(internal_links),
+                            "external_links": len(external_links) if include_external_links else 0,
+                            "h1_count": len(h1_tags),
+                            "h2_count": len(h2_tags),
+                            "h3_count": len(h3_tags)
+                        },
+                        "performance_metrics": {
+                            "response_time": round(load_time, 3),
+                            "content_size": len(content)
+                        } if analyze_performance else {},
+                        "recommendations": [issue['type'] for issue in issues],
+                        "crawl_summary": {
+                            "successful": 1 if status_code == 200 else 0,
+                            "errors": 1 if status_code >= 400 else 0,
+                            "redirects": 1 if 300 <= status_code < 400 else 0
+                        }
+                    }
+
+        except Exception as e:
+            logger.error(f"Error in technical SEO analysis: {e}")
+            return {
+                "url": url,
+                "error": str(e),
+                "technical_issues": [{"type": "Crawl Failed", "severity": "High", "pages_affected": 1}]
+            }
    
    async def health_check(self) -> Dict[str, Any]:
        """Health check for the technical SEO service"""