"""SEO analyzer module with AI integration.""" import requests from bs4 import BeautifulSoup from datetime import datetime from typing import Dict, List, Tuple, Optional from urllib.parse import urlparse import openai from loguru import logger import os from dotenv import load_dotenv from .models import ( SEOAnalysisResult, MetaTagAnalysis, ContentAnalysis, SEORecommendation ) def extract_content(url: str) -> Tuple[Optional[str], Optional[BeautifulSoup], List[str]]: """Extract content from URL.""" errors = [] try: response = requests.get(url, timeout=10) response.raise_for_status() soup = BeautifulSoup(response.text, 'html.parser') return response.text, soup, errors except requests.RequestException as e: error_msg = f"Error fetching URL: {str(e)}" logger.error(error_msg) errors.append(error_msg) return None, None, errors def analyze_meta_tags(soup: BeautifulSoup) -> MetaTagAnalysis: """Analyze meta tags using BeautifulSoup.""" # Title analysis title = soup.title.string if soup.title else "" title_analysis = { 'status': 'good' if title and 30 <= len(title) <= 60 else 'needs_improvement', 'value': title, 'recommendation': '' if title and 30 <= len(title) <= 60 else 'Title should be between 30-60 characters' } # Meta description analysis meta_desc = soup.find('meta', attrs={'name': 'description'}) desc = meta_desc.get('content', '') if meta_desc else "" desc_analysis = { 'status': 'good' if desc and 120 <= len(desc) <= 160 else 'needs_improvement', 'value': desc, 'recommendation': '' if desc and 120 <= len(desc) <= 160 else 'Description should be between 120-160 characters' } # Keywords analysis meta_keywords = soup.find('meta', attrs={'name': 'keywords'}) keywords = meta_keywords.get('content', '') if meta_keywords else "" keywords_analysis = { 'status': 'good' if keywords else 'needs_improvement', 'value': keywords, 'recommendation': '' if keywords else 'Add relevant keywords meta tag' } return MetaTagAnalysis( title=title_analysis, description=desc_analysis, keywords=keywords_analysis, has_robots=bool(soup.find('meta', attrs={'name': 'robots'})), has_sitemap=bool(soup.find('link', attrs={'rel': 'sitemap'})) ) def analyze_content_with_ai(content: str) -> Tuple[ContentAnalysis, List[SEORecommendation]]: """Analyze content using AI.""" try: # Load environment variables load_dotenv() # Get API key from environment api_key = os.getenv('OPENAI_API_KEY') if not api_key: raise ValueError("OpenAI API key not found in environment variables") # Initialize OpenAI client client = openai.OpenAI(api_key=api_key) # Prepare prompt for content analysis prompt = f"""Analyze the following webpage content for SEO and provide a structured analysis: Content: {content[:4000]}... # Truncate to avoid token limits Provide analysis in the following format: 1. Word count 2. Heading structure analysis 3. Keyword density for main topics 4. Readability score (0-100) 5. Content quality score (0-100) 6. List of SEO recommendations with priority (high/medium/low), category, issue, recommendation, and impact Format the response as JSON.""" # Get AI analysis response = client.chat.completions.create( model="gpt-4", messages=[ {"role": "system", "content": "You are an SEO expert analyzing website content."}, {"role": "user", "content": prompt} ], response_format={"type": "json_object"} ) # Parse AI response analysis = response.choices[0].message.content # Create ContentAnalysis object content_analysis = ContentAnalysis( word_count=len(content.split()), headings_structure=analysis.get('heading_structure', {}), keyword_density=analysis.get('keyword_density', {}), readability_score=analysis.get('readability_score', 0), content_quality_score=analysis.get('content_quality_score', 0) ) # Create recommendations recommendations = [ SEORecommendation( priority=rec['priority'], category=rec['category'], issue=rec['issue'], recommendation=rec['recommendation'], impact=rec['impact'] ) for rec in analysis.get('recommendations', []) ] return content_analysis, recommendations except Exception as e: logger.error(f"Error in AI analysis: {str(e)}") return ContentAnalysis( word_count=len(content.split()), headings_structure={}, keyword_density={}, readability_score=0, content_quality_score=0 ), [] def analyze_seo(url: str) -> SEOAnalysisResult: """Main function to analyze website SEO.""" errors = [] warnings = [] # Validate URL try: parsed_url = urlparse(url) if not all([parsed_url.scheme, parsed_url.netloc]): errors.append("Invalid URL format") raise ValueError("Invalid URL format") except Exception as e: errors.append(f"URL parsing error: {str(e)}") return SEOAnalysisResult( url=url, analyzed_at=datetime.now(), overall_score=0, meta_tags=None, content=None, recommendations=[], errors=errors, warnings=warnings, success=False ) # Extract content content, soup, extract_errors = extract_content(url) errors.extend(extract_errors) if not content or not soup: return SEOAnalysisResult( url=url, analyzed_at=datetime.now(), overall_score=0, meta_tags=None, content=None, recommendations=[], errors=errors, warnings=warnings, success=False ) try: # Analyze meta tags meta_analysis = analyze_meta_tags(soup) # Analyze content with AI content_analysis, recommendations = analyze_content_with_ai(content) # Calculate overall score meta_score = sum([ 1 if meta_analysis.title['status'] == 'good' else 0, 1 if meta_analysis.description['status'] == 'good' else 0, 1 if meta_analysis.keywords['status'] == 'good' else 0, 1 if meta_analysis.has_robots else 0, 1 if meta_analysis.has_sitemap else 0 ]) * 20 # Scale to 100 overall_score = ( meta_score * 0.3 + # 30% weight for meta tags content_analysis.readability_score * 0.3 + # 30% weight for readability content_analysis.content_quality_score * 0.4 # 40% weight for content quality ) return SEOAnalysisResult( url=url, analyzed_at=datetime.now(), overall_score=overall_score, meta_tags=meta_analysis, content=content_analysis, recommendations=recommendations, errors=errors, warnings=warnings, success=True ) except Exception as e: error_msg = f"Error in SEO analysis: {str(e)}" logger.error(error_msg) errors.append(error_msg) return SEOAnalysisResult( url=url, analyzed_at=datetime.now(), overall_score=0, meta_tags=None, content=None, recommendations=[], errors=errors, warnings=warnings, success=False )