Add AI SEO tools with FastAPI endpoints and comprehensive services
Co-authored-by: ajay.calsoft <ajay.calsoft@gmail.com>
This commit is contained in:
601
backend/services/seo_tools/pagespeed_service.py
Normal file
601
backend/services/seo_tools/pagespeed_service.py
Normal file
@@ -0,0 +1,601 @@
|
||||
"""
|
||||
Google PageSpeed Insights Service
|
||||
|
||||
AI-enhanced PageSpeed analysis service that provides comprehensive
|
||||
performance insights with actionable recommendations for optimization.
|
||||
"""
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
import os
|
||||
|
||||
from ..llm_providers.main_text_generation import llm_text_gen
|
||||
from ...middleware.logging_middleware import seo_logger
|
||||
|
||||
|
||||
class PageSpeedService:
|
||||
"""Service for Google PageSpeed Insights analysis with AI enhancement"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the PageSpeed service"""
|
||||
self.service_name = "pagespeed_analyzer"
|
||||
self.api_key = os.getenv("GOOGLE_PAGESPEED_API_KEY")
|
||||
self.base_url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
|
||||
logger.info(f"Initialized {self.service_name}")
|
||||
|
||||
async def analyze_pagespeed(
|
||||
self,
|
||||
url: str,
|
||||
strategy: str = "DESKTOP",
|
||||
locale: str = "en",
|
||||
categories: List[str] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze website performance using Google PageSpeed Insights
|
||||
|
||||
Args:
|
||||
url: URL to analyze
|
||||
strategy: Analysis strategy (DESKTOP/MOBILE)
|
||||
locale: Locale for analysis
|
||||
categories: Categories to analyze
|
||||
|
||||
Returns:
|
||||
Dictionary containing performance analysis and AI insights
|
||||
"""
|
||||
try:
|
||||
start_time = datetime.utcnow()
|
||||
|
||||
if categories is None:
|
||||
categories = ["performance", "accessibility", "best-practices", "seo"]
|
||||
|
||||
# Validate inputs
|
||||
if not url:
|
||||
raise ValueError("URL is required")
|
||||
|
||||
if strategy not in ["DESKTOP", "MOBILE"]:
|
||||
raise ValueError("Strategy must be DESKTOP or MOBILE")
|
||||
|
||||
logger.info(f"Analyzing PageSpeed for URL: {url} (Strategy: {strategy})")
|
||||
|
||||
# Fetch PageSpeed data
|
||||
pagespeed_data = await self._fetch_pagespeed_data(url, strategy, locale, categories)
|
||||
|
||||
if not pagespeed_data:
|
||||
raise Exception("Failed to fetch PageSpeed data")
|
||||
|
||||
# Extract and structure the data
|
||||
structured_results = self._structure_pagespeed_results(pagespeed_data)
|
||||
|
||||
# Generate AI-enhanced insights
|
||||
ai_insights = await self._generate_ai_insights(structured_results, url, strategy)
|
||||
|
||||
# Calculate optimization priority
|
||||
optimization_plan = self._create_optimization_plan(structured_results)
|
||||
|
||||
execution_time = (datetime.utcnow() - start_time).total_seconds()
|
||||
|
||||
result = {
|
||||
"url": url,
|
||||
"strategy": strategy,
|
||||
"analysis_date": datetime.utcnow().isoformat(),
|
||||
"core_web_vitals": structured_results.get("core_web_vitals", {}),
|
||||
"category_scores": structured_results.get("category_scores", {}),
|
||||
"metrics": structured_results.get("metrics", {}),
|
||||
"opportunities": structured_results.get("opportunities", []),
|
||||
"diagnostics": structured_results.get("diagnostics", []),
|
||||
"ai_insights": ai_insights,
|
||||
"optimization_plan": optimization_plan,
|
||||
"raw_data": {
|
||||
"lighthouse_version": pagespeed_data.get("lighthouseResult", {}).get("lighthouseVersion"),
|
||||
"fetch_time": pagespeed_data.get("analysisUTCTimestamp"),
|
||||
"categories_analyzed": categories
|
||||
},
|
||||
"execution_time": execution_time
|
||||
}
|
||||
|
||||
# Log the operation
|
||||
await seo_logger.log_tool_usage(
|
||||
tool_name=self.service_name,
|
||||
input_data={
|
||||
"url": url,
|
||||
"strategy": strategy,
|
||||
"locale": locale,
|
||||
"categories": categories
|
||||
},
|
||||
output_data=result,
|
||||
success=True
|
||||
)
|
||||
|
||||
await seo_logger.log_external_api_call(
|
||||
api_name="Google PageSpeed Insights",
|
||||
endpoint=self.base_url,
|
||||
response_code=200,
|
||||
response_time=execution_time,
|
||||
request_data={"url": url, "strategy": strategy}
|
||||
)
|
||||
|
||||
logger.info(f"PageSpeed analysis completed for {url}")
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error analyzing PageSpeed for {url}: {e}")
|
||||
|
||||
# Log the error
|
||||
await seo_logger.log_tool_usage(
|
||||
tool_name=self.service_name,
|
||||
input_data={
|
||||
"url": url,
|
||||
"strategy": strategy,
|
||||
"locale": locale,
|
||||
"categories": categories
|
||||
},
|
||||
output_data={"error": str(e)},
|
||||
success=False
|
||||
)
|
||||
|
||||
raise
|
||||
|
||||
async def _fetch_pagespeed_data(
|
||||
self,
|
||||
url: str,
|
||||
strategy: str,
|
||||
locale: str,
|
||||
categories: List[str]
|
||||
) -> Dict[str, Any]:
|
||||
"""Fetch data from Google PageSpeed Insights API"""
|
||||
|
||||
# Build API URL
|
||||
api_url = f"{self.base_url}?url={url}&strategy={strategy}&locale={locale}"
|
||||
|
||||
# Add categories
|
||||
for category in categories:
|
||||
api_url += f"&category={category}"
|
||||
|
||||
# Add API key if available
|
||||
if self.api_key:
|
||||
api_url += f"&key={self.api_key}"
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(api_url, timeout=aiohttp.ClientTimeout(total=60)) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
return data
|
||||
else:
|
||||
error_text = await response.text()
|
||||
logger.error(f"PageSpeed API error {response.status}: {error_text}")
|
||||
|
||||
if response.status == 429:
|
||||
raise Exception("PageSpeed API rate limit exceeded")
|
||||
elif response.status == 400:
|
||||
raise Exception(f"Invalid URL or parameters: {error_text}")
|
||||
else:
|
||||
raise Exception(f"PageSpeed API error: {response.status}")
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
raise Exception("PageSpeed API request timed out")
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching PageSpeed data: {e}")
|
||||
raise
|
||||
|
||||
def _structure_pagespeed_results(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Structure PageSpeed results into organized format"""
|
||||
|
||||
lighthouse_result = data.get("lighthouseResult", {})
|
||||
categories = lighthouse_result.get("categories", {})
|
||||
audits = lighthouse_result.get("audits", {})
|
||||
|
||||
# Extract category scores
|
||||
category_scores = {}
|
||||
for category_name, category_data in categories.items():
|
||||
category_scores[category_name] = {
|
||||
"score": round(category_data.get("score", 0) * 100),
|
||||
"title": category_data.get("title", ""),
|
||||
"description": category_data.get("description", "")
|
||||
}
|
||||
|
||||
# Extract Core Web Vitals
|
||||
core_web_vitals = {}
|
||||
cwv_metrics = ["largest-contentful-paint", "first-input-delay", "cumulative-layout-shift"]
|
||||
|
||||
for metric in cwv_metrics:
|
||||
if metric in audits:
|
||||
audit_data = audits[metric]
|
||||
core_web_vitals[metric] = {
|
||||
"score": audit_data.get("score"),
|
||||
"displayValue": audit_data.get("displayValue"),
|
||||
"numericValue": audit_data.get("numericValue"),
|
||||
"title": audit_data.get("title"),
|
||||
"description": audit_data.get("description")
|
||||
}
|
||||
|
||||
# Extract key metrics
|
||||
key_metrics = {}
|
||||
important_metrics = [
|
||||
"first-contentful-paint",
|
||||
"speed-index",
|
||||
"largest-contentful-paint",
|
||||
"interactive",
|
||||
"total-blocking-time",
|
||||
"cumulative-layout-shift"
|
||||
]
|
||||
|
||||
for metric in important_metrics:
|
||||
if metric in audits:
|
||||
audit_data = audits[metric]
|
||||
key_metrics[metric] = {
|
||||
"score": audit_data.get("score"),
|
||||
"displayValue": audit_data.get("displayValue"),
|
||||
"numericValue": audit_data.get("numericValue"),
|
||||
"title": audit_data.get("title")
|
||||
}
|
||||
|
||||
# Extract opportunities (performance improvements)
|
||||
opportunities = []
|
||||
for audit_id, audit_data in audits.items():
|
||||
if (audit_data.get("scoreDisplayMode") == "numeric" and
|
||||
audit_data.get("score") is not None and
|
||||
audit_data.get("score") < 1 and
|
||||
audit_data.get("details", {}).get("overallSavingsMs", 0) > 0):
|
||||
|
||||
opportunities.append({
|
||||
"id": audit_id,
|
||||
"title": audit_data.get("title", ""),
|
||||
"description": audit_data.get("description", ""),
|
||||
"score": audit_data.get("score", 0),
|
||||
"savings_ms": audit_data.get("details", {}).get("overallSavingsMs", 0),
|
||||
"savings_bytes": audit_data.get("details", {}).get("overallSavingsBytes", 0),
|
||||
"displayValue": audit_data.get("displayValue", "")
|
||||
})
|
||||
|
||||
# Sort opportunities by potential savings
|
||||
opportunities.sort(key=lambda x: x["savings_ms"], reverse=True)
|
||||
|
||||
# Extract diagnostics
|
||||
diagnostics = []
|
||||
for audit_id, audit_data in audits.items():
|
||||
if (audit_data.get("scoreDisplayMode") == "informative" or
|
||||
(audit_data.get("score") is not None and audit_data.get("score") < 1)):
|
||||
|
||||
if audit_id not in [op["id"] for op in opportunities]:
|
||||
diagnostics.append({
|
||||
"id": audit_id,
|
||||
"title": audit_data.get("title", ""),
|
||||
"description": audit_data.get("description", ""),
|
||||
"score": audit_data.get("score"),
|
||||
"displayValue": audit_data.get("displayValue", "")
|
||||
})
|
||||
|
||||
return {
|
||||
"category_scores": category_scores,
|
||||
"core_web_vitals": core_web_vitals,
|
||||
"metrics": key_metrics,
|
||||
"opportunities": opportunities[:10], # Top 10 opportunities
|
||||
"diagnostics": diagnostics[:10] # Top 10 diagnostics
|
||||
}
|
||||
|
||||
async def _generate_ai_insights(
|
||||
self,
|
||||
structured_results: Dict[str, Any],
|
||||
url: str,
|
||||
strategy: str
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate AI-powered insights and recommendations"""
|
||||
|
||||
try:
|
||||
# Prepare data for AI analysis
|
||||
performance_score = structured_results.get("category_scores", {}).get("performance", {}).get("score", 0)
|
||||
opportunities = structured_results.get("opportunities", [])
|
||||
core_web_vitals = structured_results.get("core_web_vitals", {})
|
||||
|
||||
# Build AI prompt
|
||||
prompt = self._build_ai_analysis_prompt(
|
||||
url, strategy, performance_score, opportunities, core_web_vitals
|
||||
)
|
||||
|
||||
# Generate AI insights
|
||||
ai_response = llm_text_gen(
|
||||
prompt=prompt,
|
||||
system_prompt=self._get_system_prompt()
|
||||
)
|
||||
|
||||
# Parse AI response
|
||||
insights = self._parse_ai_insights(ai_response)
|
||||
|
||||
# Log AI analysis
|
||||
await seo_logger.log_ai_analysis(
|
||||
tool_name=self.service_name,
|
||||
prompt=prompt,
|
||||
response=ai_response,
|
||||
model_used="gemini-2.0-flash-001"
|
||||
)
|
||||
|
||||
return insights
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating AI insights: {e}")
|
||||
return {
|
||||
"summary": "AI analysis unavailable",
|
||||
"priority_actions": [],
|
||||
"technical_recommendations": [],
|
||||
"business_impact": "Analysis could not be completed"
|
||||
}
|
||||
|
||||
def _build_ai_analysis_prompt(
|
||||
self,
|
||||
url: str,
|
||||
strategy: str,
|
||||
performance_score: int,
|
||||
opportunities: List[Dict],
|
||||
core_web_vitals: Dict
|
||||
) -> str:
|
||||
"""Build AI prompt for performance analysis"""
|
||||
|
||||
opportunities_text = "\n".join([
|
||||
f"- {opp['title']}: {opp['displayValue']} (Potential savings: {opp['savings_ms']}ms)"
|
||||
for opp in opportunities[:5]
|
||||
])
|
||||
|
||||
cwv_text = "\n".join([
|
||||
f"- {metric.replace('-', ' ').title()}: {data.get('displayValue', 'N/A')}"
|
||||
for metric, data in core_web_vitals.items()
|
||||
])
|
||||
|
||||
prompt = f"""
|
||||
Analyze this website performance data and provide actionable insights for digital marketers and content creators:
|
||||
|
||||
Website: {url}
|
||||
Device: {strategy}
|
||||
Performance Score: {performance_score}/100
|
||||
|
||||
Core Web Vitals:
|
||||
{cwv_text}
|
||||
|
||||
Top Performance Opportunities:
|
||||
{opportunities_text}
|
||||
|
||||
Please provide:
|
||||
1. Executive Summary (2-3 sentences for non-technical users)
|
||||
2. Top 3 Priority Actions (specific, actionable steps)
|
||||
3. Technical Recommendations (for developers)
|
||||
4. Business Impact Assessment (how performance affects conversions, SEO, user experience)
|
||||
5. Quick Wins (easy improvements that can be implemented immediately)
|
||||
|
||||
Focus on practical advice that content creators and digital marketers can understand and act upon.
|
||||
"""
|
||||
|
||||
return prompt
|
||||
|
||||
def _get_system_prompt(self) -> str:
|
||||
"""Get system prompt for AI analysis"""
|
||||
return """You are a web performance expert specializing in translating technical PageSpeed data into actionable business insights.
|
||||
Your audience includes content creators, digital marketers, and solopreneurs who need to understand how website performance impacts their business goals.
|
||||
|
||||
Provide clear, actionable recommendations that balance technical accuracy with business practicality.
|
||||
Always explain the "why" behind recommendations and their potential impact on user experience, SEO, and conversions.
|
||||
"""
|
||||
|
||||
def _parse_ai_insights(self, ai_response: str) -> Dict[str, Any]:
|
||||
"""Parse AI response into structured insights"""
|
||||
|
||||
# Initialize default structure
|
||||
insights = {
|
||||
"summary": "",
|
||||
"priority_actions": [],
|
||||
"technical_recommendations": [],
|
||||
"business_impact": "",
|
||||
"quick_wins": []
|
||||
}
|
||||
|
||||
try:
|
||||
# Split response into sections
|
||||
sections = ai_response.split('\n\n')
|
||||
|
||||
current_section = None
|
||||
for section in sections:
|
||||
section = section.strip()
|
||||
if not section:
|
||||
continue
|
||||
|
||||
# Identify section type
|
||||
if 'executive summary' in section.lower() or 'summary' in section.lower():
|
||||
insights["summary"] = self._extract_content(section)
|
||||
elif 'priority actions' in section.lower() or 'top 3' in section.lower():
|
||||
insights["priority_actions"] = self._extract_list_items(section)
|
||||
elif 'technical recommendations' in section.lower():
|
||||
insights["technical_recommendations"] = self._extract_list_items(section)
|
||||
elif 'business impact' in section.lower():
|
||||
insights["business_impact"] = self._extract_content(section)
|
||||
elif 'quick wins' in section.lower():
|
||||
insights["quick_wins"] = self._extract_list_items(section)
|
||||
|
||||
# Fallback parsing if sections not clearly identified
|
||||
if not any(insights.values()):
|
||||
insights["summary"] = ai_response[:300] + "..." if len(ai_response) > 300 else ai_response
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing AI insights: {e}")
|
||||
insights["summary"] = "AI analysis completed but parsing failed"
|
||||
|
||||
return insights
|
||||
|
||||
def _extract_content(self, section: str) -> str:
|
||||
"""Extract content from a section, removing headers"""
|
||||
lines = section.split('\n')
|
||||
content_lines = []
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if line and not line.endswith(':') and not line.startswith('#'):
|
||||
content_lines.append(line)
|
||||
|
||||
return ' '.join(content_lines)
|
||||
|
||||
def _extract_list_items(self, section: str) -> List[str]:
|
||||
"""Extract list items from a section"""
|
||||
items = []
|
||||
lines = section.split('\n')
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if line and (line.startswith('-') or line.startswith('*') or
|
||||
line[0].isdigit() and '.' in line[:3]):
|
||||
# Remove bullet points and numbering
|
||||
clean_line = line.lstrip('-*0123456789. ').strip()
|
||||
if clean_line:
|
||||
items.append(clean_line)
|
||||
|
||||
return items[:5] # Limit to 5 items per section
|
||||
|
||||
def _create_optimization_plan(self, structured_results: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Create a prioritized optimization plan"""
|
||||
|
||||
opportunities = structured_results.get("opportunities", [])
|
||||
category_scores = structured_results.get("category_scores", {})
|
||||
|
||||
# Calculate priority score for each opportunity
|
||||
prioritized_opportunities = []
|
||||
for opp in opportunities:
|
||||
priority_score = self._calculate_priority_score(opp)
|
||||
prioritized_opportunities.append({
|
||||
**opp,
|
||||
"priority_score": priority_score,
|
||||
"difficulty": self._estimate_difficulty(opp["id"]),
|
||||
"impact": self._estimate_impact(opp["savings_ms"])
|
||||
})
|
||||
|
||||
# Sort by priority score
|
||||
prioritized_opportunities.sort(key=lambda x: x["priority_score"], reverse=True)
|
||||
|
||||
# Create implementation phases
|
||||
phases = {
|
||||
"immediate": [], # High impact, low difficulty
|
||||
"short_term": [], # Medium impact or difficulty
|
||||
"long_term": [] # High difficulty but important
|
||||
}
|
||||
|
||||
for opp in prioritized_opportunities:
|
||||
if opp["difficulty"] == "Low" and opp["impact"] in ["High", "Medium"]:
|
||||
phases["immediate"].append(opp)
|
||||
elif opp["difficulty"] in ["Low", "Medium"]:
|
||||
phases["short_term"].append(opp)
|
||||
else:
|
||||
phases["long_term"].append(opp)
|
||||
|
||||
return {
|
||||
"overall_assessment": self._generate_overall_assessment(category_scores),
|
||||
"prioritized_opportunities": prioritized_opportunities[:10],
|
||||
"implementation_phases": phases,
|
||||
"estimated_improvement": self._estimate_total_improvement(prioritized_opportunities[:5])
|
||||
}
|
||||
|
||||
def _calculate_priority_score(self, opportunity: Dict[str, Any]) -> int:
|
||||
"""Calculate priority score for an opportunity"""
|
||||
savings_ms = opportunity.get("savings_ms", 0)
|
||||
savings_bytes = opportunity.get("savings_bytes", 0)
|
||||
|
||||
# Base score from time savings
|
||||
score = min(savings_ms / 100, 50) # Cap at 50 points
|
||||
|
||||
# Add points for byte savings
|
||||
score += min(savings_bytes / 10000, 25) # Cap at 25 points
|
||||
|
||||
# Bonus points for specific high-impact optimizations
|
||||
high_impact_audits = [
|
||||
"unused-javascript",
|
||||
"render-blocking-resources",
|
||||
"largest-contentful-paint-element",
|
||||
"cumulative-layout-shift"
|
||||
]
|
||||
|
||||
if opportunity.get("id") in high_impact_audits:
|
||||
score += 25
|
||||
|
||||
return min(int(score), 100)
|
||||
|
||||
def _estimate_difficulty(self, audit_id: str) -> str:
|
||||
"""Estimate implementation difficulty"""
|
||||
|
||||
easy_fixes = [
|
||||
"unused-css-rules",
|
||||
"unused-javascript",
|
||||
"render-blocking-resources",
|
||||
"image-size-responsive"
|
||||
]
|
||||
|
||||
medium_fixes = [
|
||||
"largest-contentful-paint-element",
|
||||
"cumulative-layout-shift",
|
||||
"total-blocking-time"
|
||||
]
|
||||
|
||||
if audit_id in easy_fixes:
|
||||
return "Low"
|
||||
elif audit_id in medium_fixes:
|
||||
return "Medium"
|
||||
else:
|
||||
return "High"
|
||||
|
||||
def _estimate_impact(self, savings_ms: int) -> str:
|
||||
"""Estimate performance impact"""
|
||||
if savings_ms >= 1000:
|
||||
return "High"
|
||||
elif savings_ms >= 500:
|
||||
return "Medium"
|
||||
else:
|
||||
return "Low"
|
||||
|
||||
def _generate_overall_assessment(self, category_scores: Dict[str, Any]) -> str:
|
||||
"""Generate overall performance assessment"""
|
||||
|
||||
performance_score = category_scores.get("performance", {}).get("score", 0)
|
||||
|
||||
if performance_score >= 90:
|
||||
return "Excellent performance with minor optimization opportunities"
|
||||
elif performance_score >= 70:
|
||||
return "Good performance with some areas for improvement"
|
||||
elif performance_score >= 50:
|
||||
return "Average performance requiring attention to key areas"
|
||||
else:
|
||||
return "Poor performance requiring immediate optimization efforts"
|
||||
|
||||
def _estimate_total_improvement(self, top_opportunities: List[Dict]) -> Dict[str, Any]:
|
||||
"""Estimate total improvement from top opportunities"""
|
||||
|
||||
total_savings_ms = sum(opp.get("savings_ms", 0) for opp in top_opportunities)
|
||||
total_savings_mb = sum(opp.get("savings_bytes", 0) for opp in top_opportunities) / (1024 * 1024)
|
||||
|
||||
# Estimate score improvement (rough calculation)
|
||||
estimated_score_gain = min(total_savings_ms / 200, 30) # Conservative estimate
|
||||
|
||||
return {
|
||||
"potential_time_savings": f"{total_savings_ms/1000:.1f} seconds",
|
||||
"potential_size_savings": f"{total_savings_mb:.1f} MB",
|
||||
"estimated_score_improvement": f"+{estimated_score_gain:.0f} points",
|
||||
"confidence": "Medium" if total_savings_ms > 1000 else "Low"
|
||||
}
|
||||
|
||||
async def health_check(self) -> Dict[str, Any]:
|
||||
"""Health check for the PageSpeed service"""
|
||||
try:
|
||||
# Test with a simple URL
|
||||
test_url = "https://example.com"
|
||||
result = await self.analyze_pagespeed(test_url, "DESKTOP", "en", ["performance"])
|
||||
|
||||
return {
|
||||
"status": "operational",
|
||||
"service": self.service_name,
|
||||
"api_key_configured": bool(self.api_key),
|
||||
"test_passed": bool(result.get("category_scores")),
|
||||
"last_check": datetime.utcnow().isoformat()
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"status": "error",
|
||||
"service": self.service_name,
|
||||
"error": str(e),
|
||||
"last_check": datetime.utcnow().isoformat()
|
||||
}
|
||||
Reference in New Issue
Block a user