Update on_page_seo_analyzer.py
Additional Insights for Non-Technical Users Content Quality Insights: Readability Score: Use libraries like textstat to calculate a readability score (e.g., Flesch Reading Ease) for the webpage content. Keywords Highlighting: Extract and highlight frequently used keywords in the content, helping users understand what topics are emphasized. Duplicate Content Check: Flag if the meta description or titles are repeated multiple times in the page content. SEO Health Checks: Broken Links Detection: Identify broken internal or external links and recommend fixing them. Image Optimization Tips: Suggest reducing image sizes if the file sizes exceed a certain threshold. Recommend modern formats like WebP for better performance. Alt Text Suggestions: Provide actionable suggestions for missing or insufficient alt text, such as "Describe the image's purpose or key elements." Social Media Enhancement: Suggest best practices for Open Graph and Twitter tags, such as recommended tag content length or formats. Generate suggested meta descriptions and Open Graph descriptions for improved click-through rates. Accessibility Recommendations: Heading Structure Audit: Check for skipped heading levels (e.g., an h4 directly following an h2 with no h3 in between) and provide guidance on correcting them. Contrast Ratio Check: Flag potential text-to-background contrast issues for visually impaired users (can use APIs like Lighthouse). ARIA Tags: Check for the presence of ARIA (Accessible Rich Internet Applications) tags and recommend their addition if missing. Performance Insights: Lazy Loading Suggestions: Highlight images without loading="lazy" and recommend lazy loading to improve page load speed. Critical CSS Suggestions: Advise inlining critical CSS for faster initial render. Script Optimization: Highlight unminified or unused JavaScript and recommend optimization. Custom Recommendations: Call to Action (CTA) Suggestions: Analyze the text for actionable elements like buttons or links and recommend improving CTAs. 
Internal Linking Suggestions: Suggest adding internal links for keywords or headings that lack links. Schema Markup Expansion: Recommend additional schema types (e.g., FAQ, Product, Review) based on the page content. Mobile Friendliness Enhancements: Check for touch targets (buttons and links) being too small or too close together. Flag pages without mobile-friendly navigation menus. Enhancements to User Experience Highlight Strengths and Weaknesses: Use color-coded sections to differentiate between well-optimized and underperforming areas. Simplified Metrics: Break down complex scores (like PageSpeed or SEO scores) into "Good," "Needs Improvement," and "Poor" categories. Provide plain-English explanations for non-technical users. Recommendations Section: Provide step-by-step instructions or examples for fixing identified issues, such as "How to add a canonical tag" or "How to structure hreflang attributes." Actionable Insights Dashboard: Summarize all findings in a visually appealing dashboard with prioritized to-do lists. Export Reports: Allow users to export the analysis and recommendations in a PDF or CSV format for easier sharing and tracking.
This commit is contained in:
@@ -7,6 +7,13 @@ from bs4 import BeautifulSoup
|
||||
import requests
|
||||
import csv
|
||||
import time
|
||||
from urllib.parse import urlparse
|
||||
import validators
|
||||
from readability import Readability
|
||||
import textstat
|
||||
import re
|
||||
from PIL import Image
|
||||
import io
|
||||
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
def fetch_and_parse_html(url):
|
||||
@@ -65,6 +72,121 @@ def extract_meta_data(soup):
|
||||
st.warning(f"⚠️ Error extracting meta data: {e}")
|
||||
return {}
|
||||
|
||||
def analyze_headings(soup):
    """
    Count how many times each heading level (h1 through h6) occurs.

    Args:
        soup (BeautifulSoup): Parsed HTML content.

    Returns:
        dict: Maps each of 'h1'..'h6' to the number of matching tags found
            by ``soup.find_all``; an empty dict if the lookup raises.
    """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    try:
        # One find_all per level, in h1..h6 order.
        return {tag: len(soup.find_all(tag)) for tag in heading_tags}
    except Exception as e:
        st.warning(f"⚠️ Error analyzing headings: {e}")
        return {}
|
||||
|
||||
def check_readability(text):
    """
    Compute the Flesch Reading Ease score of the page text.

    Args:
        text (str): The text content of the webpage.

    Returns:
        float | None: The value returned by ``textstat.flesch_reading_ease``,
            or None when there is no text to score or scoring raises.
    """
    # A page with no visible text has nothing to score; report "no score"
    # (the same value the error path returns) instead of calling the scorer
    # on an empty string.
    if not text or not text.strip():
        return None

    try:
        return textstat.flesch_reading_ease(text)
    except Exception as e:
        st.warning(f"⚠️ Error checking readability: {e}")
        return None
|
||||
|
||||
def analyze_images(soup, url):
    """
    Collect the source URL and alt text of every <img> on the page.

    Args:
        soup (BeautifulSoup): Parsed HTML content.
        url (str): The URL of the webpage; relative image sources are
            resolved against it.

    Returns:
        list: One ``{'src': ..., 'alt': ...}`` dict per image that has a
            non-empty ``src``. ``src`` is an absolute URL and ``alt`` is ''
            when the attribute is missing. Returns an empty list if the
            analysis raises.
    """
    # Function-scope import: the module's import block only brings in
    # urlparse from urllib.parse.
    from urllib.parse import urljoin

    try:
        image_data = []
        for img in soup.find_all('img'):
            src = img.get('src')
            if not src:
                # <img> without a usable src has nothing to report.
                continue
            # urljoin leaves absolute URLs untouched and correctly resolves
            # root-relative ("/a.png"), path-relative ("img/a.png") and
            # protocol-relative ("//cdn/a.png") sources, which plain
            # scheme + netloc + src concatenation does not.
            image_data.append({
                'src': urljoin(url, src),
                'alt': img.get('alt', ''),
            })
        return image_data
    except Exception as e:
        st.warning(f"⚠️ Error analyzing images: {e}")
        return []
|
||||
|
||||
def analyze_links(soup):
    """
    Find links on the page whose target cannot be fetched successfully.

    Only hrefs that ``validators.url`` accepts (absolute URLs) are probed;
    relative links, anchors, ``mailto:`` and similar are skipped. Each
    distinct href is requested at most once.

    Args:
        soup (BeautifulSoup): Parsed HTML content.

    Returns:
        list: Distinct hrefs, in page order, that answered with an HTTP
            status >= 400 or raised a ``requests.RequestException``.
            Returns an empty list if the analysis itself raises.
    """
    try:
        broken_links = []
        seen = set()
        for link in soup.find_all('a', href=True):
            href = link['href']
            # Navigation/footer links repeat a lot; probing and reporting
            # each target once keeps the run time and the report sane.
            if href in seen:
                continue
            seen.add(href)
            if not validators.url(href):
                continue
            try:
                # requests.head does not follow redirects by default;
                # follow them so a redirect that ends in a 404 is caught.
                response = requests.head(href, timeout=5, allow_redirects=True)
                if response.status_code in (405, 501):
                    # Server does not support HEAD: retry with GET before
                    # declaring the link broken. stream=True avoids
                    # downloading the body.
                    response = requests.get(href, timeout=5, stream=True)
                    response.close()
                if response.status_code >= 400:
                    broken_links.append(href)
            except requests.RequestException:
                # Connection errors, timeouts, invalid TLS, etc.
                broken_links.append(href)
        return broken_links
    except Exception as e:
        st.warning(f"⚠️ Error analyzing links: {e}")
        return []
|
||||
|
||||
def suggest_ctas(soup):
    """
    Report which known call-to-action phrases appear in the page text.

    The match is a case-insensitive substring test against the text
    returned by ``soup.get_text()``.

    Args:
        soup (BeautifulSoup): Parsed HTML content.

    Returns:
        list: The phrases from the built-in CTA list that occur in the page,
            in list order; an empty list if none occur or the check raises.
    """
    try:
        cta_keywords = ['buy now', 'subscribe', 'learn more', 'sign up', 'get started']
        # Collapse every whitespace run (newlines, tabs, repeated spaces,
        # non-breaking spaces) to one space so a two-word phrase still
        # matches when the markup wraps or pads it.
        text = ' '.join(soup.get_text().lower().split())
        return [cta for cta in cta_keywords if cta in text]
    except Exception as e:
        st.warning(f"⚠️ Error suggesting CTAs: {e}")
        return []
|
||||
|
||||
def extract_alternates_and_canonicals(soup):
|
||||
"""
|
||||
Extracts canonical URL, hreflangs, and mobile alternate links from the parsed HTML.
|
||||
@@ -167,7 +289,7 @@ def extract_content_data(soup, url):
|
||||
link_insights = []
|
||||
if internal_links:
|
||||
link_insights.append("✅ Internal links are present.")
|
||||
# Only report external links when `external_links` is non-empty.
if external_links:
    link_insights.append("✅ External links are present.")
|
||||
|
||||
return {
|
||||
@@ -313,6 +435,12 @@ def fetch_seo_data(url):
|
||||
return {}
|
||||
|
||||
meta_data = extract_meta_data(soup)
|
||||
headings = analyze_headings(soup)
|
||||
text = soup.get_text()
|
||||
readability_score = check_readability(text)
|
||||
images = analyze_images(soup, url)
|
||||
broken_links = analyze_links(soup)
|
||||
ctas = suggest_ctas(soup)
|
||||
alternates_and_canonicals = extract_alternates_and_canonicals(soup)
|
||||
schema_markup = extract_schema_markup(soup)
|
||||
content_data = extract_content_data(soup, url)
|
||||
@@ -320,6 +448,11 @@ def fetch_seo_data(url):
|
||||
|
||||
return {
|
||||
"meta_data": meta_data,
|
||||
"headings": headings,
|
||||
"readability_score": readability_score,
|
||||
"images": images,
|
||||
"broken_links": broken_links,
|
||||
"ctas": ctas,
|
||||
"alternates_and_canonicals": alternates_and_canonicals,
|
||||
"schema_markup": schema_markup,
|
||||
"content_data": content_data,
|
||||
@@ -371,6 +504,21 @@ def analyze_onpage_seo():
|
||||
st.write(f"**Language:** {results['meta_data']['html_language']}")
|
||||
st.write(results['meta_data']['title_message'])
|
||||
st.write(results['meta_data']['description_message'])
|
||||
|
||||
st.subheader("Headings")
|
||||
st.write(results['headings'])
|
||||
|
||||
st.subheader("Readability Score")
|
||||
st.write(f"**Readability Score:** {results['readability_score']}")
|
||||
|
||||
st.subheader("Images")
|
||||
st.write(results['images'])
|
||||
|
||||
st.subheader("Broken Links")
|
||||
st.write(results['broken_links'])
|
||||
|
||||
st.subheader("Suggested CTAs")
|
||||
st.write(results['ctas'])
|
||||
|
||||
st.subheader("Canonical and Hreflangs")
|
||||
st.write(f"**Canonical:** {results['alternates_and_canonicals']['canonical']}")
|
||||
|
||||
Reference in New Issue
Block a user