Update on_page_seo_analyzer.py

Additional Insights for Non-Technical Users
Content Quality Insights:

Readability Score: Use libraries like textstat to calculate a readability score (e.g., Flesch Reading Ease) for the webpage content.
Keyword Highlighting: Extract and highlight frequently used keywords in the content, helping users understand which topics are emphasized.
Duplicate Content Check: Flag if the meta description or titles are repeated multiple times in the page content.
SEO Health Checks:

Broken Links Detection: Identify broken internal or external links and recommend fixing them.
Image Optimization Tips:
Suggest reducing image sizes if the file sizes exceed a certain threshold.
Recommend modern formats like WebP for better performance.
Alt Text Suggestions: Provide actionable suggestions for missing or insufficient alt text, such as "Describe the image's purpose or key elements."
Social Media Enhancement:

Suggest best practices for Open Graph and Twitter tags, such as recommended tag content length or formats.
Generate suggested meta descriptions and Open Graph descriptions for improved click-through rates.
Accessibility Recommendations:

Heading Structure Audit: Check for skipped heading levels (e.g., an h4 directly following an h2 with no h3 in between) and provide guidance on correcting them.
Contrast Ratio Check: Flag potential text-to-background contrast issues for visually impaired users (can use APIs like Lighthouse).
ARIA Attributes: Check for the presence of ARIA (Accessible Rich Internet Applications) attributes and recommend adding them where they are missing.
Performance Insights:

Lazy Loading Suggestions: Highlight images without loading="lazy" and recommend lazy loading to improve page load speed.
Critical CSS Suggestions: Advise inlining critical CSS for faster initial render.
Script Optimization: Highlight unminified or unused JavaScript and recommend optimization.
Custom Recommendations:

Call to Action (CTA) Suggestions: Analyze the text for actionable elements like buttons or links and recommend improving CTAs.
Internal Linking Suggestions: Suggest adding internal links for keywords or headings that lack links.
Schema Markup Expansion: Recommend additional schema types (e.g., FAQ, Product, Review) based on the page content.
Mobile Friendliness Enhancements:

Check for touch targets (buttons and links) being too small or too close together.
Flag pages without mobile-friendly navigation menus.
Enhancements to User Experience
Highlight Strengths and Weaknesses: Use color-coded sections to differentiate between well-optimized and underperforming areas.

Simplified Metrics:

Break down complex scores (like PageSpeed or SEO scores) into "Good," "Needs Improvement," and "Poor" categories.
Provide plain-English explanations for non-technical users.
Recommendations Section:

Provide step-by-step instructions or examples for fixing identified issues, such as "How to add a canonical tag" or "How to structure hreflang attributes."
Actionable Insights Dashboard: Summarize all findings in a visually appealing dashboard with prioritized to-do lists.

Export Reports: Allow users to export the analysis and recommendations in a PDF or CSV format for easier sharing and tracking.
This commit is contained in:
ي
2025-01-17 12:23:10 +05:30
committed by GitHub
parent 6bfc851a1c
commit c9b22b3653

View File

@@ -7,6 +7,13 @@ from bs4 import BeautifulSoup
import requests
import csv
import time
from urllib.parse import urlparse
import validators
from readability import Readability
import textstat
import re
from PIL import Image
import io
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
def fetch_and_parse_html(url):
@@ -65,6 +72,121 @@ def extract_meta_data(soup):
st.warning(f"⚠️ Error extracting meta data: {e}")
return {}
def analyze_headings(soup):
    """
    Counts how many times each heading level (h1-h6) occurs on the page.

    Args:
        soup (BeautifulSoup): Parsed HTML content.

    Returns:
        dict: Maps each tag name 'h1'..'h6' to the number of matching
        elements. An empty dict is returned (after a Streamlit warning)
        if the lookup raises.
    """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    try:
        # One find_all pass per level, in h1 -> h6 order.
        return {tag: len(soup.find_all(tag)) for tag in heading_tags}
    except Exception as e:
        st.warning(f"⚠️ Error analyzing headings: {e}")
        return {}
def check_readability(text):
    """
    Computes the Flesch Reading Ease score of the text.

    Args:
        text (str): The text content of the webpage.

    Returns:
        float | None: The value returned by
        ``textstat.flesch_reading_ease``, or None when the text is empty or
        whitespace-only, or when scoring raises (a Streamlit warning is
        shown in that last case).
    """
    try:
        # Blank text contains no words or sentences, so any number computed
        # from it would be meaningless; report "no score" instead.
        if not text or not text.strip():
            return None
        return textstat.flesch_reading_ease(text)
    except Exception as e:
        st.warning(f"⚠️ Error checking readability: {e}")
        return None
def analyze_images(soup, url):
    """
    Collects the source URL and alt text of every image on the webpage.

    Args:
        soup (BeautifulSoup): Parsed HTML content.
        url (str): The URL of the webpage; used as the base for resolving
            relative image sources.

    Returns:
        list: One dict per <img> that has a non-blank ``src``, with keys
        'src' (absolute URL) and 'alt' (alt text, '' when missing).
        An empty list is returned (after a Streamlit warning) on error.
    """
    # Local import keeps this block self-contained; the module header only
    # imports urlparse.
    from urllib.parse import urljoin

    try:
        image_data = []
        for img in soup.find_all('img'):
            src = (img.get('src') or '').strip()
            if not src:
                continue
            # urljoin leaves absolute URLs (and data: URIs) untouched and
            # correctly resolves root-relative ("/a.png"), path-relative
            # ("img/a.png") and protocol-relative ("//cdn/a.png") sources.
            # Plain "scheme://netloc" + src concatenation only worked for
            # the root-relative case.
            image_data.append({
                'src': urljoin(url, src),
                'alt': img.get('alt', ''),
            })
        return image_data
    except Exception as e:
        st.warning(f"⚠️ Error analyzing images: {e}")
        return []
def analyze_links(soup):
    """
    Finds broken absolute HTTP(S) links on the webpage.

    Each distinct absolute http/https href is probed once. A link is
    reported as broken when the request fails or the final response status
    is 400 or above. Relative links, fragments and non-HTTP schemes
    (mailto:, ftp:, javascript:, ...) are not checked.

    Args:
        soup (BeautifulSoup): Parsed HTML content.

    Returns:
        list: Broken link URLs in first-seen order, without duplicates.
        An empty list is returned (after a Streamlit warning) on
        unexpected errors.
    """
    try:
        broken_links = []
        seen = set()
        for link in soup.find_all('a', href=True):
            href = link['href'].strip()
            # Probe every distinct target only once.
            if href in seen:
                continue
            seen.add(href)
            # requests only speaks HTTP(S); other syntactically valid URLs
            # (e.g. ftp://) would raise InvalidSchema and be misreported
            # as broken.
            if not href.lower().startswith(('http://', 'https://')):
                continue
            if not validators.url(href):
                continue
            try:
                # HEAD does not follow redirects by default; follow them so
                # the status of the final target is what gets judged.
                response = requests.head(href, timeout=5, allow_redirects=True)
                if response.status_code in (405, 501):
                    # Server rejects HEAD itself; retry with GET but do not
                    # download the body.
                    response = requests.get(href, timeout=5, stream=True)
                    response.close()
                if response.status_code >= 400:
                    broken_links.append(href)
            except requests.RequestException:
                broken_links.append(href)
        return broken_links
    except Exception as e:
        st.warning(f"⚠️ Error analyzing links: {e}")
        return []
def suggest_ctas(soup, cta_keywords=None):
    """
    Detects which call-to-action phrases appear in the page text.

    Matching is case-insensitive, treats any run of whitespace (newlines,
    tabs, non-breaking spaces) as a single space, and requires whole-word
    matches so that e.g. "unsubscribe" does not count as "subscribe".

    Args:
        soup (BeautifulSoup): Parsed HTML content.
        cta_keywords (list[str] | None): Phrases to look for. Defaults to
            a built-in list of common CTA phrases.

    Returns:
        list: The phrases from ``cta_keywords`` found on the page, in the
        order they are listed. An empty list is returned (after a
        Streamlit warning) on error.
    """
    try:
        if cta_keywords is None:
            cta_keywords = ['buy now', 'subscribe', 'learn more', 'sign up', 'get started']
        # str.split() with no argument splits on all Unicode whitespace, so
        # phrases broken across lines or joined by &nbsp; still match.
        text = ' '.join(soup.get_text().lower().split())
        return [
            cta for cta in cta_keywords
            if re.search(r'\b' + re.escape(cta.lower()) + r'\b', text)
        ]
    except Exception as e:
        st.warning(f"⚠️ Error suggesting CTAs: {e}")
        return []
def extract_alternates_and_canonicals(soup):
"""
Extracts canonical URL, hreflangs, and mobile alternate links from the parsed HTML.
@@ -167,7 +289,7 @@ def extract_content_data(soup, url):
link_insights = []
if internal_links:
link_insights.append("✅ Internal links are present.")
if external_links:
if external links:
link_insights.append("✅ External links are present.")
return {
@@ -313,6 +435,12 @@ def fetch_seo_data(url):
return {}
meta_data = extract_meta_data(soup)
headings = analyze_headings(soup)
text = soup.get_text()
readability_score = check_readability(text)
images = analyze_images(soup, url)
broken_links = analyze_links(soup)
ctas = suggest_ctas(soup)
alternates_and_canonicals = extract_alternates_and_canonicals(soup)
schema_markup = extract_schema_markup(soup)
content_data = extract_content_data(soup, url)
@@ -320,6 +448,11 @@ def fetch_seo_data(url):
return {
"meta_data": meta_data,
"headings": headings,
"readability_score": readability_score,
"images": images,
"broken_links": broken_links,
"ctas": ctas,
"alternates_and_canonicals": alternates_and_canonicals,
"schema_markup": schema_markup,
"content_data": content_data,
@@ -371,6 +504,21 @@ def analyze_onpage_seo():
st.write(f"**Language:** {results['meta_data']['html_language']}")
st.write(results['meta_data']['title_message'])
st.write(results['meta_data']['description_message'])
st.subheader("Headings")
st.write(results['headings'])
st.subheader("Readability Score")
st.write(f"**Readability Score:** {results['readability_score']}")
st.subheader("Images")
st.write(results['images'])
st.subheader("Broken Links")
st.write(results['broken_links'])
st.subheader("Suggested CTAs")
st.write(results['ctas'])
st.subheader("Canonical and Hreflangs")
st.write(f"**Canonical:** {results['alternates_and_canonicals']['canonical']}")