ALwrity Chatbot, SEO, Social media, Settings, Dashboard UI styling changes

This commit is contained in:
ajaysi
2025-06-08 05:59:22 +05:30
parent fad9647b46
commit bbe56a364d
24 changed files with 7248 additions and 2222 deletions

View File

@@ -1,5 +1,11 @@
import streamlit as st
import advertools as adv
import pandas as pd
from urllib.parse import urlparse
import requests
from datetime import datetime
import tempfile
import os
# Title and introduction
@@ -74,19 +80,279 @@ def show_keyword_insights(netloc, path):
""")
# Main function to run the analysis
# Enhanced HTTP Headers Analysis using advertools
def analyze_http_headers(url):
"""Analyze HTTP headers using advertools for comprehensive SEO insights."""
st.subheader("🔍 Advanced HTTP Headers Analysis")
st.write("---")
try:
with st.spinner("Analyzing HTTP headers..."):
# Create a temporary file for output
with tempfile.NamedTemporaryFile(mode='w', suffix='.jl', delete=False) as tmp_file:
temp_filename = tmp_file.name
# Use advertools to crawl headers
adv.crawl_headers([url], temp_filename)
# Read the results
headers_df = pd.read_json(temp_filename, lines=True)
# Clean up temp file
os.unlink(temp_filename)
if not headers_df.empty:
# Display key SEO-relevant headers
st.success("✅ Successfully analyzed HTTP headers!")
# Create tabs for different header categories
tab1, tab2, tab3, tab4 = st.tabs(["🔒 Security", "📈 SEO Headers", "⚡ Performance", "📊 Technical Details"])
with tab1:
st.write("### Security Headers Analysis")
security_headers = {
'resp_headers_X-Frame-Options': 'X-Frame-Options',
'resp_headers_X-Content-Type-Options': 'X-Content-Type-Options',
'resp_headers_X-XSS-Protection': 'X-XSS-Protection',
'resp_headers_Strict-Transport-Security': 'Strict-Transport-Security',
'resp_headers_Content-Security-Policy': 'Content-Security-Policy',
'resp_headers_Referrer-Policy': 'Referrer-Policy'
}
for header_key, header_name in security_headers.items():
if header_key in headers_df.columns and not pd.isna(headers_df[header_key].iloc[0]):
st.success(f"✅ **{header_name}**: Present")
with st.expander(f"View {header_name} Details"):
st.code(headers_df[header_key].iloc[0])
else:
st.warning(f"⚠️ **{header_name}**: Missing")
st.info(f"💡 **Recommendation**: Add {header_name} header for better security")
with tab2:
st.write("### SEO-Related Headers")
seo_headers = {
'resp_headers_Content-Type': 'Content-Type',
'resp_headers_Content-Language': 'Content-Language',
'resp_headers_Cache-Control': 'Cache-Control',
'resp_headers_Expires': 'Expires',
'resp_headers_Last-Modified': 'Last-Modified',
'resp_headers_ETag': 'ETag'
}
for header_key, header_name in seo_headers.items():
if header_key in headers_df.columns and not pd.isna(headers_df[header_key].iloc[0]):
st.success(f"✅ **{header_name}**: {headers_df[header_key].iloc[0]}")
else:
st.info(f" **{header_name}**: Not set or not detected")
# Special handling for content-type
if 'resp_headers_Content-Type' in headers_df.columns:
content_type = headers_df['resp_headers_Content-Type'].iloc[0]
if 'text/html' in str(content_type):
st.success("🎯 **Content-Type**: Properly set for HTML content")
if 'charset=utf-8' in str(content_type):
st.success("🌍 **Character Encoding**: UTF-8 detected - Great for international SEO!")
with tab3:
st.write("### Performance Headers")
perf_headers = {
'resp_headers_Server': 'Server',
'resp_headers_X-Powered-By': 'X-Powered-By',
'resp_headers_Connection': 'Connection',
'resp_headers_Transfer-Encoding': 'Transfer-Encoding',
'resp_headers_Content-Encoding': 'Content-Encoding',
'resp_headers_Content-Length': 'Content-Length'
}
for header_key, header_name in perf_headers.items():
if header_key in headers_df.columns and not pd.isna(headers_df[header_key].iloc[0]):
st.info(f"📊 **{header_name}**: {headers_df[header_key].iloc[0]}")
# Check for compression
if 'resp_headers_Content-Encoding' in headers_df.columns:
encoding = headers_df['resp_headers_Content-Encoding'].iloc[0]
if 'gzip' in str(encoding) or 'br' in str(encoding):
st.success("🚀 **Compression**: Enabled - Great for page speed!")
else:
st.warning("⚠️ **Compression**: Consider enabling GZIP or Brotli compression")
else:
st.warning("⚠️ **Compression**: Not detected - Consider enabling compression")
# Check status code
if 'status' in headers_df.columns:
status = headers_df['status'].iloc[0]
if status == 200:
st.success(f"✅ **HTTP Status**: {status} OK")
else:
st.warning(f"⚠️ **HTTP Status**: {status}")
with tab4:
st.write("### Complete Headers Analysis")
# Show response headers only (more relevant for SEO)
response_headers = {col: col.replace('resp_headers_', '') for col in headers_df.columns if col.startswith('resp_headers_')}
if response_headers:
st.write("**Response Headers:**")
for col, display_name in response_headers.items():
if not pd.isna(headers_df[col].iloc[0]):
st.write(f"**{display_name}**: `{headers_df[col].iloc[0]}`")
# Show crawl metadata
st.write("**Crawl Information:**")
metadata_cols = ['url', 'status', 'crawl_time', 'download_latency']
for col in metadata_cols:
if col in headers_df.columns:
st.write(f"**{col.replace('_', ' ').title()}**: `{headers_df[col].iloc[0]}`")
# Download option
csv = headers_df.to_csv(index=False)
st.download_button(
label="📥 Download Complete Headers Data as CSV",
data=csv,
file_name=f"headers_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
mime="text/csv"
)
else:
st.error("❌ Could not retrieve headers data")
except Exception as e:
st.error(f"❌ Error analyzing headers: {str(e)}")
st.info("💡 **Tip**: Make sure the URL is accessible and try again")
# Enhanced robots.txt and sitemap detection
def check_robots_and_sitemap(url):
"""Check for robots.txt and sitemap files."""
st.subheader("🤖 Robots.txt & Sitemap Detection")
st.write("---")
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
# Check robots.txt
try:
robots_url = f"{base_url}/robots.txt"
response = requests.get(robots_url, timeout=10)
if response.status_code == 200:
st.success(f"✅ **Robots.txt found**: {robots_url}")
with st.expander("View robots.txt content"):
st.code(response.text[:1000]) # Show first 1000 characters
else:
st.warning(f"⚠️ **Robots.txt not found**: Consider creating one at {robots_url}")
except:
st.error("❌ Could not check robots.txt")
# Check common sitemap locations
sitemap_locations = [
f"{base_url}/sitemap.xml",
f"{base_url}/sitemap_index.xml",
f"{base_url}/sitemaps.xml"
]
sitemap_found = False
for sitemap_url in sitemap_locations:
try:
response = requests.get(sitemap_url, timeout=10)
if response.status_code == 200:
st.success(f"✅ **Sitemap found**: {sitemap_url}")
sitemap_found = True
break
except:
continue
if not sitemap_found:
st.warning("⚠️ **Sitemap not found**: Consider creating an XML sitemap")
st.info("💡 **Recommendation**: Submit your sitemap to Google Search Console")
# Enhanced URL structure analysis
def enhanced_url_analysis(url):
"""Provide enhanced URL structure analysis."""
st.subheader("🔗 Enhanced URL Structure Analysis")
st.write("---")
parsed_url = urlparse(url)
# URL components analysis
col1, col2 = st.columns(2)
with col1:
st.write("**URL Components:**")
st.info(f"**Protocol**: {parsed_url.scheme}")
st.info(f"**Domain**: {parsed_url.netloc}")
st.info(f"**Path**: {parsed_url.path}")
if parsed_url.query:
st.info(f"**Query**: {parsed_url.query}")
if parsed_url.fragment:
st.info(f"**Fragment**: {parsed_url.fragment}")
with col2:
st.write("**SEO Analysis:**")
# URL length analysis
url_length = len(url)
if url_length <= 60:
st.success(f"✅ **URL Length**: {url_length} characters (Excellent)")
elif url_length <= 100:
st.warning(f"⚠️ **URL Length**: {url_length} characters (Good, but could be shorter)")
else:
st.error(f"❌ **URL Length**: {url_length} characters (Too long)")
# Path depth analysis
path_segments = [seg for seg in parsed_url.path.split('/') if seg]
depth = len(path_segments)
if depth <= 3:
st.success(f"✅ **URL Depth**: {depth} levels (Good)")
else:
st.warning(f"⚠️ **URL Depth**: {depth} levels (Consider flattening)")
# Special characters check
special_chars = set(url) - set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~:/?#[]@!$&\'()*+,;=')
if not special_chars:
st.success("✅ **Special Characters**: Clean URL structure")
else:
st.warning(f"⚠️ **Special Characters**: Found {len(special_chars)} special characters")
# Enhanced main function to run the analysis
def run_analysis(url):
# Parse the URL
parsed_url = urlparse(url)
netloc = parsed_url.netloc # Domain name
path = parsed_url.path # Path after the domain
# Run checks
# Run existing checks
check_https(url)
check_url_length(path)
check_hyphens(path)
check_file_extension(path)
# Add new enhanced analyses
enhanced_url_analysis(url)
analyze_http_headers(url)
check_robots_and_sitemap(url)
# Keep existing keyword insights
show_keyword_insights(netloc, path)
# Add summary section
st.subheader("📋 Analysis Summary & Recommendations")
st.write("---")
st.success("🎉 **Analysis Complete!** Review the findings above and implement the recommendations for better SEO performance.")
recommendations = [
"✅ Ensure HTTPS is enabled for security and SEO benefits",
"🔗 Keep URLs short, descriptive, and user-friendly",
"🔒 Implement security headers to protect your site",
"🤖 Create and maintain robots.txt and XML sitemaps",
"⚡ Enable compression and optimize HTTP headers for performance",
"📊 Monitor your URL structure and avoid excessive depth"
]
st.write("**Key Recommendations:**")
for rec in recommendations:
st.write(rec)
# Display the app