ALwrity Chatbot, SEO, Social media, Settings, Dashboard UI styling changes

2025-06-08 05:59:22 +05:30
parent fad9647b46
commit bbe56a364d
24 changed files with 7248 additions and 2222 deletions
--- a/lib/ai_seo_tools/weburl_seo_checker.py
+++ b/lib/ai_seo_tools/weburl_seo_checker.py
@@ -1,5 +1,11 @@
 import streamlit as st
+import advertools as adv
+import pandas as pd
 from urllib.parse import urlparse
+import requests
+from datetime import datetime
+import tempfile
+import os


 # Title and introduction
@@ -74,19 +80,279 @@ def show_keyword_insights(netloc, path):
    """)


-# Main function to run the analysis
+# Enhanced HTTP Headers Analysis using advertools
+def analyze_http_headers(url):
+    """Crawl the HTTP response headers of ``url`` and render an SEO-oriented report.
+
+    The headers are fetched with ``advertools.crawl_headers`` into a temporary
+    ``.jl`` file, loaded into a DataFrame, and presented in four Streamlit tabs
+    (security, SEO, performance, technical details). Only the first row of the
+    crawl output is inspected.
+
+    Args:
+        url: The page URL whose response headers should be analyzed.
+
+    Returns:
+        None. All output is rendered through Streamlit; any failure is reported
+        with ``st.error`` instead of being raised to the caller.
+    """
+    st.subheader("🔍 Advanced HTTP Headers Analysis")
+    st.write("---")
+
+    try:
+        with st.spinner("Analyzing HTTP headers..."):
+            # Reserve a unique path and close the handle right away so that
+            # advertools can write the crawl output to that path itself.
+            with tempfile.NamedTemporaryFile(mode='w', suffix='.jl', delete=False) as tmp_file:
+                temp_filename = tmp_file.name
+
+            try:
+                # Use advertools to crawl headers
+                adv.crawl_headers([url], temp_filename)
+
+                # Read the results
+                headers_df = pd.read_json(temp_filename, lines=True)
+            finally:
+                # Always remove the temp file, even when the crawl or the
+                # parse step raised; otherwise every failed run leaks a file.
+                try:
+                    os.unlink(temp_filename)
+                except OSError:
+                    pass
+
+        if headers_df.empty:
+            st.error("❌ Could not retrieve headers data")
+            return
+
+        # Display key SEO-relevant headers
+        st.success("✅ Successfully analyzed HTTP headers!")
+
+        # Create tabs for different header categories
+        tab1, tab2, tab3, tab4 = st.tabs(["🔒 Security", "📈 SEO Headers", "⚡ Performance", "📊 Technical Details"])
+
+        with tab1:
+            st.write("### Security Headers Analysis")
+            security_headers = {
+                'resp_headers_X-Frame-Options': 'X-Frame-Options',
+                'resp_headers_X-Content-Type-Options': 'X-Content-Type-Options',
+                'resp_headers_X-XSS-Protection': 'X-XSS-Protection',
+                'resp_headers_Strict-Transport-Security': 'Strict-Transport-Security',
+                'resp_headers_Content-Security-Policy': 'Content-Security-Policy',
+                'resp_headers_Referrer-Policy': 'Referrer-Policy',
+            }
+
+            for header_key, header_name in security_headers.items():
+                value = _header_value(headers_df, header_key)
+                if value is not None:
+                    st.success(f"✅ **{header_name}**: Present")
+                    with st.expander(f"View {header_name} Details"):
+                        st.code(value)
+                else:
+                    st.warning(f"⚠️ **{header_name}**: Missing")
+                    st.info(f"💡 **Recommendation**: Add {header_name} header for better security")
+
+        with tab2:
+            st.write("### SEO-Related Headers")
+            seo_headers = {
+                'resp_headers_Content-Type': 'Content-Type',
+                'resp_headers_Content-Language': 'Content-Language',
+                'resp_headers_Cache-Control': 'Cache-Control',
+                'resp_headers_Expires': 'Expires',
+                'resp_headers_Last-Modified': 'Last-Modified',
+                'resp_headers_ETag': 'ETag',
+            }
+
+            for header_key, header_name in seo_headers.items():
+                value = _header_value(headers_df, header_key)
+                if value is not None:
+                    st.success(f"✅ **{header_name}**: {value}")
+                else:
+                    st.info(f"ℹ️ **{header_name}**: Not set or not detected")
+
+            # Special handling for content-type. Media types and charset names
+            # are case-insensitive, so compare on a lowercased copy with
+            # optional spaces/quotes around the charset value removed.
+            content_type = _header_value(headers_df, 'resp_headers_Content-Type')
+            if content_type is not None:
+                content_type_text = str(content_type).lower().replace(' ', '').replace('"', '')
+                if 'text/html' in content_type_text:
+                    st.success("🎯 **Content-Type**: Properly set for HTML content")
+                if 'charset=utf-8' in content_type_text:
+                    st.success("🌍 **Character Encoding**: UTF-8 detected - Great for international SEO!")
+
+        with tab3:
+            st.write("### Performance Headers")
+            perf_headers = {
+                'resp_headers_Server': 'Server',
+                'resp_headers_X-Powered-By': 'X-Powered-By',
+                'resp_headers_Connection': 'Connection',
+                'resp_headers_Transfer-Encoding': 'Transfer-Encoding',
+                'resp_headers_Content-Encoding': 'Content-Encoding',
+                'resp_headers_Content-Length': 'Content-Length',
+            }
+
+            for header_key, header_name in perf_headers.items():
+                value = _header_value(headers_df, header_key)
+                if value is not None:
+                    st.info(f"📊 **{header_name}**: {value}")
+
+            # Check for compression. Content-Encoding may list several codings
+            # separated by commas; match whole tokens, case-insensitively.
+            encoding = _header_value(headers_df, 'resp_headers_Content-Encoding')
+            if encoding is None:
+                st.warning("⚠️ **Compression**: Not detected - Consider enabling compression")
+            else:
+                codings = {token.strip().lower() for token in str(encoding).split(',')}
+                if codings & {'gzip', 'br'}:
+                    st.success("🚀 **Compression**: Enabled - Great for page speed!")
+                else:
+                    st.warning("⚠️ **Compression**: Consider enabling GZIP or Brotli compression")
+
+            # Check status code
+            if 'status' in headers_df.columns:
+                status = headers_df['status'].iloc[0]
+                if status == 200:
+                    st.success(f"✅ **HTTP Status**: {status} OK")
+                else:
+                    st.warning(f"⚠️ **HTTP Status**: {status}")
+
+        with tab4:
+            st.write("### Complete Headers Analysis")
+
+            # Show response headers only (more relevant for SEO)
+            response_headers = {
+                col: col.replace('resp_headers_', '')
+                for col in headers_df.columns
+                if col.startswith('resp_headers_')
+            }
+            if response_headers:
+                st.write("**Response Headers:**")
+                for col, display_name in response_headers.items():
+                    value = _header_value(headers_df, col)
+                    if value is not None:
+                        st.write(f"**{display_name}**: `{value}`")
+
+            # Show crawl metadata
+            st.write("**Crawl Information:**")
+            metadata_cols = ['url', 'status', 'crawl_time', 'download_latency']
+            for col in metadata_cols:
+                if col in headers_df.columns:
+                    st.write(f"**{col.replace('_', ' ').title()}**: `{headers_df[col].iloc[0]}`")
+
+            # Download option
+            csv = headers_df.to_csv(index=False)
+            st.download_button(
+                label="📥 Download Complete Headers Data as CSV",
+                data=csv,
+                file_name=f"headers_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
+                mime="text/csv"
+            )
+
+    except Exception as e:
+        # UI boundary: surface any crawl/parse/render failure to the user
+        # rather than crashing the whole Streamlit page.
+        st.error(f"❌ Error analyzing headers: {str(e)}")
+        st.info("💡 **Tip**: Make sure the URL is accessible and try again")
+
+
+def _header_value(headers_df, column):
+    """Return the first-row value of ``column`` from ``headers_df``, or None.
+
+    The column name is matched case-insensitively because HTTP header names
+    are case-insensitive and the casing in the crawl output may differ from
+    the canonical spelling (e.g. ``ETag`` vs ``Etag``). Missing columns and
+    null/NaN values both yield None.
+    """
+    wanted = column.lower()
+    for candidate in headers_df.columns:
+        if str(candidate).lower() != wanted:
+            continue
+        value = headers_df[candidate].iloc[0]
+        # pd.isna on a non-scalar returns an array, so only test scalars.
+        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
+            continue
+        return value
+    return None
+
+
+# Enhanced robots.txt and sitemap detection
+def check_robots_and_sitemap(url):
+    """Probe the site of ``url`` for robots.txt and a sitemap and render the result.
+
+    The site root is derived from the scheme and network location of ``url``.
+    ``/robots.txt`` is requested first, then three common sitemap paths are
+    tried in order until one answers with HTTP 200.
+
+    Args:
+        url: Any URL on the site to inspect.
+
+    Returns:
+        None. Findings are rendered through Streamlit. Network failures are
+        reported (robots.txt) or skipped (sitemap candidates), never raised.
+    """
+    st.subheader("🤖 Robots.txt & Sitemap Detection")
+    st.write("---")
+
+    parsed_url = urlparse(url)
+    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+
+    # Check robots.txt. Only the network call is guarded, and only for
+    # requests' own errors, so KeyboardInterrupt/SystemExit and genuine
+    # programming errors in the rendering code are no longer swallowed.
+    robots_url = f"{base_url}/robots.txt"
+    try:
+        response = requests.get(robots_url, timeout=10)
+    except requests.RequestException:
+        st.error("❌ Could not check robots.txt")
+    else:
+        if response.status_code == 200:
+            st.success(f"✅ **Robots.txt found**: {robots_url}")
+            with st.expander("View robots.txt content"):
+                st.code(response.text[:1000])  # Show first 1000 characters
+        else:
+            st.warning(f"⚠️ **Robots.txt not found**: Consider creating one at {robots_url}")
+
+    # Check common sitemap locations
+    sitemap_locations = [
+        f"{base_url}/sitemap.xml",
+        f"{base_url}/sitemap_index.xml",
+        f"{base_url}/sitemaps.xml"
+    ]
+
+    for sitemap_url in sitemap_locations:
+        try:
+            response = requests.get(sitemap_url, timeout=10)
+        except requests.RequestException:
+            # An unreachable candidate is not fatal; try the next location.
+            continue
+        if response.status_code == 200:
+            st.success(f"✅ **Sitemap found**: {sitemap_url}")
+            break
+    else:
+        # Loop finished without a break: no candidate returned HTTP 200.
+        st.warning("⚠️ **Sitemap not found**: Consider creating an XML sitemap")
+        st.info("💡 **Recommendation**: Submit your sitemap to Google Search Console")
+
+
+# Enhanced URL structure analysis
+def enhanced_url_analysis(url):
+    """Render a two-column breakdown of ``url``: its components and basic SEO checks.
+
+    The left column lists protocol, domain and path, plus query and fragment
+    when they are non-empty. The right column grades the total URL length
+    (<= 60 excellent, <= 100 acceptable, otherwise too long), the number of
+    non-empty path segments (more than 3 triggers a warning), and reports how
+    many distinct characters fall outside the allowed URL character set.
+
+    Args:
+        url: The URL to analyze.
+
+    Returns:
+        None. All output is rendered through Streamlit.
+    """
+    st.subheader("🔗 Enhanced URL Structure Analysis")
+    st.write("---")
+
+    parts = urlparse(url)
+
+    # URL components analysis
+    components_col, seo_col = st.columns(2)
+
+    with components_col:
+        st.write("**URL Components:**")
+        # These three are always shown, even when empty.
+        for label, value in (
+            ("Protocol", parts.scheme),
+            ("Domain", parts.netloc),
+            ("Path", parts.path),
+        ):
+            st.info(f"**{label}**: {value}")
+        # Query and fragment are shown only when present.
+        for label, value in (("Query", parts.query), ("Fragment", parts.fragment)):
+            if value:
+                st.info(f"**{label}**: {value}")
+
+    with seo_col:
+        st.write("**SEO Analysis:**")
+
+        # URL length analysis
+        total_chars = len(url)
+        if total_chars > 100:
+            st.error(f"❌ **URL Length**: {total_chars} characters (Too long)")
+        elif total_chars > 60:
+            st.warning(f"⚠️ **URL Length**: {total_chars} characters (Good, but could be shorter)")
+        else:
+            st.success(f"✅ **URL Length**: {total_chars} characters (Excellent)")
+
+        # Path depth analysis: count the non-empty segments between slashes
+        levels = len(list(filter(None, parts.path.split('/'))))
+        if levels > 3:
+            st.warning(f"⚠️ **URL Depth**: {levels} levels (Consider flattening)")
+        else:
+            st.success(f"✅ **URL Depth**: {levels} levels (Good)")
+
+        # Special characters check: collect distinct characters outside the allowed set
+        allowed = frozenset('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~:/?#[]@!$&\'()*+,;=')
+        unexpected = {ch for ch in url if ch not in allowed}
+        if unexpected:
+            st.warning(f"⚠️ **Special Characters**: Found {len(unexpected)} special characters")
+        else:
+            st.success("✅ **Special Characters**: Clean URL structure")
+
+
+# Enhanced main function to run the analysis
 def run_analysis(url):
+    """Run every URL check for ``url`` and render the results in order.
+
+    The URL is parsed once; the path is passed to the length, hyphen and
+    file-extension checks, while the full URL is passed to the HTTPS check
+    and to the enhanced URL, HTTP header and robots/sitemap analyses.
+    Keyword insights and a recommendations summary are rendered last.
+    """
    # Parse the URL
    parsed_url = urlparse(url)
    netloc = parsed_url.netloc  # Domain name
    path = parsed_url.path  # Path after the domain

-    # Run checks
+    # Run existing checks
    check_https(url)
    check_url_length(path)
    check_hyphens(path)
    check_file_extension(path)
+    
+    # Add new enhanced analyses
+    enhanced_url_analysis(url)
+    analyze_http_headers(url)
+    check_robots_and_sitemap(url)
+    
+    # Keep existing keyword insights
    show_keyword_insights(netloc, path)
+    
+    # Add summary section
+    st.subheader("📋 Analysis Summary & Recommendations")
+    st.write("---")
+    st.success("🎉 **Analysis Complete!** Review the findings above and implement the recommendations for better SEO performance.")
+    
+    # NOTE: this list is static; it is not derived from the findings above.
+    recommendations = [
+        "✅ Ensure HTTPS is enabled for security and SEO benefits",
+        "🔗 Keep URLs short, descriptive, and user-friendly",
+        "🔒 Implement security headers to protect your site",
+        "🤖 Create and maintain robots.txt and XML sitemaps",
+        "⚡ Enable compression and optimize HTTP headers for performance",
+        "📊 Monitor your URL structure and avoid excessive depth"
+    ]
+    
+    st.write("**Key Recommendations:**")
+    for rec in recommendations:
+        st.write(rec)


 # Display the app