diff --git a/lib/ai_seo_tools/google_pagespeed_insights.py b/lib/ai_seo_tools/google_pagespeed_insights.py new file mode 100644 index 00000000..98ff1966 --- /dev/null +++ b/lib/ai_seo_tools/google_pagespeed_insights.py @@ -0,0 +1,434 @@ +import requests +import streamlit as st +import json +import pandas as pd +import base64 +import plotly.express as px +import time +from datetime import datetime + +def run_pagespeed(url, api_key=None, strategy='DESKTOP', locale='en'): + """Fetches and processes PageSpeed Insights data.""" + + serviceurl = 'https://www.googleapis.com/pagespeedonline/v5/runPagespeed' + base_url = f"{serviceurl}?url={url}&strategy={strategy}&locale={locale}&category=performance&category=accessibility&category=best-practices&category=seo" + + if api_key: + base_url += f"&key={api_key}" + + #if categories: + # base_url += f"&category={','.join(categories)}" + + try: + response = requests.get(base_url) + response.raise_for_status() # Raise an exception for bad status codes + + data = response.json() + return data + + except requests.exceptions.RequestException as e: + st.error(f"Error fetching PageSpeed Insights data: {e}") + return None + +def display_results(data): + """Presents PageSpeed Insights data in a user-friendly format.""" + # Credits: https://www.danielherediamejias.com/pagespeed-insights-api-with-python/ + st.subheader("PageSpeed Insights Report") + + # Extract scores from the PageSpeed Insights data + performance_overall_score = data['lighthouseResult']['categories']['performance']['score'] * 100 + accessibility_overall_score = data['lighthouseResult']['categories']['accessibility']['score'] * 100 + seo_overall_score = data['lighthouseResult']['categories']['seo']['score'] * 100 + best_practices_score = data['lighthouseResult']['categories']['best-practices']['score'] * 100 + + col1, col2 = st.columns([5, 5]) + with col1: + # Display metrics with improved help messages + st.subheader("Performance") + category_description = data['lighthouseResult']['categories']['performance'].get('description', "No description available") + if "No description available" in category_description: + category_description = "This score represents Google's assessment of your page's speed. A higher percentage indicates better performance." + st.metric( + label="Overall Performance Score", + value=f"{performance_overall_score:.0f}%", + help=category_description + ) + with col2: + st.subheader("Accessibility") + category_description = data['lighthouseResult']['categories']['accessibility'].get('description', "No description available") + if "No description available" in category_description: + category_description = "This score evaluates how accessible your page is to users with disabilities. A higher percentage means better accessibility." + st.metric( + label="Overall Accessibility Score", + value=f"{accessibility_overall_score:.0f}%", + help=category_description + ) + + col1, col2 = st.columns([5, 5]) + with col1: + st.subheader("SEO") + category_description = data['lighthouseResult']['categories']['seo'].get('description', "No description available") + if "No description available" in category_description: + category_description = "This score measures how well your page is optimized for search engines. A higher percentage indicates better SEO practices." + st.metric( + label="Overall SEO Score", + value=f"{seo_overall_score:.0f}%", + help=category_description + ) + + with col2: + st.subheader("Best Practises") + category_description = data['lighthouseResult']['categories']['best-practices'].get('description', "No description available") + if "No description available" in category_description: + category_description = "This score reflects how well your page follows best practices for web development. A higher percentage signifies adherence to best practices." + st.metric( + label="Overall Best Practices Score", + value=f"{best_practices_score:.0f}%", + help=category_description + ) + + # Display additional metrics from the article + st.subheader("Additional Metrics") + additional_metrics = [] + for metric_name, metric_value in { + "First Contentful Paint (FCP)": data['lighthouseResult']['audits']['first-contentful-paint']['displayValue'], + "Largest Contentful Paint (LCP)": data['lighthouseResult']['audits']['largest-contentful-paint']['displayValue'], + "Time to Interactive (TTI)": data['lighthouseResult']['audits']['interactive']['displayValue'], + "Total Blocking Time (TBT)": data['lighthouseResult']['audits']['total-blocking-time']['displayValue'], + "Cumulative Layout Shift (CLS)": data['lighthouseResult']['audits']['cumulative-layout-shift']['displayValue'] + }.items(): + additional_metrics.append({"Metric": metric_name, "Value": metric_value}) + st.table(pd.DataFrame(additional_metrics)) + + # 2.4.1.- Network Requests + st.subheader("Network Requests") + if 'network-requests' in data['lighthouseResult']['audits']: + listrequests = [] + for item in data["lighthouseResult"]["audits"]["network-requests"]["details"]["items"]: + endtime = item.get("endTime", "N/A") + starttime = item.get("startTime", "N/A") + transfersize = item.get("transferSize", "N/A") + resourcesize = item.get("resourceSize", "N/A") + url = item.get("url", "N/A") + + # Filter for significant requests (adjust thresholds as needed) + if transfersize != "N/A" and transfersize > 100000 or resourcesize != "N/A" and resourcesize > 100000: + # Convert bytes to MBs + transfersize_mb = round(transfersize / 1048576, 2) # 1 MB = 1048576 bytes + resourcesize_mb = round(resourcesize / 1048576, 2) + list1 = [endtime, starttime, transfersize_mb, resourcesize_mb, url] + listrequests.append(list1) + + if listrequests: + st.dataframe(pd.DataFrame(listrequests, columns=["End Time", "Start Time", "Transfer Size (MB)", "Resource Size (MB)", "URL"]), use_container_width=True) + else: + st.write("No significant network requests found.") + + # 2.4.2.- Mainthread Work Breakdown + st.subheader("Mainthread Work Breakdown") + if 'mainthread-work-breakdown' in data['lighthouseResult']['audits']: + mainthread_score = data["lighthouseResult"]["audits"]["mainthread-work-breakdown"]["score"] + mainthread_duration = data["lighthouseResult"]["audits"]["mainthread-work-breakdown"]["displayValue"] + st.write(f"Score: {mainthread_score}, Duration: {mainthread_duration}") + + # Extract data for visualization + breakdown_data = [] + for item in data["lighthouseResult"]["audits"]["mainthread-work-breakdown"]["details"]["items"]: + duration = item.get("duration", "N/A") + process = item.get("groupLabel", "N/A") + if duration != "N/A": # Only include non-N/A values + breakdown_data.append({"Process": process, "Duration (ms)": duration}) # Make sure the column name is "Process" + + # Create a bar chart using Streamlit + if breakdown_data: + fig = px.bar( + pd.DataFrame(breakdown_data), + x="Process", # Now using the "Process" column + y="Duration (ms)", + title="Mainthread Work Breakdown", + labels={"Process": "Process", "Duration (ms)": "Duration (ms)"}, + hover_data=["Process", "Duration (ms)"] + ) + st.plotly_chart(fig, use_container_width=True) + else: + st.write("No significant main thread work breakdown data found.") + + # 2.4.3.- Use of Passive Event Listeners + st.subheader("Use of Passive Event Listeners") + if 'uses-passive-event-listeners' in data['lighthouseResult']['audits']: + event_listeners = data["lighthouseResult"]["audits"]["uses-passive-event-listeners"]["score"] + st.write(f"Score: {event_listeners}") + listevents = [] + for item in data["lighthouseResult"]["audits"]["uses-passive-event-listeners"]["details"]["items"]: + url = item.get("url", "N/A") + line = item.get("label", "N/A") + if url != "N/A" and line != "N/A": + list1 = [url, line] + listevents.append(list1) + if listevents: + st.table(pd.DataFrame(listevents, columns=["URL", "Code Line"])) + else: + st.write("No significant passive event listener data found.") + + # 2.4.4.- Dom Size + st.subheader("DOM Size") + if 'dom-size' in data['lighthouseResult']['audits']: + dom_size_score = data["lighthouseResult"]["audits"]["dom-size"]["score"] + dom_size_elements = data["lighthouseResult"]["audits"]["dom-size"]["displayValue"] + st.write(f"Score: {dom_size_score}, DOM Size: {dom_size_elements}") + st.info("A large DOM can impact performance, especially for mobile devices. Consider optimizing your HTML structure and using techniques like lazy loading to improve page load times.") + + # 2.4.5.- OffScreen Images + st.subheader("Offscreen Images") + if 'offscreen-images' in data['lighthouseResult']['audits']: + offscreen_images_score = data["lighthouseResult"]["audits"]["offscreen-images"]["score"] + offscreen_images = data["lighthouseResult"]["audits"]["offscreen-images"]["displayValue"] + st.write(f"Score: {offscreen_images_score}, Offscreen Images: {offscreen_images}") + listoffscreenimages = [] + for item in data["lighthouseResult"]["audits"]["offscreen-images"]["details"]["items"]: + url = item.get("url", "N/A") + totalbytes = item.get("totalBytes", "N/A") + wastedbytes = item.get("wastedBytes", "N/A") + wastedpercent = item.get("wastedPercent", "N/A") + if url != "N/A": + list1 = [url, totalbytes, wastedbytes, wastedpercent] + listoffscreenimages.append(list1) + if listoffscreenimages: + st.table(pd.DataFrame(listoffscreenimages, columns=["URL", "Total Bytes", "Wasted Bytes", "Wasted Percentage"])) + st.info("Consider using lazy loading for offscreen images. This will delay their loading until they are visible in the viewport, improving initial page load times.") + else: + st.write("No significant offscreen image data found.") + + # 2.4.6.- Critical Requests Chains + st.subheader("Critical Request Chains") + if 'critical-request-chains' in data['lighthouseResult']['audits']: + critical_requests = data["lighthouseResult"]["audits"]["critical-request-chains"]["displayValue"] + st.write(f"Number of Critical Request Chains: {critical_requests}") + listchains = [] + for keys in data["lighthouseResult"]["audits"]["critical-request-chains"]["details"]["chains"].keys(): + try: + for values in data["lighthouseResult"]["audits"]["critical-request-chains"]["details"]["chains"][keys]["children"].values(): + url = values["request"]["url"] + startime = values["request"]["startTime"] + endtime = values["request"]["endTime"] + transfersize = values["request"]["transferSize"] + list1 = [url,startime,endtime,transfersize, keys] + listchains.append(list1) + except: + continue + if listchains: + st.table(pd.DataFrame(listchains, columns=["URL", "Start Time", "End Time", "Transfer Size", "Chain"])) + st.info("Optimizing the critical request chains can significantly improve the loading time of your website. Consider prioritizing essential resources and deferring non-critical ones.") + else: + st.write("No significant critical request chain data found.") + + # 2.4.7.- Total Bytes Weight + st.subheader("Total Bytes Weight") + if 'total-byte-weight' in data['lighthouseResult']['audits']: + bytes_weight_score = data["lighthouseResult"]["audits"]["total-byte-weight"]["score"] + bytes_weight = data["lighthouseResult"]["audits"]["total-byte-weight"]["displayValue"] + st.write(f"Score: {bytes_weight_score}, Total Bytes Weight: {bytes_weight}") + listbytes = [] + for item in data["lighthouseResult"]["audits"]["total-byte-weight"]["details"]["items"]: + url = item.get("url", "N/A") + bytes_total = item.get("totalBytes", "N/A") + if url != "N/A": + list1 = [url, bytes_total] + listbytes.append(list1) + if listbytes: + st.table(pd.DataFrame(listbytes, columns=["URL", "Total Bytes"])) + st.info("Reducing the total bytes weight of your website is crucial for improving performance, especially on mobile devices and for users with slower internet connections.") + else: + st.write("No significant total bytes weight data found.") + +# # 2.4.8.- Use of responsive images +# st.subheader("Use of Responsive Images") +# if 'uses-responsive-images' in data['lighthouseResult']['audits']: +# responsive_images_score = data["lighthouseResult"]["audits"]["uses-responsive-images"]["score"] +# responsive_image_savings = data["lighthouseResult"]["audits"]["uses-responsive-images"]["displayValue"] +# st.write(f"Score: {responsive_images_score}, Potential Savings: {responsive_image_savings}") +# listresponsivesavings = [] +# for item in data["lighthouseResult"]["audits"]["uses-responsive-images"]["details"]["items"]: +# url = item.get("url", "N/A") +# wastedbytes = item.get("wastedBytes", "N/A") +# totalbytes = item.get("totalBytes", "N/A") +# if url != "N/A": +# list1 = [url, wastedbytes, totalbytes] +# listresponsivesavings.append(list1) +# if listresponsivesavings: +# st.table(pd.DataFrame(listresponsivesavings, columns=["URL", "Wasted Bytes", "Total Bytes"])) +# st.info("Serving images in different sizes based on the user's device screen size can significantly reduce download times and improve performance.") +# else: +# st.write("No significant responsive image data found.") + + # 2.4.9.- Render Blocking Resources + st.subheader("Render Blocking Resources") + if 'render-blocking-resources' in data['lighthouseResult']['audits']: + blocking_resources_score = data["lighthouseResult"]["audits"]["render-blocking-resources"]["score"] + # Handle potential missing 'displayValue' + blocking_resoures_savings = data["lighthouseResult"]["audits"]["render-blocking-resources"].get("displayValue", "N/A") + st.write(f"Score: {blocking_resources_score}, Potential Savings: {blocking_resoures_savings}") + listblockingresources = [] + for item in data["lighthouseResult"]["audits"]["render-blocking-resources"]["details"]["items"]: + url = item.get("url", "N/A") + totalbytes = item.get("totalBytes", "N/A") + wastedbytes = item.get("wastedMs", "N/A") + if url != "N/A": + list1 = [url, totalbytes, wastedbytes] + listblockingresources.append(list1) + if listblockingresources: + st.table(pd.DataFrame(listblockingresources, columns=["URL", "Total Bytes", "Wasted Milliseconds"])) + st.info("Render-blocking resources can delay the initial rendering of your page, making it appear slow to users. Consider optimizing the loading of critical resources and deferring non-critical ones.") + else: + st.write("No significant render-blocking resource data found.") + + # 2.4.10.- Use of Rel Preload + st.subheader("Use of Rel Preload") + if 'uses-rel-preload' in data['lighthouseResult']['audits']: + rel_preload_score = data["lighthouseResult"]["audits"]["uses-rel-preload"]["score"] + rel_preload_savings = data["lighthouseResult"]["audits"]["uses-rel-preload"]["displayValue"] + st.write(f"Score: {rel_preload_score}, Potential Savings: {rel_preload_savings}") + listrelpreload = [] + for item in data["lighthouseResult"]["audits"]["uses-rel-preload"]["details"]["items"]: + url = item.get("url", "N/A") + wastedms = item.get("wastedMs", "N/A") + if url != "N/A": + list1 = [url, wastedms] + listrelpreload.append(list1) + if listrelpreload: + st.table(pd.DataFrame(listrelpreload, columns=["URL", "Wasted Milliseconds"])) + st.info("The `rel=preload` attribute can be used to prioritize loading essential resources, improving initial page load times.") + else: + st.write("No significant rel preload data found.") + + # 2.4.11.- Estimated Input Latency (DEPRECATED) + st.subheader("Estimated Input Latency") + if 'estimated-input-latency' in data['lighthouseResult']['audits']: + eil_score = data["lighthouseResult"]["audits"]["estimated-input-latency"]["score"] + eil_duration = data["lighthouseResult"]["audits"]["estimated-input-latency"]["displayValue"] + st.write(f"Score: {eil_score}, Duration: {eil_duration}") + + # 2.4.12.- Redirects + st.subheader("Redirects") + if 'redirects' in data['lighthouseResult']['audits']: + redirects_score = data["lighthouseResult"]["audits"]["redirects"]["score"] + redirect_savings = data["lighthouseResult"]["audits"]["redirects"]["displayValue"] + st.write(f"Score: {redirects_score}, Potential Savings: {redirect_savings}") + listredirects = [] + for item in data["lighthouseResult"]["audits"]["redirects"]["details"]["items"]: + url = item.get("url", "N/A") + wastedms = item.get("wastedMs", "N/A") + list1 = [url,wastedms] + listredirects.append(list1) + st.table(pd.DataFrame(listredirects, columns=["URL", "Wasted Milliseconds"])) + + # 2.4.13.- Unused JavaScript + st.subheader("Unused JavaScript") + if 'unused-javascript' in data['lighthouseResult']['audits']: + unused_js_score = data["lighthouseResult"]["audits"]["unused-javascript"]["score"] + unused_js_savings = data["lighthouseResult"]["audits"]["unused-javascript"]["displayValue"] + st.write(f"Score: {unused_js_score}, Potential Savings: {unused_js_savings}") + listunusedjavascript = [] + for item in data["lighthouseResult"]["audits"]["unused-javascript"]["details"]["items"]: + url = item.get("url", "N/A") + totalbytes = item.get("totalBytes", "N/A") + wastedbytes = item.get("wastedBytes", "N/A") + wastedpercentage= item.get("wastedPercent", "N/A") + list1 = [url, totalbytes, wastedbytes, wastedpercentage] + listunusedjavascript.append(list1) + st.table(pd.DataFrame(listunusedjavascript, columns=["URL", "Total Bytes", "Wasted Bytes", "Wasted Percentage"])) + + # 2.4.14.- Total Blocking Time + st.subheader("Total Blocking Time") + if 'total-blocking-time' in data['lighthouseResult']['audits']: + blocking_time_score = data["lighthouseResult"]["audits"]["total-blocking-time"]["score"] + blocking_time_duration = data["lighthouseResult"]["audits"]["total-blocking-time"]["displayValue"] + st.write(f"Score: {blocking_time_score}, Duration: {blocking_time_duration}") + + # 2.4.15.- First Meaningful Paint + st.subheader("First Meaningful Paint") + if 'first-meaningful-paint' in data['lighthouseResult']['audits']: + fmp_score = data["lighthouseResult"]["audits"]["first-meaningful-paint"]["score"] + fmp = data["lighthouseResult"]["audits"]["first-meaningful-paint"]["displayValue"] + st.write(f"Score: {fmp_score}, Time: {fmp}") + + # 2.4.16.- Cumulative Layout Shift + st.subheader("Cumulative Layout Shift") + if 'cumulative-layout-shift' in data['lighthouseResult']['audits']: + cls_score = data["lighthouseResult"]["audits"]["cumulative-layout-shift"]["score"] + cls = data["lighthouseResult"]["audits"]["cumulative-layout-shift"]["displayValue"] + st.write(f"Score: {cls_score}, Value: {cls}") + + # 2.4.17.- Network RTT + st.subheader("Network RTT") + if 'network-rtt' in data['lighthouseResult']['audits']: + network_rtt = data["lighthouseResult"]["audits"]["network-rtt"]["displayValue"] + st.write(f"Network RTT: {network_rtt}") + + # 2.4.18.- Speed Index + st.subheader("Speed Index") + if 'speed-index' in data['lighthouseResult']['audits']: + speed_index_score = data["lighthouseResult"]["audits"]["speed-index"]["score"] + speed_index = data["lighthouseResult"]["audits"]["speed-index"]["displayValue"] + st.write(f"Score: {speed_index_score}, Speed Index: {speed_index}") + + # 2.4.19.- Use of Rel Preconnect + st.subheader("Use of Rel Preconnect") + if 'uses-rel-preconnect' in data['lighthouseResult']['audits']: + rel_preconnect_score = data["lighthouseResult"]["audits"]["uses-rel-preconnect"]["score"] + rel_preconnect_warning = data["lighthouseResult"]["audits"]["uses-rel-preconnect"]["warnings"] + st.write(f"Score: {rel_preconnect_score}, Warnings: {rel_preconnect_warning}") + +# # 2.4.20.- Use of Optimized Images +# st.subheader("Use of Optimized Images") +# if 'uses-optimized-images' in data['lighthouseResult']['audits']: +# optimized_images_score = data["lighthouseResult"]["audits"]["uses-optimized-images"]["score"] +# optimized_images = data["lighthouseResult"]["audits"]["uses-optimized-images"]["displayValue"] +# st.write(f"Score: {optimized_images_score}, Potential Savings: {optimized_images}") +# listoptimisedimages = [] +# for item in data["lighthouseResult"]["audits"]["uses-optimized-images"]["details"]["items"]: +# url = item.get("url", "N/A") +# wastedbytes = item.get("wastedBytes", "N/A") +# totalbytes = item.get("totalBytes", "N/A") +# list1 = [url, wastedbytes, totalbytes] +# listoptimisedimages.append(list1) +# st.table(pd.DataFrame(listoptimisedimages, columns=["URL", "Wasted Bytes", "Total Bytes"])) + + + + +def google_pagespeed_insights(): + + st.markdown("