Made changes to Getting Started with ALwrity and added a lot of details on API keys

2025-04-01 13:11:40 +05:30
parent 367f9bac2c
commit 7d6ea91e6a
68 changed files with 8384 additions and 823 deletions
--- a/lib/ai_seo_tools/on_page_seo_analyzer.py
+++ b/lib/ai_seo_tools/on_page_seo_analyzer.py
@@ -2,7 +2,7 @@ import os
 import json
 import streamlit as st
 from tenacity import retry, stop_after_attempt, wait_random_exponential
-import cloudscraper
+import crawl4ai
 from bs4 import BeautifulSoup
 import requests
 import csv
@@ -18,7 +18,7 @@ from ..gpt_providers.text_generation.main_text_generation import llm_text_gen

 def fetch_and_parse_html(url):
    """
-    Fetches HTML content from the given URL using CloudScraper and parses it with BeautifulSoup.
+    Fetches HTML content from the given URL using crawl4ai and parses it with BeautifulSoup.

    Args:
        url (str): The URL of the webpage to fetch.
@@ -27,9 +27,8 @@ def fetch_and_parse_html(url):
        BeautifulSoup: Parsed HTML content.
    """
    try:
-        scraper = cloudscraper.create_scraper()
-        html = scraper.get(url)
-        soup = BeautifulSoup(html.text, 'html.parser')
+        html = crawl4ai.get(url)
+        soup = BeautifulSoup(html, 'html.parser')
        return soup
    except Exception as e:
        st.error(f"⚠️ Error fetching or parsing HTML: {e}")
--- a/lib/ai_seo_tools/textstaty.py
+++ b/lib/ai_seo_tools/textstaty.py
@@ -1,7 +1,35 @@
-import textstat
-import streamlit as st
+"""Text analysis tools using textstat."""

-st.set_page_config(layout="wide", page_title="Text Readability Analyzer", page_icon=":book:")
+import streamlit as st
+from textstat import textstat
+
+def analyze_text(text):
+    """Analyze text using textstat metrics."""
+    if not text:
+        st.warning("Please enter some text to analyze.")
+        return
+    
+    # Calculate various metrics
+    metrics = {
+        "Flesch Reading Ease": textstat.flesch_reading_ease(text),
+        "Flesch-Kincaid Grade Level": textstat.flesch_kincaid_grade(text),
+        "Gunning Fog Index": textstat.gunning_fog(text),
+        "SMOG Index": textstat.smog_index(text),
+        "Automated Readability Index": textstat.automated_readability_index(text),
+        "Coleman-Liau Index": textstat.coleman_liau_index(text),
+        "Linsear Write Formula": textstat.linsear_write_formula(text),
+        "Dale-Chall Readability Score": textstat.dale_chall_readability_score(text),
+        "Readability Consensus": textstat.readability_consensus(text)
+    }
+    
+    # Display metrics in a clean format
+    st.subheader("Text Analysis Results")
+    for metric, value in metrics.items():
+        st.metric(metric, f"{value:.2f}")
+    
+    # Add visualizations
+    st.subheader("Visualization")
+    st.bar_chart(metrics)

 st.title("📖  Text Readability Analyzer:  Making Your Content Easy to Read")

@@ -10,122 +38,6 @@ st.write("""
    Just paste in a sample of your text, and we'll break down the readability scores and offer actionable tips! 
 """)

-
-def analyze_text(test_data):
-    """
-    Analyzes the readability of the provided text and returns a dictionary with the results.
-
-    Parameters:
-    test_data (str): The text to be analyzed.
-
-    Returns:
-    dict: A dictionary containing readability scores and additional metrics.
-    """
-    return {
-        "Flesch Reading Ease": {
-            "score": textstat.flesch_reading_ease(test_data),
-            "description": "This score rates your text on a scale of 0-100, with higher scores being easier to read.",
-            "tips": [
-                "Score below 30? Simplify your text by breaking down complex sentences, using shorter words, and avoiding jargon.",
-                "Score around 60-70? You're in the 'standard' range.",
-                "Score over 90? Your text is very easy to read. Add some complexity or sophistication if needed."
-            ]
-        },
-        "Flesch-Kincaid Grade Level": {
-            "score": textstat.flesch_kincaid_grade(test_data),
-            "description": "This formula estimates the US school grade level needed to understand your text.",
-            "tips": [
-                "High Score? Your writing might be too complex for your target audience.",
-                "Low Score? Your audience might find the text too simple.",
-                "Match Your Audience: Tailor the complexity to your readers."
-            ]
-        },
-        "SMOG Index": {
-            "score": textstat.smog_index(test_data),
-            "description": "This formula measures text complexity by looking at the number of long words and sentences.",
-            "tips": [
-                "Best for texts with at least 30 sentences.",
-                "Adjust complexity to match your target audience."
-            ]
-        },
-        "Coleman-Liau Index": {
-            "score": textstat.coleman_liau_index(test_data),
-            "description": "This formula uses sentence length and the number of syllables per word to estimate the reading level."
-        },
-        "Automated Readability Index (ARI)": {
-            "score": textstat.automated_readability_index(test_data),
-            "description": "Estimates the grade level required to comprehend your text."
-        },
-        "Dale-Chall Readability Score": {
-            "score": textstat.dale_chall_readability_score(test_data),
-            "description": "Focuses on the number of uncommon words (not on a list of 3000 common words) and sentence length.",
-            "tips": [
-                "Easy to Understand: Aim for a score around the reading level of your audience.",
-                "High School Level? Scores between 9 and 12 indicate a high school reading level.",
-                "Beyond High School? Scores above 12 are usually for a college-level audience."
-            ]
-        },
-        "Gunning Fog": {
-            "score": textstat.gunning_fog(test_data),
-            "description": "Calculates the grade level required to understand the text."
-        },
-        "Linsear Write Formula": {
-            "score": textstat.linsear_write_formula(test_data),
-            "description": "Estimates the US grade level needed to understand the text."
-        },
-        "Text Standard (Consensus)": {
-            "score": textstat.text_standard(test_data),
-            "description": "A consensus estimate of the US grade level needed to understand your text, based on multiple readability scores."
-        },
-        "Spache Readability": {
-            "score": textstat.spache_readability(test_data),
-            "description": "Best for analyzing text for children, typically up to grade 4.",
-            "tips": [
-                "Considers the number of unfamiliar words and the length of sentences."
-            ]
-        },
-        "McAlpine EFLAW": {
-            "score": textstat.mcalpine_eflaw(test_data),
-            "description": "Evaluates text for foreign language learners, focusing on 'miniwords' and sentence length.",
-            "tips": [
-                "Target Score: Aim for a score of 25 or less."
-            ]
-        },
-        "Reading Time": {
-            "score": textstat.reading_time(test_data),
-            "description": "Estimated reading time in minutes."
-        },
-        "Syllable Count": {
-            "score": textstat.syllable_count(test_data),
-            "description": "The number of syllables in the text."
-        },
-        "Word Count": {
-            "score": textstat.lexicon_count(test_data),
-            "description": "The number of words in the text."
-        },
-        "Sentence Count": {
-            "score": textstat.sentence_count(test_data),
-            "description": "The number of sentences in the text."
-        },
-        "Character Count": {
-            "score": textstat.char_count(test_data),
-            "description": "The number of characters in the text."
-        },
-        "Letter Count (without punctuation)": {
-            "score": textstat.letter_count(test_data),
-            "description": "The number of letters without punctuation."
-        },
-        "Polysyllable Count": {
-            "score": textstat.polysyllabcount(test_data),
-            "description": "The number of polysyllabic words in the text."
-        },
-        "Monosyllable Count": {
-            "score": textstat.monosyllabcount(test_data),
-            "description": "The number of monosyllabic words in the text."
-        }
-    }
-
-
 text_input = st.text_area("Paste your text here:", height=200)

 if st.button("Analyze!"):
@@ -134,18 +46,7 @@ if st.button("Analyze!"):
        if not test_data.strip():
            st.error("Please enter text to analyze.")
        else:
-            results = analyze_text(test_data)
-
-            st.subheader("Readability Scores:")
-            st.write("---")
-            for metric, data in results.items():
-                st.markdown(f"**{metric}:** {data['score']}")
-                st.markdown(f"* **What It Means:** {data['description']}")
-                if 'tips' in data:
-                    st.markdown("* **Actionable Tips:**")
-                    for tip in data['tips']:
-                        st.markdown(f"    * {tip}")
-                st.write("  ")
+            analyze_text(test_data)

            st.subheader("Key Takeaways:")
            st.write("---")
--- a/lib/ai_seo_tools/webpage_content_analysis.py
+++ b/lib/ai_seo_tools/webpage_content_analysis.py
@@ -1,3 +1,5 @@
+"""Webpage content analysis tool."""
+
 import streamlit as st
 import requests
 from bs4 import BeautifulSoup
@@ -7,8 +9,7 @@ from nltk.tokenize import word_tokenize
 from nltk.util import ngrams
 from langchain.llms import OpenAI
 from langchain.chains import ConversationChain
-
-st.set_page_config(layout="wide", page_title="Web Content Analyzer - Dive Deep with AI!", page_icon=":mag_right:")
+from urllib.parse import urlparse

 st.title("🧠 Web Content Analyzer: Uncover Hidden Insights with AI! 🧠")
 st.write("""
@@ -39,19 +40,36 @@ if st.button("Analyze with AI!"):
            st.stop()

        try:
+            # Validate URL
+            parsed_url = urlparse(url)
+            if not parsed_url.scheme:
+                url = "https://" + url
+            
+            # Fetch webpage content
            response = requests.get(url)
-            response.raise_for_status()  
-
-            soup = BeautifulSoup(response.content, 'html.parser')
-            body_txt = soup.find('body').text
-
-            words = [w.lower() for w in word_tokenize(body_txt)]
-            stopw = nltk.corpus.stopwords.words(language)
-
-            final_words = [w for w in words if w not in stopw and w.isalpha()]
-
+            response.raise_for_status()
+            
+            # Parse HTML
+            soup = BeautifulSoup(response.text, 'html.parser')
+            
+            # Extract content
+            title = soup.title.string if soup.title else "No title found"
+            meta_description = soup.find('meta', {'name': 'description'})
+            description = meta_description['content'] if meta_description else "No description found"
+            
+            # Display results
+            st.subheader("Page Analysis")
+            st.metric("Title", title)
+            st.metric("Description", description)
+            
+            # Content statistics
+            text_content = soup.get_text()
+            words = text_content.split()
+            st.metric("Word Count", len(words))
+            st.metric("Unique Words", len(set(words)))
+            
            # Frequency analysis (same as before)
-            freq = nltk.FreqDist(final_words)
+            freq = nltk.FreqDist(words)
            keywords = freq.most_common(10)  
            df_keywords = pd.DataFrame(keywords, columns=("Keyword", "Frequency"))

@@ -60,19 +78,19 @@ if st.button("Analyze with AI!"):
            st.write("  ")

            st.markdown("**Main Theme:**")
-            ai_theme = conversation_chain.run(f"What is the main theme or topic of this content? \n {body_txt}")
+            ai_theme = conversation_chain.run(f"What is the main theme or topic of this content? \n {text_content}")
            st.markdown(f"  {ai_theme}")

            st.write("  ")

            st.markdown("**Suggested Keywords:**")
-            ai_keywords = conversation_chain.run(f"What other relevant keywords might be helpful to target for this content? \n {body_txt}")
+            ai_keywords = conversation_chain.run(f"What other relevant keywords might be helpful to target for this content? \n {text_content}")
            st.markdown(f"  {ai_keywords}")

            st.write("  ")

            st.markdown("**Content Improvement:**")
-            ai_improvement = conversation_chain.run(f"What could be done to improve this content for clarity, engagement, or SEO? \n {body_txt}")
+            ai_improvement = conversation_chain.run(f"What could be done to improve this content for clarity, engagement, or SEO? \n {text_content}")
            st.markdown(f"  {ai_improvement}")

            # --- Display Frequency Results ---
@@ -94,3 +112,5 @@ if st.button("Analyze with AI!"):
            """)
        except requests.exceptions.RequestException as e:
            st.error(f"Oops! Something went wrong fetching the URL.  Error: {e}")
+        except Exception as e:
+            st.error(f"An error occurred: {e}")
--- a/lib/ai_seo_tools/wordcloud.py
+++ b/lib/ai_seo_tools/wordcloud.py
@@ -1,3 +1,5 @@
+"""Word cloud generation tool."""
+
 import streamlit as st
 import requests
 from bs4 import BeautifulSoup
@@ -5,8 +7,8 @@ import pandas as pd
 import nltk
 from nltk.tokenize import word_tokenize
 from nltk.util import ngrams
-
-st.set_page_config(layout="wide", page_title="Web Content Analyzer - Dive Into Your Words!", page_icon=":mag:")
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt

 st.title("🔎 Web Content Analyzer:  Uncover Your Words' Power! 🔎")
 st.write("""
@@ -86,3 +88,26 @@ if st.button("Analyze Your Content!"):
        
        except requests.exceptions.RequestException as e:
            st.error(f"Oops! Something went wrong fetching the URL.  Error: {e}")
+
+def generate_wordcloud(text):
+    """Generate a word cloud from the given text."""
+    if not text:
+        st.warning("Please enter some text to generate a word cloud.")
+        return
+    
+    # Create and generate a word cloud image
+    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
+    
+    # Display the word cloud
+    st.subheader("Word Cloud Visualization")
+    fig, ax = plt.subplots(figsize=(10, 5))
+    ax.imshow(wordcloud, interpolation='bilinear')
+    ax.axis('off')
+    st.pyplot(fig)
+    
+    # Add some statistics
+    st.subheader("Text Statistics")
+    words = text.split()
+    unique_words = set(words)
+    st.metric("Total Words", len(words))
+    st.metric("Unique Words", len(unique_words))