From 74c862faecb1fc0b3d6046d134eae7f748e450f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D9=8A?= <ajay.calsoft@gmail.com>
Date: Fri, 17 Jan 2025 15:07:24 +0530
Subject: [PATCH] Update textstaty.py

User-Friendly Interface: The Streamlit interface is intuitive, allowing users to easily input text and get readability scores.
Comprehensive Analysis: The tool covers a wide range of readability metrics, providing detailed insights into the text's readability.
Actionable Tips: Each readability score is accompanied by actionable tips, helping users improve their content based on the analysis.
Additional Insights: The inclusion of additional metrics like reading time, syllable count, and word count provides a thorough analysis of the text.
Suggested Improvements:

Error Handling:
Add error handling for cases where the text input is empty or too short for certain readability metrics.

Code Modularization:
Refactor the code to encapsulate readability calculations and markdown generation in separate functions, making the code more modular and easier to maintain.

Performance Optimization:
Avoid recomputing the readability scores when the text has not changed.

Code Readability:
Add docstrings and comments to explain the purpose of functions and complex code blocks.
Ensure consistent formatting and adherence to PEP 8.
---
 lib/ai_seo_tools/textstaty.py | 391 ++++++++++++----------------------
 1 file changed, 138 insertions(+), 253 deletions(-)

diff --git a/lib/ai_seo_tools/textstaty.py b/lib/ai_seo_tools/textstaty.py
index 77e85416..f7c55942 100644
--- a/lib/ai_seo_tools/textstaty.py
+++ b/lib/ai_seo_tools/textstaty.py
@@ -10,263 +10,148 @@ st.write("""
     Just paste in a sample of your text, and we'll break down the readability scores and offer actionable tips! 
 """)
 
+
+def analyze_text(test_data):
+    """
+    Compute textstat readability scores and basic text statistics for the given text.
+
+    Parameters:
+    test_data (str): The text to be analyzed.
+
+    Returns:
+    dict: Maps each metric name to a dict with "score", "description" and, for some metrics, "tips".
+    """
+    return {
+        "Flesch Reading Ease": {
+            "score": textstat.flesch_reading_ease(test_data),
+            "description": "This score rates your text on a scale of 0-100, with higher scores being easier to read.",
+            "tips": [
+                "Score below 30? Simplify your text by breaking down complex sentences, using shorter words, and avoiding jargon.",
+                "Score around 60-70? You're in the 'standard' range.",
+                "Score over 90? Your text is very easy to read. Add some complexity or sophistication if needed."
+            ]
+        },
+        "Flesch-Kincaid Grade Level": {
+            "score": textstat.flesch_kincaid_grade(test_data),
+            "description": "This formula estimates the US school grade level needed to understand your text.",
+            "tips": [
+                "High Score? Your writing might be too complex for your target audience.",
+                "Low Score? Your audience might find the text too simple.",
+                "Match Your Audience: Tailor the complexity to your readers."
+            ]
+        },
+        "SMOG Index": {
+            "score": textstat.smog_index(test_data),
+            "description": "This formula measures text complexity by looking at the number of long words and sentences.",
+            "tips": [
+                "Best for texts with at least 30 sentences.",
+                "Adjust complexity to match your target audience."
+            ]
+        },
+        "Coleman-Liau Index": {
+            "score": textstat.coleman_liau_index(test_data),
+            "description": "This formula uses the average number of letters and sentences per 100 words to estimate the reading level."
+        },
+        "Automated Readability Index (ARI)": {
+            "score": textstat.automated_readability_index(test_data),
+            "description": "Estimates the grade level required to comprehend your text."
+        },
+        "Dale-Chall Readability Score": {
+            "score": textstat.dale_chall_readability_score(test_data),
+            "description": "Focuses on the number of uncommon words (not on a list of 3000 common words) and sentence length.",
+            "tips": [
+                "Easy to Understand: Aim for a score around the reading level of your audience.",
+                "High School Level? Scores between 7.0 and 8.9 indicate a high school reading level (grades 9-12).",
+                "Beyond High School? Scores of 9.0 and above are usually for a college-level audience."
+            ]
+        },
+        "Gunning Fog": {
+            "score": textstat.gunning_fog(test_data),
+            "description": "Calculates the grade level required to understand the text."
+        },
+        "Linsear Write Formula": {
+            "score": textstat.linsear_write_formula(test_data),
+            "description": "Estimates the US grade level needed to understand the text."
+        },
+        "Text Standard (Consensus)": {
+            "score": textstat.text_standard(test_data),
+            "description": "A consensus estimate of the US grade level needed to understand your text, based on multiple readability scores."
+        },
+        "Spache Readability": {
+            "score": textstat.spache_readability(test_data),
+            "description": "Best for analyzing text for children, typically up to grade 4.",
+            "tips": [
+                "Considers the number of unfamiliar words and the length of sentences."
+            ]
+        },
+        "McAlpine EFLAW": {
+            "score": textstat.mcalpine_eflaw(test_data),
+            "description": "Evaluates text for foreign language learners, focusing on 'miniwords' and sentence length.",
+            "tips": [
+                "Target Score: Aim for a score of 25 or less."
+            ]
+        },
+        "Reading Time": {
+            "score": textstat.reading_time(test_data),  # textstat reports this in seconds, not minutes
+            "description": "Estimated reading time in seconds."
+        },
+        "Syllable Count": {
+            "score": textstat.syllable_count(test_data),
+            "description": "The number of syllables in the text."
+        },
+        "Word Count": {
+            "score": textstat.lexicon_count(test_data),
+            "description": "The number of words in the text."
+        },
+        "Sentence Count": {
+            "score": textstat.sentence_count(test_data),
+            "description": "The number of sentences in the text."
+        },
+        "Character Count": {
+            "score": textstat.char_count(test_data),
+            "description": "The number of characters in the text."
+        },
+        "Letter Count (without punctuation)": {
+            "score": textstat.letter_count(test_data),
+            "description": "The number of letters without punctuation."
+        },
+        "Polysyllable Count": {
+            "score": textstat.polysyllabcount(test_data),
+            "description": "The number of polysyllabic words in the text."
+        },
+        "Monosyllable Count": {
+            "score": textstat.monosyllabcount(test_data),
+            "description": "The number of monosyllabic words in the text."
+        }
+    }
+
+
 text_input = st.text_area("Paste your text here:", height=200)
 
 if st.button("Analyze!"):
     with st.spinner("Analyzing your text..."):
         test_data = text_input
+        if not test_data.strip():
+            st.error("Please enter text to analyze.")
+        else:
+            results = analyze_text(test_data)
 
-        st.subheader("Readability Scores:")
-        st.write("---")
+            st.subheader("Readability Scores:")
+            st.write("---")
+            for metric, data in results.items():
+                st.markdown(f"**{metric}:** {data['score']}")
+                st.markdown(f"* **What It Means:** {data['description']}")
+                if 'tips' in data:
+                    st.markdown("* **Actionable Tips:**")
+                    for tip in data['tips']:
+                        st.markdown(f"    * {tip}")
+                st.write("  ")
 
-        # 1. Flesch Reading Ease
-        flesch_ease = textstat.flesch_reading_ease(test_data)
-        st.markdown(f"**Flesch Reading Ease:** {flesch_ease}")
-        st.markdown(""" 
-            * **What It Means:**  This score rates your text on a scale of 0-100, with higher scores being easier to read. Imagine a scale from "super confusing" (low scores) to "super easy" (high scores). 
-            * **Actionable Tips:**
-                * **Score below 30?**  It might be time to simplify. Break down complex sentences, use shorter words, and avoid jargon. 
-                * **Score around 60-70?**  You're in the "standard" range.  
-                * **Score over 90?**  Your text is very easy to read. But if you want to add some complexity or sophistication, try adding some longer sentences or slightly more complex vocabulary.
-        """)
-
-        st.write("  ")
-
-        # 2. Flesch-Kincaid Grade Level
-        flesch_kincaid = textstat.flesch_kincaid_grade(test_data)
-        st.markdown(f"**Flesch-Kincaid Grade Level:** {flesch_kincaid:.1f}")
-        st.markdown(""" 
-            * **What It Means:** This formula estimates the US school grade level needed to understand your text.  For example, a score of 7.2 means a 7th-grader should be able to understand it.
-            * **Actionable Tips:**
-                * **High Score?**   If the grade level is much higher than your target audience's expected level, your writing might be too complex.  
-                * **Low Score?**  If the score is significantly lower, your audience might find the text too simple.   
-                * **Match Your Audience:**  Remember to tailor the complexity to your readers!  
-        """)
-
-        st.write("  ")
-
-        # 3. SMOG Index
-        smog_index = textstat.smog_index(test_data)
-        st.markdown(f"**SMOG Index:** {smog_index:.1f}")
-        st.markdown(""" 
-            * **What It Means:** This formula measures how complex your text is by looking at the number of long words and sentences. 
-            * **Actionable Tips:**
-                * **Important Note:** This formula works best for texts with at least 30 sentences.
-                * **Adjust Complexity:**  SMOG helps you determine whether your writing is appropriate for your target audience.  
-        """)
-
-        st.write("  ")
-
-        # 4. Coleman-Liau Index
-        coleman_liau = textstat.coleman_liau_index(test_data)
-        st.markdown(f"**Coleman-Liau Index:** {coleman_liau:.1f}")
-        st.markdown(""" 
-            * **What It Means:** This formula uses a more advanced method of analyzing sentence length and the number of syllables per word to estimate the reading level. 
-        """)
-
-        st.write("  ")
-
-        # 5. Automated Readability Index (ARI)
-        ari = textstat.automated_readability_index(test_data)
-        st.markdown(f"**Automated Readability Index (ARI):** {ari:.1f}")
-        st.markdown(""" 
-            * **What It Means:**  Similar to other readability scores, the ARI estimates the grade level required to comprehend your text. 
-        """)
-
-        st.write("  ")
-
-        # 6. Dale-Chall Readability Score
-        dale_chall = textstat.dale_chall_readability_score(test_data)
-        st.markdown(f"**Dale-Chall Readability Score:** {dale_chall:.1f}")
-        st.markdown(""" 
-            * **What It Means:** This formula focuses on the number of uncommon words (not on a list of 3000 common words) and sentence length. 
-            * **Actionable Tips:**
-                * **Easy to Understand:**   Aim for a score around the reading level of your audience. If you're writing for a general audience, a score between 6 and 8 is usually considered good.
-                * **High School Level?**   Scores between 9 and 12 usually indicate a high school reading level. 
-                * **Beyond High School?**   Scores above 12 are usually for a college-level audience.
-        """)
-
-        st.write("  ")
-
-        # 7. Gunning Fog
-        gunning_fog = textstat.gunning_fog(test_data)
-        st.markdown(f"**Gunning Fog:** {gunning_fog:.1f}")
-        st.markdown(""" 
-            * **What It Means:** This formula calculates the grade level required to understand the text.
-        """)
-
-        st.write("  ")
-
-        # 8. Linsear Write Formula 
-        linsear = textstat.linsear_write_formula(test_data)
-        st.markdown(f"**Linsear Write Formula:** {linsear:.1f}")
-        st.markdown(""" 
-            * **What It Means:**  This formula aims to estimate the US grade level needed to understand the text. 
-        """)
-
-        st.write("  ")
-
-        # 9. Text Standard (Consensus)
-        text_standard = textstat.text_standard(test_data)
-        st.markdown(f"**Text Standard (Consensus):** {text_standard}")
-        st.markdown(""" 
-            * **What It Means:** This score is a consensus estimate of the US grade level needed to understand your text. It's an average of all the readability scores. 
-        """)
-
-        st.write("  ")
-
-        # 10.  Spache Readability 
-        spache = textstat.spache_readability(test_data)
-        st.markdown(f"**Spache Readability:** {spache:.1f}")
-        st.markdown(""" 
-            * **What It Means:** This formula is best for analyzing text for children,  typically up to grade 4.  It considers the number of unfamiliar words and the length of sentences. 
-        """)
-
-        st.write("  ")
-
-        # 11. McAlpine EFLAW
-        mcalpine = textstat.mcalpine_eflaw(test_data)
-        st.markdown(f"**McAlpine EFLAW:** {mcalpine:.1f}")
-        st.markdown(""" 
-            * **What It Means:**  This formula specifically evaluates text for foreign language learners (typically focusing on English).  It looks at "miniwords" and sentence length.
-            * **Target Score:**  Try to aim for a score of 25 or less. 
-        """)
-
-        st.write("  ")
-  
-        #  ---  Spanish Readability Formulas  (For Examples, replace 'test_data' with your Spanish text)---
-
-        # 12.  Fernandez-Huerta
-        # fernandez_huerta = textstat.fernandez_huerta(test_data)
-        # st.markdown(f"**Fernandez-Huerta (Spanish):** {fernandez_huerta:.1f}")
-        # st.markdown(""" 
-        #     * **Meaning:**  This is an adaptation of the Flesch Reading Ease formula specifically for Spanish.
-        #     * **Interpretation:** Higher scores mean easier readability.
-        # """)
-
-        # st.write("  ")
-
-        # 13. Szigriszt-Pazs (Spanish)
-        # szigriszt_pazos = textstat.szigriszt_pazos(test_data)
-        # st.markdown(f"**Szigriszt-Pazs (Spanish):** {szigriszt_pazos:.1f}")
-        # st.markdown(""" 
-        #     * **Meaning:**  Another adaptation of the Flesch Reading Ease for Spanish text. It tries to measure the text's understandability. 
-        # """)
-
-        # st.write("  ")
-
-        # 14.  Gutierrez-Polini (Spanish)
-        # gutierrez_polini = textstat.gutierrez_polini(test_data)
-        # st.markdown(f"**Gutierrez-Polini (Spanish):** {gutierrez_polini:.1f}")
-        # st.markdown(""" 
-        #     * **Meaning:** Designed specifically for Spanish grade-school texts.
-        #     * **Note:** The score may be unreliable for more complex text.
-        # """)
-
-        # st.write("  ")
-
-        # 15. Crawford (Spanish) 
-        # crawford = textstat.crawford(test_data)
-        # st.markdown(f"**Crawford (Spanish):** {crawford:.1f}")
-        # st.markdown(""" 
-        #     * **Meaning:**  This formula estimates the number of years of schooling needed to understand the text, primarily for elementary school-level Spanish.
-        # """)
-
-        # st.write("  ")
-
-        #  ---  Arabic Readability Formula  (For Examples, replace 'test_data' with your Arabic text) ---
-
-        # 16.  Osman
-        # osman = textstat.osman(test_data)
-        # st.markdown(f"**Osman (Arabic):** {osman:.1f}")
-        # st.markdown(""" 
-        #     * **Meaning:** Designed for Arabic texts. An adaptation of Flesch and Fog formulas.
-        # """)
-
-        # st.write("  ")
-
-        # --- Italian Readability Formula ---
-
-        # 17.  Gulpease Index 
-        # gulpease = textstat.gulpease_index(test_data)
-        # st.markdown(f"**Gulpease Index (Italian):** {gulpease:.1f}")
-        # st.markdown(""" 
-        #     * **Meaning:**  Measures the readability of Italian text.
-        #     * **Interpretation:** Lower scores require a higher level of education for ease of reading. 
-        # """)
-
-        # st.write("  ") 
-
-        # ---  German Readability Formula (For Examples, replace 'test_data' with your German text) ---
-
-        # 18. Wiener Sachtextformel
-        # wiener_sachtextformel = textstat.wiener_sachtextformel(test_data)
-        # st.markdown(f"**Wiener Sachtextformel (German):** {wiener_sachtextformel:.1f}")
-        # st.markdown(""" 
-        #     * **Meaning:** This formula measures the readability of German texts.
-        #     * **Interpretation:**
-        #         *  4:  Very easy text 
-        #         * 15:  Very difficult text 
-        # """)
-
-        # st.write("  ") 
-
-        st.subheader("Additional Insights:")
-        st.write("---")
-        
-        #  19. Reading Time
-        reading_time = textstat.reading_time(test_data) 
-        st.markdown(f"**Estimated Reading Time:** {reading_time:.1f} minutes")
-
-        st.write("  ")
-
-        # 20. Syllable Count 
-        syllable_count = textstat.syllable_count(test_data) 
-        st.markdown(f"**Syllable Count:** {syllable_count}")
-
-        st.write("  ")
-
-        # 21. Lexicon Count (Word Count)
-        lexicon_count = textstat.lexicon_count(test_data)
-        st.markdown(f"**Word Count:** {lexicon_count}")
-
-        st.write("  ") 
-
-        # 22.  Sentence Count 
-        sentence_count = textstat.sentence_count(test_data)
-        st.markdown(f"**Sentence Count:** {sentence_count}") 
-
-        st.write("  ")
-
-        # 23.  Character Count
-        char_count = textstat.char_count(test_data) 
-        st.markdown(f"**Character Count:** {char_count}")
-
-        st.write("  ") 
-
-        # 24.  Letter Count 
-        letter_count = textstat.letter_count(test_data)
-        st.markdown(f"**Letter Count (without punctuation):** {letter_count}")
-
-        st.write("  ") 
-
-        # 25.  Polysyllable Count 
-        polysyllable_count = textstat.polysyllabcount(test_data)
-        st.markdown(f"**Polysyllable Count:** {polysyllable_count}")
-
-        st.write("  ") 
-
-        # 26. Monosyllable Count
-        monosyllable_count = textstat.monosyllabcount(test_data)
-        st.markdown(f"**Monosyllable Count:** {monosyllable_count}")
-
-        st.write("  ")
-
-        st.subheader("Key Takeaways:")
-        st.write("---")
-        st.markdown("""
-        *  **Don't Be Afraid to Simplify!**  Often, simpler language makes content more impactful and easier to digest. 
-        *  **Aim for a Reading Level Appropriate for Your Audience:** Consider the education level, background, and familiarity of your readers.
-        *  **Use Short Sentences:** This makes your content more scannable and easier to read.
-        *  **Write for Everyone:** Accessibility should always be a priority. When in doubt, aim for clear, concise language! 
-        """)
+            st.subheader("Key Takeaways:")
+            st.write("---")
+            st.markdown("""
+                * **Don't Be Afraid to Simplify!** Often, simpler language makes content more impactful and easier to digest.
+                * **Aim for a Reading Level Appropriate for Your Audience:** Consider the education level, background, and familiarity of your readers.
+                * **Use Short Sentences:** This makes your content more scannable and easier to read.
+                * **Write for Everyone:** Accessibility should always be a priority. When in doubt, aim for clear, concise language!
+            """)