Made changes to Getting started with ALwrity and added a lot of details on API keys
This commit is contained in:
@@ -2,7 +2,7 @@ import os
|
||||
import json
|
||||
import streamlit as st
|
||||
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
||||
import cloudscraper
|
||||
import crawl4ai
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
import csv
|
||||
@@ -18,7 +18,7 @@ from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
def fetch_and_parse_html(url):
|
||||
"""
|
||||
Fetches HTML content from the given URL using CloudScraper and parses it with BeautifulSoup.
|
||||
Fetches HTML content from the given URL using crawl4ai and parses it with BeautifulSoup.
|
||||
|
||||
Args:
|
||||
url (str): The URL of the webpage to fetch.
|
||||
@@ -27,9 +27,8 @@ def fetch_and_parse_html(url):
|
||||
BeautifulSoup: Parsed HTML content.
|
||||
"""
|
||||
try:
|
||||
scraper = cloudscraper.create_scraper()
|
||||
html = scraper.get(url)
|
||||
soup = BeautifulSoup(html.text, 'html.parser')
|
||||
html = crawl4ai.get(url)
|
||||
soup = BeautifulSoup(html, 'html.parser')
|
||||
return soup
|
||||
except Exception as e:
|
||||
st.error(f"⚠️ Error fetching or parsing HTML: {e}")
|
||||
|
||||
@@ -1,7 +1,35 @@
|
||||
import textstat
|
||||
import streamlit as st
|
||||
"""Text analysis tools using textstat."""
|
||||
|
||||
st.set_page_config(layout="wide", page_title="Text Readability Analyzer", page_icon=":book:")
|
||||
import streamlit as st
|
||||
from textstat import textstat
|
||||
|
||||
def analyze_text(text):
    """Compute textstat readability metrics for ``text`` and render them.

    A warning is shown and nothing else happens when ``text`` is empty or
    contains only whitespace. Otherwise every score is displayed with
    ``st.metric`` (formatted to two decimal places), followed by a bar chart
    built from the same scores.

    Args:
        text (str): The text to analyze.

    Returns:
        None: All results are written to the Streamlit page.
    """
    if not text or not text.strip():
        st.warning("Please enter some text to analyze.")
        return

    # Calculate various metrics
    metrics = {
        "Flesch Reading Ease": textstat.flesch_reading_ease(text),
        "Flesch-Kincaid Grade Level": textstat.flesch_kincaid_grade(text),
        "Gunning Fog Index": textstat.gunning_fog(text),
        "SMOG Index": textstat.smog_index(text),
        "Automated Readability Index": textstat.automated_readability_index(text),
        "Coleman-Liau Index": textstat.coleman_liau_index(text),
        "Linsear Write Formula": textstat.linsear_write_formula(text),
        "Dale-Chall Readability Score": textstat.dale_chall_readability_score(text),
        # textstat exposes the consensus grade as ``text_standard``; there is
        # no ``readability_consensus`` function. ``float_output=True`` makes
        # it return a number instead of a label such as "8th and 9th grade",
        # which the ``:.2f`` formatting below requires.
        "Readability Consensus": textstat.text_standard(text, float_output=True),
    }

    # Display metrics in a clean format
    st.subheader("Text Analysis Results")
    for metric, value in metrics.items():
        st.metric(metric, f"{value:.2f}")

    # Add visualizations
    st.subheader("Visualization")
    st.bar_chart(metrics)
|
||||
|
||||
st.title("📖 Text Readability Analyzer: Making Your Content Easy to Read")
|
||||
|
||||
@@ -10,122 +38,6 @@ st.write("""
|
||||
Just paste in a sample of your text, and we'll break down the readability scores and offer actionable tips!
|
||||
""")
|
||||
|
||||
|
||||
def analyze_text(test_data):
    """
    Analyzes the readability of the provided text and returns a dictionary with the results.

    Parameters:
        test_data (str): The text to be analyzed.

    Returns:
        dict: Maps each metric name to a dictionary with the keys:
            "score": the value returned by the corresponding textstat function,
            "description": a short explanation of the metric,
            "tips" (only for some metrics): a list of actionable suggestions.
    """
    return {
        "Flesch Reading Ease": {
            "score": textstat.flesch_reading_ease(test_data),
            "description": "This score rates your text on a scale of 0-100, with higher scores being easier to read.",
            "tips": [
                "Score below 30? Simplify your text by breaking down complex sentences, using shorter words, and avoiding jargon.",
                "Score around 60-70? You're in the 'standard' range.",
                "Score over 90? Your text is very easy to read. Add some complexity or sophistication if needed.",
            ],
        },
        "Flesch-Kincaid Grade Level": {
            "score": textstat.flesch_kincaid_grade(test_data),
            "description": "This formula estimates the US school grade level needed to understand your text.",
            "tips": [
                "High Score? Your writing might be too complex for your target audience.",
                "Low Score? Your audience might find the text too simple.",
                "Match Your Audience: Tailor the complexity to your readers.",
            ],
        },
        "SMOG Index": {
            "score": textstat.smog_index(test_data),
            "description": "This formula measures text complexity by looking at the number of long words and sentences.",
            "tips": [
                "Best for texts with at least 30 sentences.",
                "Adjust complexity to match your target audience.",
            ],
        },
        "Coleman-Liau Index": {
            "score": textstat.coleman_liau_index(test_data),
            # Coleman-Liau is computed from letters per word and sentences per
            # word, not from syllables, so the description says "letters".
            "description": "This formula uses sentence length and the number of letters per word to estimate the reading level.",
        },
        "Automated Readability Index (ARI)": {
            "score": textstat.automated_readability_index(test_data),
            "description": "Estimates the grade level required to comprehend your text.",
        },
        "Dale-Chall Readability Score": {
            "score": textstat.dale_chall_readability_score(test_data),
            "description": "Focuses on the number of uncommon words (not on a list of 3000 common words) and sentence length.",
            "tips": [
                "Easy to Understand: Aim for a score around the reading level of your audience.",
                "High School Level? Scores between 9 and 12 indicate a high school reading level.",
                "Beyond High School? Scores above 12 are usually for a college-level audience.",
            ],
        },
        "Gunning Fog": {
            "score": textstat.gunning_fog(test_data),
            "description": "Calculates the grade level required to understand the text.",
        },
        "Linsear Write Formula": {
            "score": textstat.linsear_write_formula(test_data),
            "description": "Estimates the US grade level needed to understand the text.",
        },
        "Text Standard (Consensus)": {
            "score": textstat.text_standard(test_data),
            "description": "A consensus estimate of the US grade level needed to understand your text, based on multiple readability scores.",
        },
        "Spache Readability": {
            "score": textstat.spache_readability(test_data),
            "description": "Best for analyzing text for children, typically up to grade 4.",
            "tips": [
                "Considers the number of unfamiliar words and the length of sentences.",
            ],
        },
        "McAlpine EFLAW": {
            "score": textstat.mcalpine_eflaw(test_data),
            "description": "Evaluates text for foreign language learners, focusing on 'miniwords' and sentence length.",
            "tips": [
                "Target Score: Aim for a score of 25 or less.",
            ],
        },
        "Reading Time": {
            "score": textstat.reading_time(test_data),
            # textstat.reading_time reports seconds (milliseconds per
            # character, divided by 1000), not minutes.
            "description": "Estimated reading time in seconds.",
        },
        "Syllable Count": {
            "score": textstat.syllable_count(test_data),
            "description": "The number of syllables in the text.",
        },
        "Word Count": {
            "score": textstat.lexicon_count(test_data),
            "description": "The number of words in the text.",
        },
        "Sentence Count": {
            "score": textstat.sentence_count(test_data),
            "description": "The number of sentences in the text.",
        },
        "Character Count": {
            "score": textstat.char_count(test_data),
            "description": "The number of characters in the text.",
        },
        "Letter Count (without punctuation)": {
            "score": textstat.letter_count(test_data),
            "description": "The number of letters without punctuation.",
        },
        "Polysyllable Count": {
            "score": textstat.polysyllabcount(test_data),
            "description": "The number of polysyllabic words in the text.",
        },
        "Monosyllable Count": {
            "score": textstat.monosyllabcount(test_data),
            "description": "The number of monosyllabic words in the text.",
        },
    }
|
||||
|
||||
|
||||
text_input = st.text_area("Paste your text here:", height=200)
|
||||
|
||||
if st.button("Analyze!"):
|
||||
@@ -134,18 +46,7 @@ if st.button("Analyze!"):
|
||||
if not test_data.strip():
|
||||
st.error("Please enter text to analyze.")
|
||||
else:
|
||||
results = analyze_text(test_data)
|
||||
|
||||
st.subheader("Readability Scores:")
|
||||
st.write("---")
|
||||
for metric, data in results.items():
|
||||
st.markdown(f"**{metric}:** {data['score']}")
|
||||
st.markdown(f"* **What It Means:** {data['description']}")
|
||||
if 'tips' in data:
|
||||
st.markdown("* **Actionable Tips:**")
|
||||
for tip in data['tips']:
|
||||
st.markdown(f" * {tip}")
|
||||
st.write(" ")
|
||||
analyze_text(test_data)
|
||||
|
||||
st.subheader("Key Takeaways:")
|
||||
st.write("---")
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
"""Webpage content analysis tool."""
|
||||
|
||||
import streamlit as st
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
@@ -7,8 +9,7 @@ from nltk.tokenize import word_tokenize
|
||||
from nltk.util import ngrams
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import ConversationChain
|
||||
|
||||
st.set_page_config(layout="wide", page_title="Web Content Analyzer - Dive Deep with AI!", page_icon=":mag_right:")
|
||||
from urllib.parse import urlparse
|
||||
|
||||
st.title("🧠 Web Content Analyzer: Uncover Hidden Insights with AI! 🧠")
|
||||
st.write("""
|
||||
@@ -39,19 +40,36 @@ if st.button("Analyze with AI!"):
|
||||
st.stop()
|
||||
|
||||
try:
|
||||
# Validate URL
|
||||
parsed_url = urlparse(url)
|
||||
if not parsed_url.scheme:
|
||||
url = "https://" + url
|
||||
|
||||
# Fetch webpage content
|
||||
response = requests.get(url)
|
||||
response.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(response.content, 'html.parser')
|
||||
body_txt = soup.find('body').text
|
||||
|
||||
words = [w.lower() for w in word_tokenize(body_txt)]
|
||||
stopw = nltk.corpus.stopwords.words(language)
|
||||
|
||||
final_words = [w for w in words if w not in stopw and w.isalpha()]
|
||||
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse HTML
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
# Extract content
|
||||
title = soup.title.string if soup.title else "No title found"
|
||||
meta_description = soup.find('meta', {'name': 'description'})
|
||||
description = meta_description['content'] if meta_description else "No description found"
|
||||
|
||||
# Display results
|
||||
st.subheader("Page Analysis")
|
||||
st.metric("Title", title)
|
||||
st.metric("Description", description)
|
||||
|
||||
# Content statistics
|
||||
text_content = soup.get_text()
|
||||
words = text_content.split()
|
||||
st.metric("Word Count", len(words))
|
||||
st.metric("Unique Words", len(set(words)))
|
||||
|
||||
# Frequency analysis (same as before)
|
||||
freq = nltk.FreqDist(final_words)
|
||||
freq = nltk.FreqDist(words)
|
||||
keywords = freq.most_common(10)
|
||||
df_keywords = pd.DataFrame(keywords, columns=("Keyword", "Frequency"))
|
||||
|
||||
@@ -60,19 +78,19 @@ if st.button("Analyze with AI!"):
|
||||
st.write(" ")
|
||||
|
||||
st.markdown("**Main Theme:**")
|
||||
ai_theme = conversation_chain.run(f"What is the main theme or topic of this content? \n {body_txt}")
|
||||
ai_theme = conversation_chain.run(f"What is the main theme or topic of this content? \n {text_content}")
|
||||
st.markdown(f" {ai_theme}")
|
||||
|
||||
st.write(" ")
|
||||
|
||||
st.markdown("**Suggested Keywords:**")
|
||||
ai_keywords = conversation_chain.run(f"What other relevant keywords might be helpful to target for this content? \n {body_txt}")
|
||||
ai_keywords = conversation_chain.run(f"What other relevant keywords might be helpful to target for this content? \n {text_content}")
|
||||
st.markdown(f" {ai_keywords}")
|
||||
|
||||
st.write(" ")
|
||||
|
||||
st.markdown("**Content Improvement:**")
|
||||
ai_improvement = conversation_chain.run(f"What could be done to improve this content for clarity, engagement, or SEO? \n {body_txt}")
|
||||
ai_improvement = conversation_chain.run(f"What could be done to improve this content for clarity, engagement, or SEO? \n {text_content}")
|
||||
st.markdown(f" {ai_improvement}")
|
||||
|
||||
# --- Display Frequency Results ---
|
||||
@@ -94,3 +112,5 @@ if st.button("Analyze with AI!"):
|
||||
""")
|
||||
except requests.exceptions.RequestException as e:
|
||||
st.error(f"Oops! Something went wrong fetching the URL. Error: {e}")
|
||||
except Exception as e:
|
||||
st.error(f"An error occurred: {e}")
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
"""Word cloud generation tool."""
|
||||
|
||||
import streamlit as st
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
@@ -5,8 +7,8 @@ import pandas as pd
|
||||
import nltk
|
||||
from nltk.tokenize import word_tokenize
|
||||
from nltk.util import ngrams
|
||||
|
||||
st.set_page_config(layout="wide", page_title="Web Content Analyzer - Dive Into Your Words!", page_icon=":mag:")
|
||||
from wordcloud import WordCloud
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
st.title("🔎 Web Content Analyzer: Uncover Your Words' Power! 🔎")
|
||||
st.write("""
|
||||
@@ -86,3 +88,26 @@ if st.button("Analyze Your Content!"):
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
st.error(f"Oops! Something went wrong fetching the URL. Error: {e}")
|
||||
|
||||
def generate_wordcloud(text):
    """Render a word cloud and simple word statistics for ``text`` in Streamlit.

    A warning is shown and nothing else happens when ``text`` is empty or
    contains only whitespace. Otherwise the word cloud image is drawn on a
    matplotlib figure, displayed with ``st.pyplot``, and followed by the
    total and unique whitespace-separated word counts.

    Args:
        text (str): The text to visualize.

    Returns:
        None: All output is written to the Streamlit page.
    """
    # Whitespace-only input contains no words, which WordCloud.generate
    # cannot plot, so treat it the same as empty input.
    if not text or not text.strip():
        st.warning("Please enter some text to generate a word cloud.")
        return

    # Create and generate a word cloud image
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)

    # Display the word cloud
    st.subheader("Word Cloud Visualization")
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        st.pyplot(fig)
    finally:
        # Figures created through pyplot stay registered until closed; close
        # this one so repeated calls do not accumulate open figures.
        plt.close(fig)

    # Add some statistics
    st.subheader("Text Statistics")
    words = text.split()
    unique_words = set(words)
    st.metric("Total Words", len(words))
    st.metric("Unique Words", len(unique_words))
|
||||
|
||||
Reference in New Issue
Block a user