Made changes to "Getting started with ALwrity" and added a lot of detail on API keys

This commit is contained in:
ajaysi
2025-04-01 13:11:40 +05:30
committed by ي
parent 367f9bac2c
commit 7d6ea91e6a
68 changed files with 8384 additions and 823 deletions

View File

@@ -2,7 +2,7 @@ import os
import json
import streamlit as st
from tenacity import retry, stop_after_attempt, wait_random_exponential
import cloudscraper
import crawl4ai
from bs4 import BeautifulSoup
import requests
import csv
@@ -18,7 +18,7 @@ from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
def fetch_and_parse_html(url):
"""
Fetches HTML content from the given URL using CloudScraper and parses it with BeautifulSoup.
Fetches HTML content from the given URL using crawl4ai and parses it with BeautifulSoup.
Args:
url (str): The URL of the webpage to fetch.
@@ -27,9 +27,8 @@ def fetch_and_parse_html(url):
BeautifulSoup: Parsed HTML content.
"""
try:
scraper = cloudscraper.create_scraper()
html = scraper.get(url)
soup = BeautifulSoup(html.text, 'html.parser')
html = crawl4ai.get(url)
soup = BeautifulSoup(html, 'html.parser')
return soup
except Exception as e:
st.error(f"⚠️ Error fetching or parsing HTML: {e}")

View File

@@ -1,7 +1,35 @@
import textstat
import streamlit as st
"""Text analysis tools using textstat."""
st.set_page_config(layout="wide", page_title="Text Readability Analyzer", page_icon=":book:")
import streamlit as st
from textstat import textstat
def analyze_text(text):
    """Compute textstat readability metrics for *text* and render them in Streamlit.

    Shows a warning and returns early when no text is supplied. Otherwise each
    score is displayed with ``st.metric`` and all scores are plotted with
    ``st.bar_chart``.

    Args:
        text (str): The text to analyze.

    Returns:
        None: Results are written to the Streamlit page.
    """
    # Whitespace-only input has nothing to score, so treat it like empty input.
    if not text or not text.strip():
        st.warning("Please enter some text to analyze.")
        return

    # Calculate various metrics
    metrics = {
        "Flesch Reading Ease": textstat.flesch_reading_ease(text),
        "Flesch-Kincaid Grade Level": textstat.flesch_kincaid_grade(text),
        "Gunning Fog Index": textstat.gunning_fog(text),
        "SMOG Index": textstat.smog_index(text),
        "Automated Readability Index": textstat.automated_readability_index(text),
        "Coleman-Liau Index": textstat.coleman_liau_index(text),
        "Linsear Write Formula": textstat.linsear_write_formula(text),
        "Dale-Chall Readability Score": textstat.dale_chall_readability_score(text),
        # textstat exposes the consensus grade as text_standard(); it has no
        # readability_consensus(). float_output=True returns a number instead
        # of a string such as "8th and 9th grade", which the ":.2f" formatting
        # and the bar chart below both require.
        "Readability Consensus": textstat.text_standard(text, float_output=True),
    }

    # Display metrics in a clean format
    st.subheader("Text Analysis Results")
    for metric, value in metrics.items():
        st.metric(metric, f"{value:.2f}")

    # Add visualizations
    st.subheader("Visualization")
    st.bar_chart(metrics)
st.title("📖 Text Readability Analyzer: Making Your Content Easy to Read")
@@ -10,122 +38,6 @@ st.write("""
Just paste in a sample of your text, and we'll break down the readability scores and offer actionable tips!
""")
def analyze_text(test_data):
    """
    Analyzes the readability of the provided text and returns a dictionary with the results.

    Each entry maps a metric name to a dictionary holding the textstat
    ``score``, a human-readable ``description`` and, for some metrics, a list
    of actionable ``tips``.

    Parameters:
    test_data (str): The text to be analyzed.

    Returns:
    dict: A dictionary containing readability scores and additional metrics.
    """
    return {
        "Flesch Reading Ease": {
            "score": textstat.flesch_reading_ease(test_data),
            "description": "This score rates your text on a scale of 0-100, with higher scores being easier to read.",
            "tips": [
                "Score below 30? Simplify your text by breaking down complex sentences, using shorter words, and avoiding jargon.",
                "Score around 60-70? You're in the 'standard' range.",
                "Score over 90? Your text is very easy to read. Add some complexity or sophistication if needed."
            ]
        },
        "Flesch-Kincaid Grade Level": {
            "score": textstat.flesch_kincaid_grade(test_data),
            "description": "This formula estimates the US school grade level needed to understand your text.",
            "tips": [
                "High Score? Your writing might be too complex for your target audience.",
                "Low Score? Your audience might find the text too simple.",
                "Match Your Audience: Tailor the complexity to your readers."
            ]
        },
        "SMOG Index": {
            "score": textstat.smog_index(test_data),
            "description": "This formula measures text complexity by looking at the number of long words and sentences.",
            "tips": [
                "Best for texts with at least 30 sentences.",
                "Adjust complexity to match your target audience."
            ]
        },
        "Coleman-Liau Index": {
            "score": textstat.coleman_liau_index(test_data),
            # Coleman-Liau is computed from letters per word, not syllables.
            "description": "This formula uses sentence length and the number of letters per word to estimate the reading level."
        },
        "Automated Readability Index (ARI)": {
            "score": textstat.automated_readability_index(test_data),
            "description": "Estimates the grade level required to comprehend your text."
        },
        "Dale-Chall Readability Score": {
            "score": textstat.dale_chall_readability_score(test_data),
            "description": "Focuses on the number of uncommon words (not on a list of 3000 common words) and sentence length.",
            "tips": [
                "Easy to Understand: Aim for a score around the reading level of your audience.",
                "High School Level? Scores between 9 and 12 indicate a high school reading level.",
                "Beyond High School? Scores above 12 are usually for a college-level audience."
            ]
        },
        "Gunning Fog": {
            "score": textstat.gunning_fog(test_data),
            "description": "Calculates the grade level required to understand the text."
        },
        "Linsear Write Formula": {
            "score": textstat.linsear_write_formula(test_data),
            "description": "Estimates the US grade level needed to understand the text."
        },
        "Text Standard (Consensus)": {
            "score": textstat.text_standard(test_data),
            "description": "A consensus estimate of the US grade level needed to understand your text, based on multiple readability scores."
        },
        "Spache Readability": {
            "score": textstat.spache_readability(test_data),
            "description": "Best for analyzing text for children, typically up to grade 4.",
            "tips": [
                "Considers the number of unfamiliar words and the length of sentences."
            ]
        },
        "McAlpine EFLAW": {
            "score": textstat.mcalpine_eflaw(test_data),
            "description": "Evaluates text for foreign language learners, focusing on 'miniwords' and sentence length.",
            "tips": [
                "Target Score: Aim for a score of 25 or less."
            ]
        },
        "Reading Time": {
            "score": textstat.reading_time(test_data),
            # textstat.reading_time() reports seconds, not minutes.
            "description": "Estimated reading time in seconds."
        },
        "Syllable Count": {
            "score": textstat.syllable_count(test_data),
            "description": "The number of syllables in the text."
        },
        "Word Count": {
            "score": textstat.lexicon_count(test_data),
            "description": "The number of words in the text."
        },
        "Sentence Count": {
            "score": textstat.sentence_count(test_data),
            "description": "The number of sentences in the text."
        },
        "Character Count": {
            "score": textstat.char_count(test_data),
            "description": "The number of characters in the text."
        },
        "Letter Count (without punctuation)": {
            "score": textstat.letter_count(test_data),
            "description": "The number of letters without punctuation."
        },
        "Polysyllable Count": {
            "score": textstat.polysyllabcount(test_data),
            "description": "The number of polysyllabic words in the text."
        },
        "Monosyllable Count": {
            "score": textstat.monosyllabcount(test_data),
            "description": "The number of monosyllabic words in the text."
        }
    }
text_input = st.text_area("Paste your text here:", height=200)
if st.button("Analyze!"):
@@ -134,18 +46,7 @@ if st.button("Analyze!"):
if not test_data.strip():
st.error("Please enter text to analyze.")
else:
results = analyze_text(test_data)
st.subheader("Readability Scores:")
st.write("---")
for metric, data in results.items():
st.markdown(f"**{metric}:** {data['score']}")
st.markdown(f"* **What It Means:** {data['description']}")
if 'tips' in data:
st.markdown("* **Actionable Tips:**")
for tip in data['tips']:
st.markdown(f" * {tip}")
st.write(" ")
analyze_text(test_data)
st.subheader("Key Takeaways:")
st.write("---")

View File

@@ -1,3 +1,5 @@
"""Webpage content analysis tool."""
import streamlit as st
import requests
from bs4 import BeautifulSoup
@@ -7,8 +9,7 @@ from nltk.tokenize import word_tokenize
from nltk.util import ngrams
from langchain.llms import OpenAI
from langchain.chains import ConversationChain
st.set_page_config(layout="wide", page_title="Web Content Analyzer - Dive Deep with AI!", page_icon=":mag_right:")
from urllib.parse import urlparse
st.title("🧠 Web Content Analyzer: Uncover Hidden Insights with AI! 🧠")
st.write("""
@@ -39,19 +40,36 @@ if st.button("Analyze with AI!"):
st.stop()
try:
# Validate URL
parsed_url = urlparse(url)
if not parsed_url.scheme:
url = "https://" + url
# Fetch webpage content
response = requests.get(url)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
body_txt = soup.find('body').text
words = [w.lower() for w in word_tokenize(body_txt)]
stopw = nltk.corpus.stopwords.words(language)
final_words = [w for w in words if w not in stopw and w.isalpha()]
response.raise_for_status()
# Parse HTML
soup = BeautifulSoup(response.text, 'html.parser')
# Extract content
title = soup.title.string if soup.title else "No title found"
meta_description = soup.find('meta', {'name': 'description'})
description = meta_description['content'] if meta_description else "No description found"
# Display results
st.subheader("Page Analysis")
st.metric("Title", title)
st.metric("Description", description)
# Content statistics
text_content = soup.get_text()
words = text_content.split()
st.metric("Word Count", len(words))
st.metric("Unique Words", len(set(words)))
# Frequency analysis (same as before)
freq = nltk.FreqDist(final_words)
freq = nltk.FreqDist(words)
keywords = freq.most_common(10)
df_keywords = pd.DataFrame(keywords, columns=("Keyword", "Frequency"))
@@ -60,19 +78,19 @@ if st.button("Analyze with AI!"):
st.write(" ")
st.markdown("**Main Theme:**")
ai_theme = conversation_chain.run(f"What is the main theme or topic of this content? \n {body_txt}")
ai_theme = conversation_chain.run(f"What is the main theme or topic of this content? \n {text_content}")
st.markdown(f" {ai_theme}")
st.write(" ")
st.markdown("**Suggested Keywords:**")
ai_keywords = conversation_chain.run(f"What other relevant keywords might be helpful to target for this content? \n {body_txt}")
ai_keywords = conversation_chain.run(f"What other relevant keywords might be helpful to target for this content? \n {text_content}")
st.markdown(f" {ai_keywords}")
st.write(" ")
st.markdown("**Content Improvement:**")
ai_improvement = conversation_chain.run(f"What could be done to improve this content for clarity, engagement, or SEO? \n {body_txt}")
ai_improvement = conversation_chain.run(f"What could be done to improve this content for clarity, engagement, or SEO? \n {text_content}")
st.markdown(f" {ai_improvement}")
# --- Display Frequency Results ---
@@ -94,3 +112,5 @@ if st.button("Analyze with AI!"):
""")
except requests.exceptions.RequestException as e:
st.error(f"Oops! Something went wrong fetching the URL. Error: {e}")
except Exception as e:
st.error(f"An error occurred: {e}")

View File

@@ -1,3 +1,5 @@
"""Word cloud generation tool."""
import streamlit as st
import requests
from bs4 import BeautifulSoup
@@ -5,8 +7,8 @@ import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
st.set_page_config(layout="wide", page_title="Web Content Analyzer - Dive Into Your Words!", page_icon=":mag:")
from wordcloud import WordCloud
import matplotlib.pyplot as plt
st.title("🔎 Web Content Analyzer: Uncover Your Words' Power! 🔎")
st.write("""
@@ -86,3 +88,26 @@ if st.button("Analyze Your Content!"):
except requests.exceptions.RequestException as e:
st.error(f"Oops! Something went wrong fetching the URL. Error: {e}")
def generate_wordcloud(text):
    """Render a word cloud and basic word statistics for *text* in Streamlit.

    Shows a warning and returns early when no text is supplied.

    Args:
        text (str): The text to visualize.

    Returns:
        None: Output is written to the Streamlit page.
    """
    # WordCloud.generate() raises ValueError when it finds no words, so
    # whitespace-only input is rejected the same way as empty input.
    if not text or not text.strip():
        st.warning("Please enter some text to generate a word cloud.")
        return

    # Create and generate a word cloud image
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)

    # Display the word cloud
    st.subheader("Word Cloud Visualization")
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # close this one so figures do not pile up across Streamlit reruns.
        plt.close(fig)

    # Add some statistics
    st.subheader("Text Statistics")
    words = text.split()
    unique_words = set(words)
    st.metric("Total Words", len(words))
    st.metric("Unique Words", len(unique_words))