Initial version of blog Gen

This commit is contained in:
AjaySi
2023-10-07 10:51:22 +05:30
commit 12010c0478
27 changed files with 1295 additions and 0 deletions

33
lib/seo_module/README.md Normal file
View File

@@ -0,0 +1,33 @@
## Implementation approach
To implement the SEO module, we will use the following open-source tools and frameworks:
1. Natural Language Toolkit (NLTK): NLTK is a popular library for natural language processing in Python. We can leverage NLTK to perform various SEO checks on the given text, such as keyword density, readability analysis, and sentiment analysis.
2. Beautiful Soup: Beautiful Soup is a Python library for web scraping. We can use Beautiful Soup to extract relevant information from the given text, such as meta tags, headings, and image alt attributes.
3. PyEnchant: PyEnchant is a spell-checking library for Python. We can utilize PyEnchant to check the spelling of the given text and provide suggestions for improvement. (PyEnchant does not check grammar; grammar checking would need a separate tool.)
4. TextBlob: TextBlob is a library for processing textual data. We can use TextBlob to perform part-of-speech tagging, noun phrase extraction, and other linguistic analyses on the given text.
5. Flask: Flask is a lightweight web framework for Python. We can use Flask for local testing and development, since it allows us to quickly build and test our SEO module.
Overall, by leveraging these open-source tools and frameworks, we can develop a comprehensive and efficient SEO module that meets the requirements and provides valuable insights and suggestions for improving the SEO of the given text.
## Required Python third-party packages
- nltk==3.6.2
- beautifulsoup4==4.9.3
- pyenchant==3.2.1
- textblob==0.15.3
- flask==1.1.2
## Modules
The 'text_processor.py' file contains the TextProcessor class, which is responsible for extracting meta tags, headings, and image alt attributes from the given text.
The 'spell_checker.py' file contains the SpellChecker class, which is responsible for checking the spelling and grammar of the given text.
The 'seo_checker.py' file contains the SEOChecker class, which is responsible for coordinating the SEO checks by utilizing the TextProcessor and SpellChecker classes.

View File

@@ -0,0 +1,135 @@
###################################################
#
# The script covers many SEO factors, including keyword presence, title length,
# meta description, images, img alt text, headings, internal links, external links,
# spelling errors, grammar errors, and readability.
#
##################################################
import re
from bs4 import BeautifulSoup
from textstat import flesch_reading_ease
import spellchecker
class SEOAnalyzer:
    """Run a set of on-page SEO checks against an HTML document.

    The analyzer extracts the visible text, the <title>, image alt text,
    headings and anchors from the HTML and reports keyword usage, link
    counts, a Flesch reading-ease score, a spelling-error count, a simple
    SEO score and a list of improvement suggestions.
    """

    # Recommended <title> length range, in characters (inclusive).
    RECOMMENDED_TITLE_LENGTH = (50, 70)

    # An href starting with one of these is treated as an external link.
    # The analyzer does not know the site's own domain, so absolute URLs
    # pointing back at the same site are counted as external too.
    _EXTERNAL_PREFIXES = ('http://', 'https://', '//')

    # A word is a run of letters with an optional apostrophe part ("don't").
    _WORD_PATTERN = r"[^\W\d_]+(?:'[^\W\d_]+)?"

    def __init__(self, html_content, target_keywords):
        """Store the document and keywords to analyze.

        Args:
            html_content: The HTML document, as a string.
            target_keywords: Iterable of keyword strings to look for.
        """
        self.html_content = html_content
        self.target_keywords = target_keywords

    def analyze_html_content(self):
        """Analyze the stored HTML and return a report dictionary.

        Returns:
            On success, a dict with the keys 'Keyword Density',
            'Keyword Presence in Title', 'Keyword Presence in Image Alt Text',
            'Headings Text', 'Internal Links', 'External Links',
            'Readability Score', 'Spelling Errors', 'Grammar Errors',
            'SEO Score' and 'Suggestions'. 'Grammar Errors' is None when the
            spell-checking backend offers no grammar check.

            On any failure, a dict with the single key 'error' holding the
            exception message; this method never raises.
        """
        try:
            soup = BeautifulSoup(self.html_content, 'html.parser')

            # Extract and clean text from HTML.
            text = re.sub(r'\s+', ' ', ' '.join(soup.stripped_strings))
            text_lower = text.lower()
            word_count = len(text.split())

            # Keyword density: occurrences per 100 words. An empty document
            # has density 0 rather than raising ZeroDivisionError.
            keyword_density = {
                keyword: (
                    (text_lower.count(keyword.lower()) / word_count) * 100
                    if word_count else 0.0
                )
                for keyword in self.target_keywords
            }

            # Keywords in the title.
            title_tag = soup.find('title')
            title_text = title_tag.text.lower() if title_tag else ''
            keyword_presence_in_title = {
                keyword: keyword.lower() in title_text
                for keyword in self.target_keywords
            }

            # Keywords in image alt text.
            img_alt_text = [
                (img.get('alt') or '').lower() for img in soup.find_all('img')
            ]
            keyword_presence_in_img_alt_text = {
                keyword: any(keyword.lower() in alt for alt in img_alt_text)
                for keyword in self.target_keywords
            }

            # Headings.
            headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            headings_text = ' '.join(heading.text.lower() for heading in headings)

            # Links. An anchor without an href is not a link at all; a link is
            # external when it is an absolute/protocol-relative URL, and
            # internal when it is any other non-fragment href.
            hrefs = [
                (link.get('href') or '').strip() for link in soup.find_all('a')
            ]
            hrefs = [href for href in hrefs if href]
            external_links = sum(
                1 for href in hrefs
                if href.lower().startswith(self._EXTERNAL_PREFIXES)
            )
            internal_links = sum(
                1 for href in hrefs
                if '#' not in href
                and not href.lower().startswith(self._EXTERNAL_PREFIXES)
            )

            # Readability.
            readability_score = flesch_reading_ease(text)

            # Spelling: check bare words only, so trailing punctuation
            # ("analysis.") is not reported as a misspelling.
            spell = spellchecker.SpellChecker()
            words = re.findall(self._WORD_PATTERN, text)
            spelling_errors = len(spell.unknown(words))

            # Grammar: pyspellchecker's SpellChecker has no grammar API, so an
            # unconditional call would fail the whole analysis. Use the method
            # only if the backend actually provides it; otherwise report None.
            check_grammar = getattr(spell, 'check_grammar', None)
            grammar_errors = (
                len(check_grammar(text)) if callable(check_grammar) else None
            )

            # SEO score: one point per keyword found in the text
            # (case-insensitive), plus one for a well-sized title.
            keywords_found = sum(
                1 for keyword in self.target_keywords
                if keyword.lower() in text_lower
            )
            seo_score = keywords_found

            # Title length is measured in characters, which is the unit the
            # recommended range is expressed in.
            title_length = len(title_text.strip())
            shortest, longest = self.RECOMMENDED_TITLE_LENGTH
            if shortest <= title_length <= longest:
                seo_score += 1

            # Suggestions: only mention the checks that actually failed.
            suggestions = []
            if keywords_found < len(keyword_density):
                suggestions.append("Add more relevant keywords to your HTML content.")
            if not any(keyword_presence_in_title.values()):
                suggestions.append("Make sure your title contains keywords.")
            if not any(keyword_presence_in_img_alt_text.values()):
                suggestions.append("Add keywords to image alt text.")
            if not headings:
                suggestions.append("Add headings to your HTML content.")
            if internal_links == 0:
                suggestions.append("Add internal links to your HTML content.")

            return {
                'Keyword Density': keyword_density,
                'Keyword Presence in Title': keyword_presence_in_title,
                'Keyword Presence in Image Alt Text': keyword_presence_in_img_alt_text,
                'Headings Text': headings_text,
                'Internal Links': internal_links,
                'External Links': external_links,
                'Readability Score': readability_score,
                'Spelling Errors': spelling_errors,
                'Grammar Errors': grammar_errors,
                'SEO Score': seo_score,
                'Suggestions': suggestions,
            }
        except Exception as e:
            # Callers expect failures to be reported in-band, not raised.
            return {'error': str(e)}
# Example usage: analyze a small sample page and print the report.
if __name__ == "__main__":
    html_content = """
<!DOCTYPE html>
<html>
<head>
<title>SEO Analyzer - Sample Page</title>
<meta name="description" content="This is a sample page for SEO analysis.">
</head>
<body>
<h1>Welcome to the SEO Analyzer</h1>
<p>This is a sample page with some sample content for SEO analysis. It mentions the target keywords SEO, keywords, and content.</p>
<img src="image1.jpg" alt="SEO image">
<img src="image2.jpg" alt="Keywords image">
</body>
</html>
"""
    keywords = ['SEO', 'keywords', 'content']  # Replace with your target keywords
    seo_analyzer = SEOAnalyzer(html_content, keywords)
    results = seo_analyzer.analyze_html_content()

    # The analyzer reports failures as {'error': ...} instead of raising, so
    # check for that before indexing the report keys (which would KeyError).
    if 'error' in results:
        raise SystemExit(f"SEO analysis failed: {results['error']}")

    print("SEO Analysis Results:")
    # Each report key doubles as its printed label.
    for label in (
        'Keyword Density',
        'Keyword Presence in Title',
        'Keyword Presence in Image Alt Text',
        'Headings Text',
        'Internal Links',
        'External Links',
        'Readability Score',
        'Spelling Errors',
        'Grammar Errors',
        'SEO Score',
    ):
        print(f"{label}: {results[label]}")
    print("Suggestions:")
    for suggestion in results['Suggestions']:
        print(suggestion)

View File

@@ -0,0 +1,115 @@
from typing import List, Dict, Union
from nltk import tokenize, stem, pos_tag
from textblob import TextBlob
import enchant
class TextPreprocessor:
    """Normalize raw text by tokenizing it and stemming every token."""

    def preprocess_text(self, text: str) -> str:
        """Return *text* with each word token replaced by its Porter stem.

        The text is split with NLTK's word tokenizer, every token is stemmed,
        and the stems are re-joined with single spaces.
        """
        porter = stem.PorterStemmer()
        return ' '.join(
            porter.stem(word) for word in tokenize.word_tokenize(text)
        )
class SEOAnalyzer:
    """Compute simple text-based SEO metrics.

    Every metric is a plain ratio; blank input (empty or whitespace-only
    text) yields 0.0 for each metric instead of raising ZeroDivisionError.
    """

    def calculate_seo_percentage(self, text: str, keywords: List[str]) -> float:
        """Return the arithmetic mean of the three individual metrics.

        Args:
            text: The text to analyze.
            keywords: Keywords whose density should be measured.

        Returns:
            The mean of keyword density, readability score and semantic
            score, each as computed by the corresponding method below.
        """
        keyword_density = self.calculate_keyword_density(text, keywords)
        readability_score = self.calculate_readability_score(text)
        semantic_score = self.perform_semantic_analysis(text)
        return (keyword_density + readability_score + semantic_score) / 3

    def calculate_keyword_density(self, text: str, keywords: List[str]) -> float:
        """Return total keyword occurrences divided by the token count.

        Matching is a case-insensitive substring count. Empty keywords are
        skipped, because ``str.count('')`` would count every character
        position as a match.

        Returns:
            The density ratio, or 0.0 when the text contains no tokens.
        """
        if not text.strip():
            return 0.0
        word_count = len(tokenize.word_tokenize(text))
        if word_count == 0:
            return 0.0
        lowered = text.lower()
        occurrences = sum(
            lowered.count(keyword.lower()) for keyword in keywords if keyword
        )
        return occurrences / word_count

    def calculate_readability_score(self, text: str) -> float:
        """Return the reciprocal of the average number of tokens per sentence.

        Returns:
            ``1 / (tokens / sentences)``, or 0.0 when the text has no
            sentences or no tokens.
        """
        if not text.strip():
            return 0.0
        sentences = tokenize.sent_tokenize(text)
        word_count = sum(
            len(tokenize.word_tokenize(sentence)) for sentence in sentences
        )
        sentence_count = len(sentences)
        if sentence_count == 0 or word_count == 0:
            return 0.0
        average_words_per_sentence = word_count / sentence_count
        return 1 / average_words_per_sentence

    def perform_semantic_analysis(self, text: str) -> float:
        """Return the share of tokens tagged as nouns or verbs.

        Tokens are part-of-speech tagged; tags starting with 'N' count as
        nouns and tags starting with 'V' count as verbs.

        Returns:
            ``(nouns + verbs) / tokens``, or 0.0 when the text has no tokens.
        """
        if not text.strip():
            return 0.0
        tagged_text = pos_tag(tokenize.word_tokenize(text))
        if not tagged_text:
            return 0.0
        noun_or_verb_count = sum(
            1 for _, pos in tagged_text if pos.startswith(('N', 'V'))
        )
        return noun_or_verb_count / len(tagged_text)
class SpellChecker:
    """Find misspelled tokens using the Enchant en_US dictionary."""

    def check_spelling(self, text: str) -> List[str]:
        """Return the tokens of *text* that the en_US dictionary rejects.

        The text is split with NLTK's word tokenizer; rejected tokens are
        returned in their original order, duplicates included.
        """
        dictionary = enchant.Dict("en_US")
        rejected = []
        for word in tokenize.word_tokenize(text):
            if dictionary.check(word):
                continue
            rejected.append(word)
        return rejected
class SEOAnalysisModule:
    """Facade that runs preprocessing, SEO metrics and spell checking."""

    def __init__(self):
        self.text_preprocessor = TextPreprocessor()
        self.seo_analyzer = SEOAnalyzer()
        self.spell_checker = SpellChecker()

    def analyze_text(self, text: str, keywords: List[str]) -> Dict[str, Union[float, List[str]]]:
        """Analyze *text* against *keywords* and return all metrics.

        Args:
            text: The raw text to analyze.
            keywords: Target keywords for the density metric.

        Returns:
            A dict with the keys 'seo_percentage', 'keyword_density',
            'readability_score', 'semantic_score' (floats) and
            'spelling_errors' (list of misspelled tokens from the raw text).
        """
        # Preprocess (tokenize + stem) the text for the metric calculations.
        preprocessed_text = self.text_preprocessor.preprocess_text(text)

        # Run the keywords through the same preprocessing so that they can
        # match the stemmed text (e.g. "keywords" must become "keyword").
        preprocessed_keywords = [
            self.text_preprocessor.preprocess_text(keyword)
            for keyword in keywords
        ]

        seo_percentage = self.seo_analyzer.calculate_seo_percentage(
            preprocessed_text, preprocessed_keywords
        )
        keyword_density = self.seo_analyzer.calculate_keyword_density(
            preprocessed_text, preprocessed_keywords
        )
        readability_score = self.seo_analyzer.calculate_readability_score(
            preprocessed_text
        )
        semantic_score = self.seo_analyzer.perform_semantic_analysis(
            preprocessed_text
        )

        # Spell-check the ORIGINAL text: stems such as "thi" or "sampl" are
        # not dictionary words and would all be reported as misspellings.
        spelling_errors = self.spell_checker.check_spelling(text)

        return {
            'seo_percentage': seo_percentage,
            'keyword_density': keyword_density,
            'readability_score': readability_score,
            'semantic_score': semantic_score,
            'spelling_errors': spelling_errors,
        }