Features: AI Rich snippet from url, AI product description writer
This commit is contained in:
1
lib/ai_seo_tools/TBD
Normal file
1
lib/ai_seo_tools/TBD
Normal file
@@ -0,0 +1 @@
|
||||
https://pypi.org/project/textstat/
|
||||
135
lib/ai_seo_tools/cgpt_seo_analyzer.py
Normal file
135
lib/ai_seo_tools/cgpt_seo_analyzer.py
Normal file
@@ -0,0 +1,135 @@
|
||||
###################################################
|
||||
#
|
||||
# This script checks several on-page SEO factors: keyword density, keyword
# presence in the title and in image alt text, title length, headings,
# internal and external links, spelling errors, and readability.
|
||||
#
|
||||
##################################################
|
||||
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from textstat import flesch_reading_ease
|
||||
import spellchecker
|
||||
|
||||
class SEOAnalyzer:
    """Run a set of on-page SEO heuristics over a single HTML document."""

    # Recommended <title> length range, measured in characters.
    RECOMMENDED_TITLE_LENGTH = (50, 70)

    def __init__(self, html_content, target_keywords):
        """Store the inputs; nothing is parsed until analysis is requested.

        Args:
            html_content: Raw HTML markup of the page to analyze.
            target_keywords: Sequence of keyword strings to look for. It is
                iterated several times, so it must not be a one-shot iterator.
        """
        self.html_content = html_content
        self.target_keywords = target_keywords

    def analyze_html_content(self):
        """Analyze ``self.html_content`` and return the findings as a dict.

        Returns:
            On success, a dict with the keys 'Keyword Density',
            'Keyword Presence in Title', 'Keyword Presence in Image Alt Text',
            'Headings Text', 'Internal Links', 'External Links',
            'Readability Score', 'Spelling Errors', 'Grammar Errors',
            'SEO Score' and 'Suggestions'.
            'Grammar Errors' is ``None`` when the spell-checker object offers
            no grammar API.
            On any failure, ``{'error': <message>}`` is returned instead of
            raising, so callers must check for the 'error' key.
        """
        try:
            soup = BeautifulSoup(self.html_content, 'html.parser')

            # Visible text with all whitespace runs collapsed to one space.
            text = re.sub(r'\s+', ' ', ' '.join(soup.stripped_strings))
            lowered_text = text.lower()
            word_count = len(text.split())

            # Keyword density: occurrences per 100 words. An empty document
            # has density 0 rather than raising ZeroDivisionError.
            keyword_density = {}
            for keyword in self.target_keywords:
                occurrences = lowered_text.count(keyword.lower())
                keyword_density[keyword] = (
                    (occurrences / word_count) * 100 if word_count else 0.0
                )

            # Keywords in the <title>.
            title_tag = soup.find('title')
            title_text = title_tag.text.lower() if title_tag else ''
            keyword_presence_in_title = {
                keyword: keyword.lower() in title_text
                for keyword in self.target_keywords
            }

            # Keywords in image alt text.
            img_alt_text = [
                img.get('alt', '').lower() for img in soup.find_all('img')
            ]
            keyword_presence_in_img_alt_text = {
                keyword: any(keyword.lower() in alt for alt in img_alt_text)
                for keyword in self.target_keywords
            }

            # Headings.
            headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            headings_text = ' '.join(
                heading.text.lower() for heading in headings
            )

            # Links. Without a base URL the page's own host is unknown, so
            # every absolute or protocol-relative URL is counted as external.
            # Internal links are the remaining non-empty hrefs; hrefs that
            # contain '#' stay excluded, as before.
            external_prefixes = ('http://', 'https://', '//')
            hrefs = [
                link.get('href', '').strip() for link in soup.find_all('a')
            ]
            external_links = sum(
                1 for href in hrefs
                if href.lower().startswith(external_prefixes)
            )
            internal_links = sum(
                1 for href in hrefs
                if href
                and '#' not in href
                and not href.lower().startswith(external_prefixes)
            )

            # Readability.
            readability_score = flesch_reading_ease(text)

            # Spelling: check bare words only, so that trailing punctuation
            # ("analysis.") is not reported as a misspelling.
            spell = spellchecker.SpellChecker()
            words = re.findall(r"[a-z]+(?:'[a-z]+)?", lowered_text)
            spelling_errors = len(spell.unknown(words))

            # Grammar: pyspellchecker's SpellChecker has no grammar API, so an
            # unconditional spell.check_grammar() call made every analysis
            # fail. Use the method only if the object really provides it.
            check_grammar = getattr(spell, 'check_grammar', None)
            grammar_errors = (
                len(check_grammar(text)) if callable(check_grammar) else None
            )

            # SEO score: one point per keyword found in the text
            # (case-insensitive) plus one for a well-sized title.
            seo_score = sum(
                1 for keyword in self.target_keywords
                if keyword.lower() in lowered_text
            )
            keywords_missing = seo_score < len(self.target_keywords)

            # The recommended range is in characters, not words.
            title_length = len(title_text.strip())
            shortest, longest = self.RECOMMENDED_TITLE_LENGTH
            if shortest <= title_length <= longest:
                seo_score += 1

            # Suggestions: each one is emitted only when its own check failed.
            suggestions = []
            if keywords_missing:
                suggestions.append("Add more relevant keywords to your HTML content.")
            if not any(keyword_presence_in_title.values()):
                suggestions.append("Make sure your title contains keywords.")
            if not any(keyword_presence_in_img_alt_text.values()):
                suggestions.append("Add keywords to image alt text.")
            if not headings:
                suggestions.append("Add headings to your HTML content.")
            if internal_links == 0:
                suggestions.append("Add internal links to your HTML content.")

            return {
                'Keyword Density': keyword_density,
                'Keyword Presence in Title': keyword_presence_in_title,
                'Keyword Presence in Image Alt Text': keyword_presence_in_img_alt_text,
                'Headings Text': headings_text,
                'Internal Links': internal_links,
                'External Links': external_links,
                'Readability Score': readability_score,
                'Spelling Errors': spelling_errors,
                'Grammar Errors': grammar_errors,
                'SEO Score': seo_score,
                'Suggestions': suggestions,
            }
        except Exception as e:
            # Kept for backward compatibility: failures are reported in-band.
            return {'error': str(e)}
|
||||
|
||||
# Example usage:
if __name__ == "__main__":
    html_content = """
    <!DOCTYPE html>
    <html>
    <head>
    <title>SEO Analyzer - Sample Page</title>
    <meta name="description" content="This is a sample page for SEO analysis.">
    </head>
    <body>
    <h1>Welcome to the SEO Analyzer</h1>
    <p>This is a sample page with some sample content for SEO analysis. It mentions the target keywords SEO, keywords, and content.</p>
    <img src="image1.jpg" alt="SEO image">
    <img src="image2.jpg" alt="Keywords image">
    </body>
    </html>
    """

    keywords = ['SEO', 'keywords', 'content']  # Replace with your target keywords

    seo_analyzer = SEOAnalyzer(html_content, keywords)
    results = seo_analyzer.analyze_html_content()

    # analyze_html_content() reports failures as {'error': message} instead of
    # raising; indexing the report keys on such a result would raise KeyError.
    if 'error' in results:
        raise SystemExit(f"SEO analysis failed: {results['error']}")

    print("SEO Analysis Results:")
    # Report keys, printed in the same order as before.
    report_keys = (
        'Keyword Density',
        'Keyword Presence in Title',
        'Keyword Presence in Image Alt Text',
        'Headings Text',
        'Internal Links',
        'External Links',
        'Readability Score',
        'Spelling Errors',
        'Grammar Errors',
        'SEO Score',
    )
    for key in report_keys:
        print(f"{key}: {results[key]}")

    print("Suggestions:")
    for suggestion in results['Suggestions']:
        print(suggestion)
|
||||
|
||||
80
lib/ai_seo_tools/plagiarism_checker_from_known_sources.py
Normal file
80
lib/ai_seo_tools/plagiarism_checker_from_known_sources.py
Normal file
@@ -0,0 +1,80 @@
|
||||
import re
|
||||
|
||||
#######################################################################
|
||||
#
|
||||
# I want you to act as a plagiarism checker. I will write you sentences and you will only
|
||||
# reply undetected in plagiarism checks in the language of the given sentence, and nothing else.
|
||||
# Do not write explanations on replies. My first sentence is {}
|
||||
#
|
||||
######################################################################
|
||||
|
||||
|
||||
class PlagiarismChecker:
    """Detect content whose visible text exactly matches a known source.

    Matching is exact after normalization (tags removed, HTML entities
    decoded, whitespace collapsed, lowercased); partial overlap or
    paraphrasing is not detected.
    """

    def __init__(self, known_sources):
        """Remember the reference documents.

        Args:
            known_sources: Iterable of HTML (or plain-text) strings to
                compare submitted content against.
        """
        self.known_sources = known_sources

    @staticmethod
    def _normalize(markup):
        """Reduce *markup* to lowercase visible text with single spaces."""
        from html import unescape

        without_tags = re.sub(r'<[^>]+>', ' ', markup)
        # Decode entities after the tags are gone, so that "&amp;" and "&"
        # (or "&nbsp;" and a plain space) compare equal.
        decoded = unescape(without_tags)
        return re.sub(r'\s+', ' ', decoded).strip().lower()

    def check_plagiarism(self, html_content):
        """Compare *html_content* against every known source.

        Args:
            html_content: HTML (or plain-text) string to check.

        Returns:
            A message naming the first matching source (the source is echoed
            verbatim), or a "no plagiarism" message when nothing matches.
            If processing fails (for example, a non-string input), the
            exception text is returned instead of being raised.
        """
        try:
            text = self._normalize(html_content)

            for source in self.known_sources:
                if text == self._normalize(source):
                    return f"Plagiarism detected: Matches known source - {source}"

            return "No plagiarism detected. Content is original."
        except Exception as e:
            # Kept for backward compatibility: callers always get a string.
            return str(e)
|
||||
|
||||
# Example usage:
if __name__ == "__main__":
    # Reference documents the submission is compared against.
    first_known_source = """
    <html>
    <head>
    <title>Sample Page 1</title>
    </head>
    <body>
    <h1>Hello, World!</h1>
    <p>This is sample content from known source 1.</p>
    </body>
    </html>
    """
    second_known_source = """
    <html>
    <head>
    <title>Sample Page 2</title>
    </head>
    <body>
    <h1>Welcome to Known Source 2</h1>
    <p>This is some content from another known source.</p>
    </body>
    </html>
    """

    # Submission to check for plagiarism.
    submission = """
    <html>
    <head>
    <title>Sample Page</title>
    </head>
    <body>
    <h1>Hello, World!</h1>
    <p>This is sample content.</p>
    </body>
    </html>
    """

    checker = PlagiarismChecker([first_known_source, second_known_source])
    print(checker.check_plagiarism(submission))
|
||||
|
||||
115
lib/ai_seo_tools/seo_analysis.py
Normal file
115
lib/ai_seo_tools/seo_analysis.py
Normal file
@@ -0,0 +1,115 @@
|
||||
from typing import List, Dict, Union
|
||||
#from nltk import tokenize, stem, pos_tag
|
||||
from textblob import TextBlob
|
||||
import enchant
|
||||
|
||||
class TextPreprocessor:
    """Normalize free text by tokenizing it and stemming every token."""

    def preprocess_text(self, text: str) -> str:
        """Return *text* with every token replaced by its stem.

        The ``tokenize`` and ``stem`` names this method used to call are
        never imported by this module, so every call raised NameError.
        Tokenizing and stemming now go through TextBlob, which the module
        already imports.

        Args:
            text: The text to preprocess.

        Returns:
            The stemmed tokens (punctuation tokens included) joined by single
            spaces, or an empty string for empty or whitespace-only input.
        """
        if not text or not text.strip():
            return ''

        # TextBlob.tokens yields Word objects; Word.stem() returns the stem.
        tokens = TextBlob(text).tokens
        return ' '.join(token.stem() for token in tokens)
|
||||
|
||||
class SEOAnalyzer:
    """Compute simple text-level SEO metrics.

    The ``tokenize`` and ``pos_tag`` names these methods used to call are
    never imported by this module, so every call raised NameError.
    Tokenizing, sentence splitting and tagging now go through TextBlob,
    which the module already imports. Every metric returns 0.0 for empty
    input instead of raising ZeroDivisionError.
    """

    def calculate_seo_percentage(self, text: str, keywords: List[str]) -> float:
        """Return the mean of keyword density, readability and semantic score.

        The result is the plain average of the three ratios; it is not
        rescaled to a 0-100 range.

        Args:
            text: The text to score.
            keywords: Keywords whose occurrences feed the density metric.
        """
        keyword_density = self.calculate_keyword_density(text, keywords)
        readability_score = self.calculate_readability_score(text)
        semantic_score = self.perform_semantic_analysis(text)

        return (keyword_density + readability_score + semantic_score) / 3

    def calculate_keyword_density(self, text: str, keywords: List[str]) -> float:
        """Return total keyword occurrences divided by the token count.

        Occurrences are case-insensitive substring matches, so a keyword is
        also counted inside longer words. Duplicate keywords count once.

        Args:
            text: The text to search.
            keywords: Keywords to count.

        Returns:
            The ratio as a float; 0.0 when *text* has no tokens.
        """
        if not text or not text.strip():
            return 0.0

        word_count = len(TextBlob(text).tokens)
        if word_count == 0:
            return 0.0

        lowered_text = text.lower()
        # Keyed by keyword so that repeated keywords are not double counted.
        keyword_counts = {
            keyword: lowered_text.count(keyword.lower()) for keyword in keywords
        }

        return sum(keyword_counts.values()) / word_count

    def calculate_readability_score(self, text: str) -> float:
        """Return the inverse of the average number of tokens per sentence.

        Shorter sentences therefore give a higher score.

        Args:
            text: The text to score.

        Returns:
            ``1 / (tokens / sentences)``; 0.0 when *text* has no sentences
            or no tokens.
        """
        if not text or not text.strip():
            return 0.0

        sentences = TextBlob(text).sentences
        word_count = sum(len(sentence.tokens) for sentence in sentences)
        sentence_count = len(sentences)
        if sentence_count == 0 or word_count == 0:
            return 0.0

        average_words_per_sentence = word_count / sentence_count
        return 1 / average_words_per_sentence

    def perform_semantic_analysis(self, text: str) -> float:
        """Return the share of tagged words that are nouns or verbs.

        Args:
            text: The text to tag.

        Returns:
            ``(nouns + verbs) / tagged words``; 0.0 when nothing is tagged.
        """
        if not text or not text.strip():
            return 0.0

        # (word, part-of-speech tag) pairs from TextBlob's tagger.
        tagged_text = TextBlob(text).tags
        if not tagged_text:
            return 0.0

        # Tags starting with 'N' are nouns, those starting with 'V' verbs.
        content_word_count = sum(
            1 for _, pos in tagged_text if pos.startswith(('N', 'V'))
        )
        return content_word_count / len(tagged_text)
|
||||
|
||||
class SpellChecker:
    """Find misspelled English words with an enchant dictionary."""

    def check_spelling(self, text: str) -> List[str]:
        """Return the words of *text* that the en_US dictionary rejects.

        The ``tokenize`` name this method used to call is never imported by
        this module, so every call raised NameError. Words now come from
        TextBlob, which the module already imports. Punctuation, numbers
        and contraction fragments such as "'s" are skipped, because they
        are not words and would otherwise be reported as misspellings.

        Args:
            text: The text to check.

        Returns:
            The rejected words in order of appearance (repeats included);
            an empty list for empty or whitespace-only input.
        """
        if not text or not text.strip():
            return []

        dictionary = enchant.Dict("en_US")

        misspelled_words = []
        for word in TextBlob(text).words:
            token = str(word)
            is_word = any(ch.isalpha() for ch in token) and not token.startswith("'")
            if is_word and not dictionary.check(token):
                misspelled_words.append(token)

        return misspelled_words
|
||||
|
||||
class SEOAnalysisModule:
    """Facade that runs every text-level SEO check and collects the results."""

    def __init__(self):
        """Create the collaborating helpers."""
        # Kept as a public attribute for callers; analyze_text() itself works
        # on the raw text (see there).
        self.text_preprocessor = TextPreprocessor()
        self.seo_analyzer = SEOAnalyzer()
        self.spell_checker = SpellChecker()

    def analyze_text(self, text: str, keywords: List[str]) -> Dict[str, Union[float, List[str]]]:
        """Run all checks on *text* and return the metrics.

        Every check runs on the raw text. The stemmed output of the
        preprocessor was previously passed on instead, but stems such as
        "analysi" are not words: the spell checker reported them as
        misspellings, part-of-speech tagging degraded, and unstemmed
        keywords stopped matching.

        Args:
            text: The text to analyze.
            keywords: Keywords used for the density metric.

        Returns:
            A dict with the keys 'seo_percentage', 'keyword_density',
            'readability_score', 'semantic_score' (floats) and
            'spelling_errors' (list of misspelled words).
        """
        analyzer = self.seo_analyzer

        return {
            'seo_percentage': analyzer.calculate_seo_percentage(text, keywords),
            'keyword_density': analyzer.calculate_keyword_density(text, keywords),
            'readability_score': analyzer.calculate_readability_score(text),
            'semantic_score': analyzer.perform_semantic_analysis(text),
            'spelling_errors': self.spell_checker.check_spelling(text),
        }
|
||||
Reference in New Issue
Block a user