Features: AI Rich snippet from url, AI product description writer

2024-07-17 12:00:27 +05:30
parent c923435be2
commit 44d83e2b81
19 changed files with 136 additions and 130 deletions
--- a/lib/ai_seo_tools/TBD
+++ b/lib/ai_seo_tools/TBD
@@ -0,0 +1 @@
+https://pypi.org/project/textstat/
--- a/lib/ai_seo_tools/cgpt_seo_analyzer.py
+++ b/lib/ai_seo_tools/cgpt_seo_analyzer.py
@@ -0,0 +1,135 @@
+###################################################
+#
+# Analyzes an HTML page for SEO factors: keyword density, keywords in the title
+# and image alt text, title length, headings, internal/external links, spelling,
+# grammar and readability. The meta description is not analyzed yet.
+#
+##################################################
+
+import re
+from bs4 import BeautifulSoup
+from textstat import flesch_reading_ease
+import spellchecker
+
+class SEOAnalyzer:
+    """Analyse one HTML document against a list of target keywords."""
+
+    def __init__(self, html_content, target_keywords):
+        self.html_content = html_content
+        self.target_keywords = target_keywords
+
+    def analyze_html_content(self):
+        """Run all checks and return the findings as a dict.
+
+        On success the dict holds the metric keys built at the end of this
+        method; if any step raises, ``{'error': str(exc)}`` is returned.
+        """
+        try:
+            soup = BeautifulSoup(self.html_content, 'html.parser')
+            keywords = list(self.target_keywords)
+            text = re.sub(r'\s+', ' ', ' '.join(soup.stripped_strings))
+            lowered = text.lower()
+
+            # Occurrences per 100 words; an empty page yields 0.0, not a crash.
+            total_words = len(text.split())
+            keyword_density = {
+                kw: lowered.count(kw.lower()) / total_words * 100 if total_words else 0.0
+                for kw in keywords
+            }
+
+            title_tag = soup.find('title')
+            title_text = title_tag.text.strip().lower() if title_tag else ''
+            keyword_presence_in_title = {kw: kw.lower() in title_text for kw in keywords}
+            img_alt_text = [img.get('alt', '').lower() for img in soup.find_all('img')]
+            keyword_presence_in_img_alt_text = {kw: any(kw.lower() in alt for alt in img_alt_text) for kw in keywords}
+            headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
+            headings_text = ' '.join(heading.text.lower() for heading in headings)
+
+            # Absolute (http/https) and protocol-relative hrefs count as external;
+            # the page's own host is unknown here. Internal links are the other
+            # non-empty hrefs that are not bare fragments or schemes like mailto:.
+            hrefs = [a.get('href', '').strip().lower() for a in soup.find_all('a')]
+            external_links = sum(h.startswith(('http://', 'https://', '//')) for h in hrefs)
+            internal_links = sum(
+                bool(h) and not h.startswith(('#', '//')) and not re.match(r'[a-z][a-z0-9+.-]*:', h)
+                for h in hrefs
+            )
+
+            readability_score = flesch_reading_ease(text)
+            # Check bare words only, so "page." is not reported as unknown.
+            spell = spellchecker.SpellChecker()
+            spelling_errors = len(spell.unknown(re.findall(r"[A-Za-z]+(?:'[A-Za-z]+)*", text)))
+            # pyspellchecker has no grammar API, so use a check_grammar hook only
+            # if the checker provides one; otherwise report None.
+            check_grammar = getattr(spell, 'check_grammar', None)
+            grammar_errors = len(check_grammar(text)) if callable(check_grammar) else None
+
+            # One point per keyword found in the text, one more for a 50-70 character title.
+            missing = [kw for kw in keywords if kw.lower() not in lowered]
+            seo_score = len(keywords) - len(missing)
+            if 50 <= len(title_text) <= 70:
+                seo_score += 1
+
+            # Emit only the suggestions whose check failed for this page.
+            checks = [
+                (not missing, "Add more relevant keywords to your HTML content."),
+                (all(keyword_presence_in_title.values()), "Make sure your title contains keywords."),
+                (all(keyword_presence_in_img_alt_text.values()), "Add keywords to image alt text."),
+                (bool(headings), "Add headings to your HTML content."),
+                (internal_links > 0, "Add internal links to your HTML content."),
+            ]
+            suggestions = [advice for passed, advice in checks if not passed]
+            return {
+                'Keyword Density': keyword_density,
+                'Keyword Presence in Title': keyword_presence_in_title,
+                'Keyword Presence in Image Alt Text': keyword_presence_in_img_alt_text,
+                'Headings Text': headings_text,
+                'Internal Links': internal_links,
+                'External Links': external_links,
+                'Readability Score': readability_score,
+                'Spelling Errors': spelling_errors,
+                'Grammar Errors': grammar_errors,
+                'SEO Score': seo_score,
+                'Suggestions': suggestions
+            }
+        except Exception as e:
+            return {'error': str(e)}
+
+# Example usage: analyse a small inline page and print every metric.
+if __name__ == "__main__":
+    html_content = """
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <title>SEO Analyzer - Sample Page</title>
+        <meta name="description" content="This is a sample page for SEO analysis.">
+    </head>
+    <body>
+        <h1>Welcome to the SEO Analyzer</h1>
+        <p>This is a sample page with some sample content for SEO analysis. It mentions the target keywords SEO, keywords, and content.</p>
+        <img src="image1.jpg" alt="SEO image">
+        <img src="image2.jpg" alt="Keywords image">
+    </body>
+    </html>
+    """
+
+    keywords = ['SEO', 'keywords', 'content']  # Replace with your target keywords
+
+    seo_analyzer = SEOAnalyzer(html_content, keywords)
+    results = seo_analyzer.analyze_html_content()
+
+    if 'error' in results:
+        # analyze_html_content reports failures as {'error': message} instead
+        # of raising, so none of the metric keys exist in that case.
+        print(f"SEO analysis failed: {results['error']}")
+    else:
+        print("SEO Analysis Results:")
+        # Every key except 'Suggestions' is printed as "<key>: <value>", in
+        # the order the analyzer inserted them into the result dict.
+        for metric, value in results.items():
+            if metric != 'Suggestions':
+                print(f"{metric}: {value}")
+        print("Suggestions:")
+        for suggestion in results['Suggestions']:
+            print(suggestion)
+
--- a/lib/ai_seo_tools/plagiarism_checker_from_known_sources.py
+++ b/lib/ai_seo_tools/plagiarism_checker_from_known_sources.py
@@ -0,0 +1,80 @@
+import re
+
+#######################################################################
+#
+# I want you to act as a plagiarism checker. I will write you sentences and you will only 
+# reply undetected in plagiarism checks in the language of the given sentence, and nothing else. 
+# Do not write explanations on replies. My first sentence is {}
+#
+######################################################################
+
+
+class PlagiarismChecker:
+    """Flags content whose tag-stripped text equals that of a known source."""
+
+    def __init__(self, known_sources):
+        self.known_sources = known_sources
+
+    def check_plagiarism(self, html_content):
+        """Return a verdict string for ``html_content``.
+
+        Tags become spaces, whitespace is collapsed and case is ignored before
+        an exact comparison; a caught exception's message is returned as-is.
+        """
+        def flatten(markup):
+            return re.sub(r'\s+', ' ', re.sub(r'<[^>]+>', ' ', markup)).strip().lower()
+        try:
+            candidate = flatten(html_content)
+            for source in self.known_sources:
+                if flatten(source) == candidate:
+                    return f"Plagiarism detected: Matches known source - {source}"
+            return "No plagiarism detected. Content is original."
+        except Exception as e:
+            return str(e)
+
+# Demo: compare one candidate page with two reference pages.
+if __name__ == "__main__":
+    # Page under test; only its <h1> text also appears in a reference page.
+    html_content = """
+    <html>
+    <head>
+        <title>Sample Page</title>
+    </head>
+    <body>
+        <h1>Hello, World!</h1>
+        <p>This is sample content.</p>
+    </body>
+    </html>
+    """
+
+    # Reference pages the candidate is compared against
+    known_sources = [
+        """
+        <html>
+        <head>
+            <title>Sample Page 1</title>
+        </head>
+        <body>
+            <h1>Hello, World!</h1>
+            <p>This is sample content from known source 1.</p>
+        </body>
+        </html>
+        """,
+        """
+        <html>
+        <head>
+            <title>Sample Page 2</title>
+        </head>
+        <body>
+            <h1>Welcome to Known Source 2</h1>
+            <p>This is some content from another known source.</p>
+        </body>
+        </html>
+        """
+    ]
+
+    # Neither reference page matches the candidate's full text, so this
+    # prints the "No plagiarism detected" verdict.
+    checker = PlagiarismChecker(known_sources)
+    print(checker.check_plagiarism(html_content))
+
--- a/lib/ai_seo_tools/seo_analysis.py
+++ b/lib/ai_seo_tools/seo_analysis.py
@@ -0,0 +1,115 @@
+from typing import List, Dict, Union
+from nltk import tokenize, stem, pos_tag
+from textblob import TextBlob
+import enchant
+
+class TextPreprocessor:
+    """Reduces text to a space-separated sequence of Porter stems."""
+
+    def preprocess_text(self, text: str) -> str:
+        """Return ``text`` tokenised, stemmed and re-joined with single spaces.
+
+        Uses ``tokenize.word_tokenize`` and ``stem.PorterStemmer``; both names
+        must be available at module level (they match NLTK's API). Tokens
+        keep their original order.
+        """
+        words = tokenize.word_tokenize(text)
+        porter = stem.PorterStemmer()
+        return ' '.join(map(porter.stem, words))
+
+class SEOAnalyzer:
+    """Text-level SEO metrics computed with the ``tokenize``/``pos_tag`` helpers."""
+
+    def calculate_seo_percentage(self, text: str, keywords: List[str]) -> float:
+        """Return the mean of keyword density, readability and semantic score.
+
+        NOTE: despite the name, the result is not scaled to 0-100; it is the
+        plain average of the three metric values.
+        """
+        keyword_density = self.calculate_keyword_density(text, keywords)
+        readability_score = self.calculate_readability_score(text)
+        semantic_score = self.perform_semantic_analysis(text)
+        return (keyword_density + readability_score + semantic_score) / 3
+
+    def calculate_keyword_density(self, text: str, keywords: List[str]) -> float:
+        """Return total keyword occurrences divided by the token count.
+
+        Occurrences are case-insensitive substring counts summed over all
+        keywords. Text without tokens yields 0.0 instead of raising
+        ZeroDivisionError.
+        """
+        word_count = len(tokenize.word_tokenize(text))
+        if not word_count:
+            return 0.0
+        lowered = text.lower()
+        return sum(lowered.count(keyword.lower()) for keyword in keywords) / word_count
+
+    def calculate_readability_score(self, text: str) -> float:
+        """Return the inverse of the average number of tokens per sentence.
+
+        Text without sentences or tokens yields 0.0.
+        """
+        sentences = tokenize.sent_tokenize(text)
+        word_count = sum(len(tokenize.word_tokenize(sentence)) for sentence in sentences)
+        if not sentences or not word_count:
+            return 0.0
+        average_words_per_sentence = word_count / len(sentences)
+        return 1 / average_words_per_sentence
+
+    def perform_semantic_analysis(self, text: str) -> float:
+        """Return the share of tokens tagged as nouns or verbs.
+
+        A token counts when its tag starts with 'N' or 'V'. Text without
+        tokens yields 0.0.
+        """
+        tagged_text = pos_tag(tokenize.word_tokenize(text))
+        if not tagged_text:
+            return 0.0
+        content_words = sum(1 for _, pos in tagged_text if pos.startswith(('N', 'V')))
+        return content_words / len(tagged_text)
+
+class SpellChecker:
+    """Finds tokens that the "en_US" Enchant dictionary rejects."""
+
+    def check_spelling(self, text: str) -> List[str]:
+        """Return the tokens of ``text`` that fail the dictionary check.
+
+        Every token from ``tokenize.word_tokenize`` is tested; rejected tokens
+        are returned in text order, repeats included.
+        """
+        dictionary = enchant.Dict("en_US")
+        words = tokenize.word_tokenize(text)
+        return [word for word in words if not dictionary.check(word)]
+
+class SEOAnalysisModule:
+    """Facade that runs preprocessing, SEO metrics and spell checking."""
+
+    def __init__(self):
+        self.text_preprocessor = TextPreprocessor()
+        self.seo_analyzer = SEOAnalyzer()
+        self.spell_checker = SpellChecker()
+
+    def analyze_text(self, text: str, keywords: List[str]) -> Dict[str, Union[float, List[str]]]:
+        """Return all metrics for ``text`` in one dict.
+
+        The SEO metrics are computed on the preprocessed text with equally
+        preprocessed keywords; spelling is checked on the raw text.
+
+        Keys: 'seo_percentage', 'keyword_density', 'readability_score',
+        'semantic_score' and 'spelling_errors'.
+        """
+        preprocess = self.text_preprocessor.preprocess_text
+        preprocessed_text = preprocess(text)
+        # The text is stemmed, so stem the keywords too; otherwise a keyword
+        # such as "keywords" never matches its stem "keyword" in the text.
+        stemmed_keywords = [preprocess(keyword) for keyword in keywords]
+        analyzer = self.seo_analyzer
+        return {
+            'seo_percentage': analyzer.calculate_seo_percentage(preprocessed_text, stemmed_keywords),
+            'keyword_density': analyzer.calculate_keyword_density(preprocessed_text, stemmed_keywords),
+            'readability_score': analyzer.calculate_readability_score(preprocessed_text),
+            'semantic_score': analyzer.perform_semantic_analysis(preprocessed_text),
+            # Spell-check the raw text: stems such as "analysi" are not
+            # dictionary words and would be reported as misspellings.
+            'spelling_errors': self.spell_checker.check_spelling(text),
+        }