Initial version of blog Gen

2023-10-07 10:51:22 +05:30
commit 12010c0478
27 changed files with 1295 additions and 0 deletions
--- a/lib/seo_module/seo_analysis.py
+++ b/lib/seo_module/seo_analysis.py
@@ -0,0 +1,115 @@
+from typing import List, Dict, Union
+from nltk import tokenize, stem, pos_tag
+from textblob import TextBlob
+import enchant
+
+class TextPreprocessor:
+    def preprocess_text(self, text: str) -> str:
+        # Tokenize the text
+        tokens = tokenize.word_tokenize(text)
+        
+        # Stem the tokens
+        stemmer = stem.PorterStemmer()
+        stemmed_tokens = [stemmer.stem(token) for token in tokens]
+        
+        # Join the stemmed tokens back into a string
+        preprocessed_text = ' '.join(stemmed_tokens)
+        
+        return preprocessed_text
+
+class SEOAnalyzer:
+    def calculate_seo_percentage(self, text: str, keywords: List[str]) -> float:
+        # Calculate the keyword density
+        keyword_density = self.calculate_keyword_density(text, keywords)
+        
+        # Calculate the readability score
+        readability_score = self.calculate_readability_score(text)
+        
+        # Perform semantic analysis
+        semantic_score = self.perform_semantic_analysis(text)
+        
+        # Calculate the SEO percentage based on the metrics
+        seo_percentage = (keyword_density + readability_score + semantic_score) / 3
+        
+        return seo_percentage
+    
+    def calculate_keyword_density(self, text: str, keywords: List[str]) -> float:
+        # Count the number of occurrences of each keyword in the text
+        keyword_counts = {keyword: text.lower().count(keyword.lower()) for keyword in keywords}
+        
+        # Calculate the total number of words in the text
+        word_count = len(tokenize.word_tokenize(text))
+        
+        # Calculate the keyword density
+        keyword_density = sum(keyword_counts.values()) / word_count
+        
+        return keyword_density
+    
+    def calculate_readability_score(self, text: str) -> float:
+        # Calculate the average number of words per sentence
+        sentences = tokenize.sent_tokenize(text)
+        word_count = sum(len(tokenize.word_tokenize(sentence)) for sentence in sentences)
+        sentence_count = len(sentences)
+        average_words_per_sentence = word_count / sentence_count
+        
+        # Calculate the readability score
+        readability_score = 1 / average_words_per_sentence
+        
+        return readability_score
+    
+    def perform_semantic_analysis(self, text: str) -> float:
+        # Perform part-of-speech tagging on the text
+        tagged_text = pos_tag(tokenize.word_tokenize(text))
+        
+        # Calculate the semantic score based on the number of nouns and verbs
+        noun_count = sum(1 for word, pos in tagged_text if pos.startswith('N'))
+        verb_count = sum(1 for word, pos in tagged_text if pos.startswith('V'))
+        semantic_score = (noun_count + verb_count) / len(tagged_text)
+        
+        return semantic_score
+
+class SpellChecker:
+    def check_spelling(self, text: str) -> List[str]:
+        # Create a spellchecker object
+        spellchecker = enchant.Dict("en_US")
+        
+        # Tokenize the text
+        tokens = tokenize.word_tokenize(text)
+        
+        # Check the spelling of each token
+        misspelled_words = [token for token in tokens if not spellchecker.check(token)]
+        
+        return misspelled_words
+
+class SEOAnalysisModule:
+    def __init__(self):
+        self.text_preprocessor = TextPreprocessor()
+        self.seo_analyzer = SEOAnalyzer()
+        self.spell_checker = SpellChecker()
+    
+    def analyze_text(self, text: str, keywords: List[str]) -> Dict[str, Union[float, List[str]]]:
+        # Preprocess the text
+        preprocessed_text = self.text_preprocessor.preprocess_text(text)
+        
+        # Calculate the SEO percentage
+        seo_percentage = self.seo_analyzer.calculate_seo_percentage(preprocessed_text, keywords)
+        
+        # Calculate the keyword density
+        keyword_density = self.seo_analyzer.calculate_keyword_density(preprocessed_text, keywords)
+        
+        # Calculate the readability score
+        readability_score = self.seo_analyzer.calculate_readability_score(preprocessed_text)
+        
+        # Perform semantic analysis
+        semantic_score = self.seo_analyzer.perform_semantic_analysis(preprocessed_text)
+        
+        # Check the spelling
+        spelling_errors = self.spell_checker.check_spelling(preprocessed_text)
+        
+        return {
+            'seo_percentage': seo_percentage,
+            'keyword_density': keyword_density,
+            'readability_score': readability_score,
+            'semantic_score': semantic_score,
+            'spelling_errors': spelling_errors
+        }