Initial version of blog Gen

2023-10-07 10:51:22 +05:30
commit 12010c0478
27 changed files with 1295 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+__pycache__
+pseo-experiemnts/
--- a/README.md
+++ b/README.md
--- a/2
+++ b/2
@@ -0,0 +1,2 @@
+https://github.com/hardikvasa/google-images-download
+
--- a/blog_images/generated_image.png
+++ b/blog_images/generated_image.png
--- a/blog_images/variation_example.png
+++ b/blog_images/variation_example.png
--- a/lib/.get_text_response.py.swp
+++ b/lib/.get_text_response.py.swp
--- a/lib/.gpte_consent
+++ b/lib/.gpte_consent
@@ -0,0 +1 @@
+true
--- a/lib/blog_proof_reader.py
+++ b/lib/blog_proof_reader.py
@@ -0,0 +1,6 @@
+"""
+I want you to act as a proofreader. I will provide you with texts and I would like you to review them for any spelling, grammar, or punctuation errors. Once you have finished reviewing the text, provide me with any necessary corrections or suggestions to improve the text.
+
+I want you to act as an SEO editor and copywriter. I need you to proofread and analyze the following text and optimize it for the focus keyword. I also need you to correct any grammar mistakes you find in the article. Ask me to provide you with the article text and focus keyword.
+
+"""
--- a/lib/generate_image_from_prompt.py
+++ b/lib/generate_image_from_prompt.py
@@ -0,0 +1,179 @@
+#########################################################
+#
+# This module will generate images for the blogs using APIs
+# from Dall-E and other free resources. Given a prompt, the
+# images will be stored in local directory.
+# Required: openai API key.
+#
+#########################################################
+
+# imports
+
+import openai  # OpenAI Python library to make API calls
+import requests  # used to download images
+import os  # used to access filepaths
+from PIL import Image  # used to display and edit images
+
+# Set the API key.
+# Reading it from the environment is safer than hardcoding it here, but not
+# everyone has a shell to export it from. A later version should take it from
+# a config file and pass it in as a parameter.
+# Expected variable: OPENAI_API_KEY=<API-KEY>
+openai.api_key = os.environ.get("OPENAI_API_KEY")
+
+# Directory (relative to the current working directory) that DALL·E images are saved to.
+image_dir_name = "blog_images"
+image_dir = os.path.join(os.curdir, image_dir_name)
+
+# Create the directory if it doesn't yet exist. exist_ok=True makes this
+# idempotent and removes the gap between the isdir() check and mkdir() in
+# which another process could create the directory first.
+os.makedirs(image_dir, exist_ok=True)
+
+
+def generate_image(logger, num_images=1, img_size="1024x1024", response_format="url", prompt=None):
+    """
+    Create image(s) from a text prompt with the OpenAI image generation API and
+    pass the response on to save_generated_image().
+
+    Required inputs:
+    logger: Object whose info() and error() methods receive progress messages.
+
+    Optional inputs:
+    --> num_images (int): The number of images to generate. Must be between 1 and 10. Defaults to 1.
+    --> img_size (str): The size of the generated images. Must be one of "256x256", "512x512", or "1024x1024".
+    Smaller images are faster. Defaults to "1024x1024".
+    --> response_format (str): The format in which the generated images are returned.
+    Must be one of "url" or "b64_json". Defaults to "url".
+    --> prompt (str): A text description of the desired image(s). The maximum length is 1000 characters.
+    When omitted, the built-in sample prompt below is used.
+
+    Returns None. An AttributeError raised by the openai call is logged and swallowed.
+
+    Note: save_generated_image() only reads response["data"][0]["url"], so extra
+    images (num_images > 1) and "b64_json" responses are not handled there yet.
+    """
+    logger.info(f"Generated blog images will be stored at: {image_dir=}")
+
+    # TBD: Ask gpt for prompt for AI generated images as:
+    # I want you to act as an artist advisor providing advice on various art styles such tips on utilizing
+    # light & shadow effects effectively in painting, shading techniques while sculpting etc.
+    # Develop prompts for an AI-generated art piece inspired by [concept], using [symbolism] and [metaphor].
+    # Provide prompts for an AI-generated art piece inspired by [era] art, incorporating [medium] and [subject matter].
+    # Develop a set of prompts that could be used to generate AI-generated art focused on the theme of “urban decay.”
+    # I want you to act as a prompt generator for Science Fiction Art and
+    # give me five prompts that transport me to a futuristic world.
+    # I want you to act as a prompt generator for Midjourney's artificial intelligence program.
+    # Your job is to provide detailed and creative descriptions that will inspire unique and interesting images from the AI.
+    # Keep in mind that the AI is capable of understanding a wide range of language and can interpret abstract concepts,
+    # so feel free to be as imaginative and descriptive as possible. For example,
+    # you could describe a scene from a futuristic city, or a surreal landscape filled with strange creatures.
+    # The more detailed and imaginative your description, the more interesting the resulting image will be.
+    # Here is your first prompt: ""
+
+    if prompt is None:
+        prompt = "An illustration of AI teaching human to speak"
+
+    # Call the OpenAI API to generate the image from the prompt. The caller's
+    # arguments are forwarded; they used to be ignored in favour of constants.
+    logger.info(f"Calling openai.image.generate with prompt: {prompt}")
+    try:
+        img_generation_response = openai.Image.create(
+            prompt=prompt,
+            n=num_images,
+            size=img_size,
+            response_format=response_format,
+        )
+    except AttributeError as aerr:
+        # Raised when the installed openai package has no Image.create attribute.
+        logger.error(f"Failed to generate Image, Try: pip install openai --upgrade in your terminal.Error: {aerr}")
+    else:
+        # Log the raw response for debugging.
+        logger.info(f"{img_generation_response}")
+        save_generated_image(logger, img_generation_response)
+
+
+def save_generated_image(logger, img_generation_response):
+    """
+    Download the first image referenced by an image-generation response,
+    write it to image_dir as "generated_image.png" and display it.
+
+    Inputs:
+    logger: Accepted for signature parity with the other helpers; progress is
+    currently reported with print().
+    img_generation_response: Mapping in which ["data"][0]["url"] is the image URL.
+
+    Raises:
+    SystemExit: when the download fails (connection error, timeout or HTTP error status).
+    """
+    # We need to change the image name to unique, overwrite and for SEO considerations.
+    # Note: filetype should be *.png
+    generated_image_name = "generated_image.png"
+    generated_image_filepath = os.path.join(image_dir, generated_image_name)
+
+    # extract image URL from response
+    generated_image_url = img_generation_response["data"][0]["url"]
+    print(f"Extracted URL: {generated_image_url}")
+
+    # Fetch the image exactly once. The previous version issued a second,
+    # unguarded streaming request whose result was thrown away.
+    try:
+        response = requests.get(generated_image_url, timeout=60)
+        # Turn 4xx/5xx answers into an exception so an error page is never saved as a PNG.
+        response.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        raise SystemExit(f"Failed to get generted image content: {e}") from e
+
+    # Write the image to a file and store.
+    with open(generated_image_filepath, "wb") as image_file:
+        image_file.write(response.content)
+
+    # Optional, dbg: display the stored image.
+    print("Display the generated image.")
+    with Image.open(generated_image_filepath) as img:
+        img.show()
+
+    # Close image window.
+    #for proc in psutil.process_iter():
+    #    if proc.name() == "Image Viewer":
+    #        proc.kill()
+
+
+# WIP
+# The idea is to download images from other blogs and recreate from it.
+# This helps us generate images very close to the topic and also not worry about prompt message.
+def gen_new_from_given_img(logger, num_img=1, img_size="1024x1024", response_format="url", img_path=None):
+    """
+    This function will take an image and produce variant(s) of it with the
+    OpenAI image variation API. The variations are saved to image_dir as
+    "variation_image_<i>.png" and then displayed together with the original.
+
+    Required inputs:
+    logger: Object whose info() and error() methods receive progress messages.
+
+    Optional inputs:
+    num_img (int): The number of images to generate. Must be between 1 and 10. Defaults to 1.
+    img_size (str): The size of the generated images. Must be one of "256x256", "512x512", or "1024x1024".
+    Smaller images are faster. Defaults to "1024x1024".
+    response_format (str): The format in which the generated images are returned. Must be one of "url" or "b64_json".
+    Defaults to "url". The save step below reads the "url" field of each result.
+    img_path (str): The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB,
+    and square. Defaults to the development sample path used so far.
+
+    Raises:
+    SystemExit: when preparing the image or calling the API fails.
+    """
+    if img_path is None:
+        # Historical hard-coded sample; pass img_path to use any other image.
+        img_path = "/home/ajsingh/pseo_experiments_V0.0.1/blog_images/variation_example.png"
+
+    # Flattened copy of the source image that is uploaded to the API.
+    flattened_img_path = 'foo.png'
+    try:
+        # Composite the image onto a white background to flatten transparency.
+        png = Image.open(img_path).convert('RGBA')
+        background = Image.new('RGBA', png.size, (255, 255, 255))
+
+        alpha_composite = Image.alpha_composite(background, png)
+        alpha_composite.save(flattened_img_path, 'PNG', quality=80)
+        # Upload the file that was just written (the old code opened 'foo.jpg',
+        # which is never created) and close the handle afterwards.
+        with open(flattened_img_path, "rb") as flattened_image:
+            variation_response = openai.Image.create_variation(
+                image=flattened_image,
+                n=num_img,
+                size=img_size,
+                response_format=response_format,
+            )
+    except Exception as err:
+        logger.error(f"An error occured in Image.create_variation::: {err}")
+        # Must be raised: merely constructing SystemExit lets execution fall
+        # through to code that needs variation_response.
+        raise SystemExit(1) from err
+
+    # Log the raw response.
+    logger.info(variation_response)
+
+    # Download every variation and save it under image_dir.
+    variation_image_filepaths = []
+    for i, datum in enumerate(variation_response["data"]):
+        filepath = os.path.join(image_dir, f"variation_image_{i}.png")
+        with open(filepath, "wb") as image_file:
+            image_file.write(requests.get(datum["url"]).content)
+        variation_image_filepaths.append(filepath)
+
+    # Show the original image (previously referenced an undefined name).
+    logger.info(img_path)
+    orig_img = Image.open(img_path)
+    orig_img.show()
+
+    # Show the new variations.
+    for variation_image_filepath in variation_image_filepaths:
+        logger.info(variation_image_filepath)
+        var_img = Image.open(variation_image_filepath)
+        var_img.show()
--- a/lib/get_text_response.py
+++ b/lib/get_text_response.py
@@ -0,0 +1,200 @@
+########################################################################
+#
+# Common module for getting response from gpt for given prompt.
+# This module includes following capabilities:
+# 
+#
+#
+########################################################################
+
+import openai
+from tqdm import tqdm, trange
+import time
+import re
+
+
+def get_prompt_reply(prompt, max_token, outputs=1):
+    """
+    Send a prompt to OpenAI's Completion endpoint and return the generated texts.
+
+    Inputs:
+    prompt: The user input passed to the model.
+    max_token: Upper bound on the number of tokens in each generated output.
+    outputs: Number of outputs generated in one call. Defaults to 1.
+
+    Returns:
+    A list with the stripped text of every returned choice. The list is empty
+    when the request failed with one of the handled OpenAI errors; the error is
+    printed, not raised.
+    """
+    # Handled error types paired with the message printed for each of them.
+    handled_errors = (
+        (openai.error.Timeout, "OpenAI API request timed out"),
+        (openai.error.APIError, "OpenAI API returned an API Error"),
+        (openai.error.APIConnectionError, "OpenAI API request failed to connect"),
+        (openai.error.InvalidRequestError, "OpenAI API request was invalid"),
+        (openai.error.AuthenticationError, "OpenAI API request was not authorized"),
+        (openai.error.PermissionError, "OpenAI API request was not permitted"),
+        (openai.error.RateLimitError, "OpenAI API request exceeded rate limit"),
+    )
+
+    try:
+        # OpenAI's Completion module executes tasks involving text.
+        response = openai.Completion.create(
+            # text-davinci-003 is one of the models under the GPT-3 umbrella
+            model="text-davinci-003",
+            prompt=prompt,
+            max_tokens=max_token,
+            n=outputs,
+        )
+    except tuple(exc_type for exc_type, _ in handled_errors) as e:
+        reason = next(msg for exc_type, msg in handled_errors if isinstance(e, exc_type))
+        print(f"{reason}: {e}")
+        # Without a response there is nothing to parse; returning here avoids
+        # the UnboundLocalError the code below would otherwise hit.
+        return []
+
+    # Collect all the outputs.
+    output = [choice['text'].strip() for choice in response['choices']]
+    if output:
+        print(f"Prompt output: {output[0]}")
+    return output
+
+
+def generate_detailed_blog(blog_keywords):
+    """
+    This function will take blog keywords to first generate section topics for
+    them and then generate content for each section.
+
+    Inputs:
+    blog_keywords: Text inserted into the topic-generation prompt.
+
+    Returns:
+    The assembled blog as one string: each generated topic line followed by a
+    blank line, its content and another blank line. Empty when no topics were
+    generated. The same text is printed.
+    """
+
+    # TBD
+    # I want you to act as a blogger and you want to write a blog post about [topic],
+    # with a friendly and approachable tone that engages readers.
+    # Your target audience is [define your target audience].
+    # Write in a personal style using singular first-person pronouns only.
+    # I want you to include these keywords: [keyword 1], [keyword 2], [keyword 3] throughout the article.
+    # Format your response using markdown.
+    # Use headings, subheadings, bullet points, and bold to organize the information.
+    # Answer the most commonly asked questions about the topic at the end of the article.
+    # Create a list of the most popular tools used by the [Field of Interest] professionals with the pros and cons of each tool.
+
+    # generate_blog_topics() can come back with nothing when the API call
+    # fails; treat that as "no topics" rather than crashing on .split().
+    raw_topics = generate_blog_topics(blog_keywords) or ""
+    # Remove null values and incomplete results.
+    blog_topic_arr = [topic for topic in raw_topics.split("\n") if topic.strip()]
+
+    print(f"Generated Blog Topics: {type(blog_topic_arr)}---- {blog_topic_arr}")
+
+    # Sections are collected here and joined once at the end.
+    blog_sections = []
+    # For each of blog topic, generate content.
+    for a_blog_topic in blog_topic_arr:
+        # Error in generating topic content: Rate limit reached for default-global-with-image-limits
+        # in free account on requests per min. Limit: 3 / min. Please try again in 20s.
+        for _ in trange(30):
+            time.sleep(1)
+        # The generated topics usually start with a list marker such as "1)",
+        # "2." or "-". Strip only that marker; the previous pattern
+        # (^\W*\D*) left "1) ..." untouched and erased topics without digits.
+        a_topic = re.sub(r"^\s*(?:\d+\s*[.):-]|[-*•])?\s*", "", a_blog_topic)
+
+        tpc_cnt = generate_topic_content(a_topic)
+        #print(f"{a_topic} ------ {tpc_cnt}")
+
+        # Sew the section heading and its content into the blog.
+        blog_sections.append(f"{a_blog_topic}\n\n{tpc_cnt}\n\n")
+
+    # Used to store the blog in a string, to save in a *.md file.
+    blog_markdown_str = "".join(blog_sections)
+
+    # print/check the final blog content.
+    print(f"Final blog content: {blog_markdown_str}")
+    # Save the blog content as a .md file. Markdown or HTML ?
+    # Best to name the file
+    return blog_markdown_str
+
+
+def generate_blog_topics(blog_keywords):
+    """
+    For the given keywords, generate blog topics.
+    Using the davinci-instruct-beta-v3 model. It’s proven to be an ideal
+    one for generating unique blog content.
+    Ex: Generate SEO optimized blog topics on AI text to image with Python
+
+    Inputs:
+    blog_keywords: Text inserted into the topic-generation prompt.
+
+    Returns:
+    The raw text of the first completion choice, or an empty string when the
+    request failed (the error is printed).
+    """
+    # Prompt engineering, huh ?
+    # Create a blog post about “{blogPostTopic}” . Write it in a “{tone}” tone. Use transition words.
+    # Use active voice. Write over 1000 words. The blog post should be in a beginners guide style.
+    # Add title and subtitle for each section. It should have a minimum of 6 sections.
+    # Include the following keywords: “{keywords}”. Create a good slug for this post and a
+    # meta description with a maximum of 100 words. and add it to the end of the blog post
+
+    prompt = f"As an experienced AI scientist and technical writer, generate SEO optimized blog topics about {blog_keywords}."
+    #prompt = "Generate SEO optimized blog topics for" + " " + f"{blog_keywords}"
+    try:
+        response = openai.Completion.create(
+            engine="davinci-instruct-beta-v3",
+            prompt=prompt,
+            temperature=0.7,
+            max_tokens=100,
+            top_p=1,
+            frequency_penalty=0,
+            presence_penalty=0
+        )
+        return response.choices[0].text
+    except Exception as err:
+        print(f"Error in generating blog topics: {err}")
+        # Explicit empty string so callers that split or concatenate the
+        # result do not fail on an implicit None.
+        return ""
+
+
+def generate_topic_content(prompt):
+    """
+    Generate blog content for one given topic.
+
+    Inputs:
+    prompt: The topic text; it is embedded in a fixed instruction sentence.
+
+    Returns:
+    The raw text of the first completion choice, or an empty string when the
+    request failed (the error is printed).
+    """
+    # Generate a blog post outline for the following topic: {topic}.
+    # The outline should contain various subheadings and include the starting sentence for each section.
+    topic_prompt = f"As an experienced AI researcher and technical writer, blog about {prompt}."
+    try:
+        response = openai.Completion.create(
+            engine="davinci-instruct-beta-v3",
+            prompt=topic_prompt,
+            temperature=0.7,
+            max_tokens=500,
+            top_p=1,
+            frequency_penalty=0,
+            presence_penalty=0
+        )
+    except Exception as err:
+        print(f"Error in generating topic content: {err}")
+        # No response object exists here; returning avoids the
+        # UnboundLocalError the final return would raise.
+        return ""
+
+    return response.choices[0].text
+
+
+def generate_blog_description():
+    """
+    Placeholder for a prompt that yields an SEO optimized blog description.
+
+    Not implemented yet: takes no input and returns None.
+    """
+    # Candidate prompts:
+    # Suggest keywords that I should include in my meta description for my blog post on [topic]
+
+    # I want to generate high CTR meta and keyword rich meta title and meta descriptions in text format.
+    # My keywords are – [keyword 1], [keyword 2], [keyword 3]
+
+    return None
+
+
+def get_blog_tags(blog_article):
+    """
+    Placeholder for suggesting tags for the given blog content.
+
+    Not implemented yet: blog_article is ignored and None is returned.
+    """
+    # Candidate prompt:
+    # Suggest at least 5 tags for the following blog post [Enter your blog post text here].
+    return None
+
+
+def get_long_tailed_keywords(blog_article):
+    """
+    Placeholder for deriving long tailed keywords from the blog article.
+
+    Not implemented yet: blog_article is ignored and None is returned.
+    """
+    # Candidate prompt:
+    # I want you to generate a list of long-tail keywords that are related to the following blog post [Enter blog post text here]
+    return None
+
+
+
--- a/lib/is_content_ai_generated.py
+++ b/lib/is_content_ai_generated.py
@@ -0,0 +1,65 @@
+##############################################################################################
+#
+# Checks for:
+# Short, fragmented sentences that lack human-like coherence.
+# Frequent use of overly complex words or technical jargon.
+#
+# These checks are based on common observations that AI-generated content may sometimes produce 
+# text with unusual patterns or characteristics. However, please keep in mind that these 
+# heuristics are not guaranteed to detect all AI-generated content, and false positives or 
+# negatives can still occur. More advanced techniques and models would be required for more accurate detection.
+#
+#############################################################################################
+
+import spacy
+
+# Load the English language model from spaCy once, at import time.
+# is_ai_generated() below reuses this module-level pipeline on every call.
+nlp = spacy.load("en_core_web_sm")
+
+def is_ai_generated(text):
+    """
+    Apply four heuristics to `text` and report whether any of them fires.
+
+    Returns True when the text contains a tell-tale phrase (case-insensitive),
+    has three identical consecutive tokens, has a sentence shorter than five
+    tokens, or has more long alphabetic words (over 10 characters) than a
+    tenth of its token count. Returns False otherwise.
+    """
+    # Parse the text with the module-level spaCy pipeline.
+    parsed = nlp(text)
+
+    # 1) Explicit mentions of AI authorship or tooling.
+    tell_tale_phrases = (
+        "generated by AI",
+        "auto-generated",
+        "machine-generated",
+        "artificial intelligence",
+        "neural network",
+        "GPT-3",
+        "AI model",
+    )
+    lowered_text = text.lower()
+    if any(phrase.lower() in lowered_text for phrase in tell_tale_phrases):
+        return True
+
+    # 2) The same token three times in a row.
+    token_texts = [token.text for token in parsed]
+    for first, second, third in zip(token_texts, token_texts[1:], token_texts[2:]):
+        if first == second == third:
+            return True
+
+    # 3) Short, fragmented sentences.
+    if any(len(sentence) < 5 for sentence in parsed.sents):
+        return True
+
+    # 4) Too many long words; the threshold is a tenth of the token count.
+    long_words = [token for token in parsed if token.is_alpha and len(token.text) > 10]
+    return len(long_words) > len(parsed) // 10
+
+if __name__ == "__main__":
+    # Small self-check: the sample mentions "AI model" and "machine-generated".
+    input_text = """
+    This is an article generated by a state-of-the-art AI model.
+    The content is machine-generated and may not represent human writing style.
+    """
+
+    verdict = (
+        "The content appears to be AI-generated."
+        if is_ai_generated(input_text)
+        else "The content appears to be written by a human."
+    )
+    print(verdict)
+
--- a/lib/plagiarism_checker/main.py
+++ b/lib/plagiarism_checker/main.py
@@ -0,0 +1,20 @@
+## main.py
+from plagiarism_checker import PlagiarismChecker
+
+
+def main():
+    """Read a string from stdin, run the plagiarism check on it and print the result."""
+    # The checker is built before the user is prompted.
+    checker = PlagiarismChecker()
+
+    # Ask for the text and check it in one step.
+    percentage = checker.check_plagiarism(input("Enter the input string: "))
+
+    # Report the percentage of original content.
+    print(f"The percentage of original content is: {percentage}%")
+
+
+if __name__ == "__main__":
+    main()
--- a/lib/plagiarism_checker/plagiarism_checker_from_known_sources.py
+++ b/lib/plagiarism_checker/plagiarism_checker_from_known_sources.py
@@ -0,0 +1,71 @@
+import re
+
+class PlagiarismChecker:
+    """Detects content whose text is identical to one of a list of known sources."""
+
+    def __init__(self, known_sources):
+        # Raw (HTML) strings the checked content is compared against.
+        self.known_sources = known_sources
+
+    @staticmethod
+    def _normalise(markup):
+        """Replace tags with spaces, collapse whitespace, trim and lower-case."""
+        without_tags = re.sub(r'<[^>]+>', ' ', markup)
+        return re.sub(r'\s+', ' ', without_tags).strip().lower()
+
+    def check_plagiarism(self, html_content):
+        """
+        Compare the normalised text of `html_content` with every known source.
+
+        Returns a message naming the first source whose normalised text is
+        identical, a "no plagiarism" message when none matches, or the text of
+        any exception raised while checking.
+        """
+        try:
+            candidate = self._normalise(html_content)
+
+            # Only exact matches of the normalised text count.
+            for source in self.known_sources:
+                if self._normalise(source) == candidate:
+                    return f"Plagiarism detected: Matches known source - {source}"
+            else:
+                # Loop finished without a match.
+                return "No plagiarism detected. Content is original."
+
+        except Exception as e:
+            return str(e)
+
+# Example usage: check one sample page against two known pages.
+if __name__ == "__main__":
+    # Pages the sample is compared against.
+    known_sources = [
+        """
+        <html>
+        <head>
+            <title>Sample Page 1</title>
+        </head>
+        <body>
+            <h1>Hello, World!</h1>
+            <p>This is sample content from known source 1.</p>
+        </body>
+        </html>
+        """,
+        """
+        <html>
+        <head>
+            <title>Sample Page 2</title>
+        </head>
+        <body>
+            <h1>Welcome to Known Source 2</h1>
+            <p>This is some content from another known source.</p>
+        </body>
+        </html>
+        """
+    ]
+
+    # Page under test.
+    html_content = """
+    <html>
+    <head>
+        <title>Sample Page</title>
+    </head>
+    <body>
+        <h1>Hello, World!</h1>
+        <p>This is sample content.</p>
+    </body>
+    </html>
+    """
+
+    # Build the checker, run the comparison and print the verdict.
+    print(PlagiarismChecker(known_sources).check_plagiarism(html_content))
+
--- a/lib/seo_module/README.md
+++ b/lib/seo_module/README.md
@@ -0,0 +1,33 @@
+## Implementation approach
+
+To implement the SEO module, we will use the following open-source tools and frameworks:
+
+1. Natural Language Toolkit (NLTK): NLTK is a popular library for natural language processing in Python. We can leverage NLTK to perform various SEO checks on the given text, such as keyword density, readability analysis, and sentiment analysis.
+
+2. Beautiful Soup: Beautiful Soup is a Python library for web scraping. We can use Beautiful Soup to extract relevant information from the given text, such as meta tags, headings, and image alt attributes.
+
+3. PyEnchant: PyEnchant is a spell checking library for Python. We can utilize PyEnchant to check the spelling and grammar of the given text and provide suggestions for improvement.
+
+4. TextBlob: TextBlob is a library for processing textual data. We can use TextBlob to perform part-of-speech tagging, noun phrase extraction, and other linguistic analyses on the given text.
+
+5. Flask: Flask is a lightweight web framework for Python. We can use Flask for local testing and development, since it allows us to quickly build and test our SEO module.
+
+Overall, by leveraging these open-source tools and frameworks, we can develop a comprehensive and efficient SEO module that meets the requirements and provides valuable insights and suggestions for improving the SEO of the given text.
+
+## Required Python third-party packages
+
+- nltk==3.6.2
+- beautifulsoup4==4.9.3
+- pyenchant==3.2.1
+- textblob==0.15.3
+- flask==1.1.2
+
+## Modules
+
+The 'text_processor.py' file contains the TextProcessor class, which is responsible for extracting meta tags, headings, and image alt attributes from the given text.
+        
+The 'spell_checker.py' file contains the SpellChecker class, which is responsible for checking the spelling and grammar of the given text.
+        
+The 'seo_checker.py' file contains the SEOChecker class, which is responsible for coordinating the SEO checks by utilizing the TextProcessor and SpellChecker classes.
+
+
--- a/lib/seo_module/cgpt_seo_analyzer.py
+++ b/lib/seo_module/cgpt_seo_analyzer.py
@@ -0,0 +1,135 @@
+###################################################
+#
+# The script covers many SEO factors, including keyword presence, title length, 
+# meta description, images, img alt text, headings, internal links, external links, 
+# spelling errors, grammar errors, and readability.
+#
+##################################################
+
+import re
+from bs4 import BeautifulSoup
+from textstat import flesch_reading_ease
+import spellchecker
+
+class SEOAnalyzer:
+    """Computes simple SEO metrics for an HTML document and a list of target keywords."""
+
+    def __init__(self, html_content, target_keywords):
+        # HTML markup to analyse.
+        self.html_content = html_content
+        # Keywords the page is supposed to rank for.
+        self.target_keywords = target_keywords
+
+    def analyze_html_content(self):
+        """
+        Analyse self.html_content against self.target_keywords.
+
+        Returns a dict with the keys 'Keyword Density', 'Keyword Presence in Title',
+        'Keyword Presence in Image Alt Text', 'Headings Text', 'Internal Links',
+        'External Links', 'Readability Score', 'Spelling Errors', 'Grammar Errors',
+        'SEO Score' and 'Suggestions'. 'Grammar Errors' is None when the spell
+        checker offers no grammar check. If anything raises, the dict is
+        {'error': <message>} instead.
+        """
+        try:
+            soup = BeautifulSoup(self.html_content, 'html.parser')
+
+            # Extract and clean text from HTML
+            text = ' '.join(soup.stripped_strings)
+            text = re.sub(r'\s+', ' ', text)
+            lowered_text = text.lower()
+
+            # Keyword density in percent of the word count. max(..., 1) keeps a
+            # document without text from raising ZeroDivisionError.
+            word_count = max(len(text.split()), 1)
+            keyword_density = {
+                keyword: (lowered_text.count(keyword.lower()) / word_count) * 100
+                for keyword in self.target_keywords
+            }
+
+            # Check for the presence of keywords in the title
+            title_tag = soup.find('title')
+            title_text = title_tag.text.lower() if title_tag else ''
+            keyword_presence_in_title = {keyword: keyword.lower() in title_text for keyword in self.target_keywords}
+
+            # Check for the presence of images and keywords in image alt text
+            images = soup.find_all('img')
+            img_alt_text = [img.get('alt', '').lower() for img in images]
+            keyword_presence_in_img_alt_text = {
+                keyword: any(keyword.lower() in alt_text for alt_text in img_alt_text)
+                for keyword in self.target_keywords
+            }
+
+            # Check for the presence of headings
+            headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
+            headings_text = ' '.join(heading.text.lower() for heading in headings)
+
+            # Classify links. Absolute and protocol-relative URLs count as
+            # external (the page's own domain is unknown here); fragments and
+            # mailto/tel/javascript targets count as neither. Previously every
+            # href without '#' was counted as internal, external ones included.
+            external_prefixes = ('http://', 'https://', '//')
+            non_page_prefixes = ('#', 'mailto:', 'tel:', 'javascript:')
+            hrefs = [(link.get('href') or '').strip().lower() for link in soup.find_all('a')]
+            external_links = sum(1 for href in hrefs if href.startswith(external_prefixes))
+            internal_links = sum(
+                1 for href in hrefs
+                if href and not href.startswith(external_prefixes + non_page_prefixes)
+            )
+
+            # Calculate readability score
+            readability_score = flesch_reading_ease(text)
+
+            # Spelling: check bare words so trailing punctuation ("analysis.")
+            # is not reported as a misspelling.
+            spell = spellchecker.SpellChecker()
+            words = re.findall(r"[A-Za-z]+(?:'[A-Za-z]+)?", text)
+            spelling_errors = len(spell.unknown(words))
+
+            # NOTE(review): pyspellchecker's SpellChecker does not appear to
+            # provide check_grammar(); calling it unconditionally turned every
+            # analysis into the error dict. Use it only when it exists.
+            check_grammar = getattr(spell, 'check_grammar', None)
+            grammar_errors = len(check_grammar(text)) if callable(check_grammar) else None
+
+            # SEO score: one point per keyword found in the text (compared
+            # case-insensitively on both sides) ...
+            seo_score = sum(1 for keyword in self.target_keywords if keyword.lower() in lowered_text)
+
+            # ... plus one point for a title inside the recommended range,
+            # which is measured in characters, not words.
+            title_length = len(title_text.strip())
+            recommended_title_length = (50, 70)
+
+            if recommended_title_length[0] <= title_length <= recommended_title_length[1]:
+                seo_score += 1
+
+            # Generate suggestions for improvement
+            suggestions = []
+            if seo_score < 5:
+                suggestions.append("Add more relevant keywords to your HTML content.")
+                suggestions.append("Make sure your title contains keywords.")
+                suggestions.append("Add keywords to image alt text.")
+                suggestions.append("Add headings to your HTML content.")
+                suggestions.append("Add internal links to your HTML content.")
+
+            return {
+                'Keyword Density': keyword_density,
+                'Keyword Presence in Title': keyword_presence_in_title,
+                'Keyword Presence in Image Alt Text': keyword_presence_in_img_alt_text,
+                'Headings Text': headings_text,
+                'Internal Links': internal_links,
+                'External Links': external_links,
+                'Readability Score': readability_score,
+                'Spelling Errors': spelling_errors,
+                'Grammar Errors': grammar_errors,
+                'SEO Score': seo_score,
+                'Suggestions': suggestions
+            }
+        except Exception as e:
+            # Best-effort contract: report the failure to the caller as data.
+            return {'error': str(e)}
+
+# Example usage:
+if __name__ == "__main__":
+    html_content = """
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <title>SEO Analyzer - Sample Page</title>
+        <meta name="description" content="This is a sample page for SEO analysis.">
+    </head>
+    <body>
+        <h1>Welcome to the SEO Analyzer</h1>
+        <p>This is a sample page with some sample content for SEO analysis. It mentions the target keywords SEO, keywords, and content.</p>
+        <img src="image1.jpg" alt="SEO image">
+        <img src="image2.jpg" alt="Keywords image">
+    </body>
+    </html>
+    """
+
+    keywords = ['SEO', 'keywords', 'content']  # Replace with your target keywords
+
+    seo_analyzer = SEOAnalyzer(html_content, keywords)
+    results = seo_analyzer.analyze_html_content()
+
+    if 'error' in results:
+        # analyze_html_content() reports failures as {'error': message};
+        # indexing the metric keys on that dict would raise KeyError.
+        print(f"SEO analysis failed: {results['error']}")
+    else:
+        print("SEO Analysis Results:")
+        # Metrics printed one per line, in this order.
+        metric_labels = (
+            'Keyword Density',
+            'Keyword Presence in Title',
+            'Keyword Presence in Image Alt Text',
+            'Headings Text',
+            'Internal Links',
+            'External Links',
+            'Readability Score',
+            'Spelling Errors',
+            'Grammar Errors',
+            'SEO Score',
+        )
+        for label in metric_labels:
+            print(f"{label}: {results[label]}")
+        print("Suggestions:")
+        for suggestion in results['Suggestions']:
+            print(suggestion)
+
--- a/lib/seo_module/seo_analysis.py
+++ b/lib/seo_module/seo_analysis.py
@@ -0,0 +1,115 @@
+from typing import List, Dict, Union
+from nltk import tokenize, stem, pos_tag
+from textblob import TextBlob
+import enchant
+
+class TextPreprocessor:
+    """Reduce text to a space-separated sequence of Porter stems."""
+
+    def preprocess_text(self, text: str) -> str:
+        """Tokenize ``text``, stem every token and join the stems with spaces."""
+        words = tokenize.word_tokenize(text)
+        porter = stem.PorterStemmer()
+
+        # One stem per token, in the original token order.
+        return ' '.join(map(porter.stem, words))
+
+class SEOAnalyzer:
+    """Compute simple text-level SEO metrics.
+
+    Every metric returns ``0.0`` for empty or whitespace-only text instead of
+    raising ``ZeroDivisionError``.
+    """
+
+    def calculate_seo_percentage(self, text: str, keywords: List[str]) -> float:
+        """Return the unweighted mean of the three metrics of this class.
+
+        Args:
+            text: The text to score.
+            keywords: Target keywords, forwarded to the density calculation.
+
+        Returns:
+            (keyword density + readability score + semantic score) / 3.
+        """
+        keyword_density = self.calculate_keyword_density(text, keywords)
+        readability_score = self.calculate_readability_score(text)
+        semantic_score = self.perform_semantic_analysis(text)
+
+        return (keyword_density + readability_score + semantic_score) / 3
+
+    def calculate_keyword_density(self, text: str, keywords: List[str]) -> float:
+        """Return total keyword occurrences divided by the token count of ``text``.
+
+        Matching is a case-insensitive substring count, so a keyword is also
+        counted when it occurs inside a longer word.
+        """
+        if not text.strip():
+            return 0.0
+
+        word_count = len(tokenize.word_tokenize(text))
+        if word_count == 0:
+            return 0.0
+
+        # Lower-case the text once instead of once per keyword.
+        lowered_text = text.lower()
+        # Skip empty keywords: str.count('') matches at every position and
+        # would inflate the density.
+        occurrences = sum(
+            lowered_text.count(keyword.lower()) for keyword in keywords if keyword
+        )
+
+        return occurrences / word_count
+
+    def calculate_readability_score(self, text: str) -> float:
+        """Return the reciprocal of the average number of tokens per sentence.
+
+        Shorter sentences therefore yield a higher score.
+        """
+        if not text.strip():
+            return 0.0
+
+        sentences = tokenize.sent_tokenize(text)
+        word_count = sum(len(tokenize.word_tokenize(sentence)) for sentence in sentences)
+        if not sentences or word_count == 0:
+            return 0.0
+
+        average_words_per_sentence = word_count / len(sentences)
+
+        return 1 / average_words_per_sentence
+
+    def perform_semantic_analysis(self, text: str) -> float:
+        """Return the share of tokens whose POS tag starts with ``N`` or ``V``."""
+        if not text.strip():
+            return 0.0
+
+        tagged_text = pos_tag(tokenize.word_tokenize(text))
+        if not tagged_text:
+            return 0.0
+
+        noun_count = sum(1 for _, pos in tagged_text if pos.startswith('N'))
+        verb_count = sum(1 for _, pos in tagged_text if pos.startswith('V'))
+
+        return (noun_count + verb_count) / len(tagged_text)
+
+class SpellChecker:
+    """Spell checking backed by the enchant ``en_US`` dictionary."""
+
+    def check_spelling(self, text: str) -> List[str]:
+        """Return, in order of appearance, every token of ``text`` the dictionary rejects."""
+        dictionary = enchant.Dict("en_US")
+
+        rejected = []
+        for word in tokenize.word_tokenize(text):
+            if dictionary.check(word):
+                continue
+            rejected.append(word)
+
+        return rejected
+
+class SEOAnalysisModule:
+    """Facade that runs preprocessing, SEO scoring and spell checking on a text."""
+
+    def __init__(self):
+        self.text_preprocessor = TextPreprocessor()
+        self.seo_analyzer = SEOAnalyzer()
+        self.spell_checker = SpellChecker()
+
+    def analyze_text(self, text: str, keywords: List[str]) -> Dict[str, Union[float, List[str]]]:
+        """Analyze ``text`` against ``keywords`` and return all metrics.
+
+        Args:
+            text: Raw text to analyze.
+            keywords: Target keywords for the density calculation.
+
+        Returns:
+            A dictionary with the keys ``seo_percentage``, ``keyword_density``,
+            ``readability_score``, ``semantic_score`` (floats) and
+            ``spelling_errors`` (list of tokens rejected by the spell checker).
+        """
+        preprocessed_text = self.text_preprocessor.preprocess_text(text)
+
+        # The text is stemmed, so the keywords must be stemmed the same way;
+        # otherwise an inflected keyword such as "keywords" can never match
+        # its stem "keyword" in the preprocessed text.
+        preprocessed_keywords = [
+            self.text_preprocessor.preprocess_text(keyword) for keyword in keywords
+        ]
+
+        seo_percentage = self.seo_analyzer.calculate_seo_percentage(
+            preprocessed_text, preprocessed_keywords
+        )
+        keyword_density = self.seo_analyzer.calculate_keyword_density(
+            preprocessed_text, preprocessed_keywords
+        )
+        readability_score = self.seo_analyzer.calculate_readability_score(preprocessed_text)
+        semantic_score = self.seo_analyzer.perform_semantic_analysis(preprocessed_text)
+
+        # Spell-check the ORIGINAL text. Stems (e.g. "sampl" for "sample") are
+        # not dictionary words and would all be reported as misspellings.
+        spelling_errors = self.spell_checker.check_spelling(text)
+
+        return {
+            'seo_percentage': seo_percentage,
+            'keyword_density': keyword_density,
+            'readability_score': readability_score,
+            'semantic_score': semantic_score,
+            'spelling_errors': spelling_errors
+        }
--- a/lib/wordpress_api_integration/README.md
+++ b/lib/wordpress_api_integration/README.md
@@ -0,0 +1,71 @@
+## Required Python third-party packages
+
+- requests==2.26.0
+- pytest==6.2.5
+- json (part of the Python standard library; nothing to install)
+
+## Logic Analysis
+
+- ['main.py', 'Main']
+- ['wordpress_api.py', 'WordpressAPI']
+- ['test_wordpress_api.py', 'TestWordpressAPI']
+
+## Task list
+
+'main.py' contains the main entry point of the program.
+'wordpress_api.py' contains the implementation of the WordpressAPI class, which handles the integration with the Wordpress API.
+'test_wordpress_api.py' contains unit tests for the WordpressAPI class.
+
+## Implementation approach
+
+To implement the WordPress API integration module, we will use the requests library, a popular open-source library for making HTTP requests in Python. It provides a simple and intuitive way to send HTTP requests and handle responses. We will also use the standard-library json module to handle JSON data. Additionally, we will write unit tests using the pytest framework to ensure the functionality and quality of the module. The module is designed to be easily integrated into existing Python codebases by providing clear usage instructions and documentation.
+
+## Python package name
+
+wordpress_api_integration
+
+## File list
+
+- main.py
+- wordpress_api.py
+- test_wordpress_api.py
+
+## Data structures and interface definitions
+
+
+    classDiagram
+        class WordpressAPI{
+            +str base_url
+            +str username
+            +str password
+            +str token
+            +str authenticate() 
+            +str upload_content(str content)
+        }
+        WordpressAPI "1" -- "1" Authentication: has
+        WordpressAPI "1" -- "1" ContentUpload: has
+        
+        class Authentication{
+            +str authenticate()
+        }
+        
+        class ContentUpload{
+            +str upload_content(str content)
+        }
+    
+
+## Program call flow
+
+
+    sequenceDiagram
+        participant M as Main
+        participant WP as WordpressAPI
+        participant A as Authentication
+        participant CU as ContentUpload
+        
+        M->>WP: Create WordpressAPI instance
+        WP->>A: Create Authentication instance
+        A->>WP: Authenticate
+        WP->>CU: Create ContentUpload instance
+        CU->>WP: Upload content
+
--- a/lib/wordpress_api_integration/V1/main.py
+++ b/lib/wordpress_api_integration/V1/main.py
@@ -0,0 +1,21 @@
+## main.py
+
+from wordpress_api import WordpressAPI
+
+
+def main():
+    """
+    Run the example flow: build a client, authenticate, upload one text.
+    """
+    client = WordpressAPI(
+        base_url="https://example.com",
+        username="admin",
+        password="password",
+    )
+
+    # Log in before attempting the upload
+    client.authenticate()
+
+    # Push a placeholder body to the upload endpoint
+    client.upload_content("This is a test content")
+
+
+if __name__ == "__main__":
+    main()
--- a/lib/wordpress_api_integration/V1/test_wordpress_api.py
+++ b/lib/wordpress_api_integration/V1/test_wordpress_api.py
@@ -0,0 +1,30 @@
+## test_wordpress_api.py
+
+import pytest
+from wordpress_api import WordpressAPI
+
+
+class TestWordpressAPI:
+    """Unit tests for WordpressAPI.
+
+    ``requests.post`` is replaced by a stub in every test so the suite never
+    touches the network and its outcome does not depend on a live server.
+    """
+
+    @pytest.fixture
+    def wp_api(self):
+        return WordpressAPI(base_url="https://example.com", username="admin", password="password")
+
+    @staticmethod
+    def _stub_post(monkeypatch, status_code, body=None):
+        """Stub ``requests.post`` as seen by wordpress_api.
+
+        The stub answers every call with ``status_code`` and ``body`` as the
+        JSON payload. Returns the list that records each ``(url, kwargs)`` call.
+        """
+        calls = []
+
+        class FakeResponse:
+            def __init__(self):
+                self.status_code = status_code
+
+            def json(self):
+                return body or {}
+
+        def fake_post(url, **kwargs):
+            calls.append((url, kwargs))
+            return FakeResponse()
+
+        monkeypatch.setattr("wordpress_api.requests.post", fake_post)
+        return calls
+
+    def test_authenticate_success(self, wp_api, monkeypatch):
+        calls = self._stub_post(monkeypatch, 200, {"token": "abc123"})
+        wp_api.authenticate()
+        assert wp_api.authentication.token == "abc123"
+        assert calls[-1][0] == "https://example.com/authenticate"
+
+    def test_authenticate_failure(self, wp_api, monkeypatch):
+        self._stub_post(monkeypatch, 401)
+        wp_api.authentication.password = "wrong_password"
+        with pytest.raises(Exception):
+            wp_api.authenticate()
+        assert wp_api.authentication.token is None
+
+    def test_upload_content_success(self, wp_api, monkeypatch):
+        calls = self._stub_post(monkeypatch, 200)
+        content = "This is a test content"
+        wp_api.upload_content(content)
+        url, kwargs = calls[-1]
+        assert url == "https://example.com/upload"
+        assert kwargs["json"]["content"] == content
+
+    def test_upload_content_failure(self, wp_api, monkeypatch):
+        self._stub_post(monkeypatch, 500)
+        content = "This is a test content"
+        wp_api.content_upload.base_url = "https://wrong_url.com"
+        with pytest.raises(Exception):
+            wp_api.upload_content(content)
--- a/lib/wordpress_api_integration/V1/wordpress_api.py
+++ b/lib/wordpress_api_integration/V1/wordpress_api.py
@@ -0,0 +1,75 @@
+## wordpress_api.py
+
+import requests
+import json
+
+class Authentication:
+    """Holds the credentials and obtains an API token from ``<base_url>/authenticate``."""
+
+    # Seconds to wait for the server. Without a timeout requests can block
+    # indefinitely on an unresponsive host.
+    REQUEST_TIMEOUT = 10
+
+    def __init__(self, base_url, username, password):
+        self.base_url = base_url
+        self.username = username
+        self.password = password
+        self.token = None  # set by authenticate() on success
+
+    def authenticate(self):
+        """
+        Authenticates the user with the Wordpress API.
+
+        Posts the username and password as JSON and stores the ``token``
+        field of the response in ``self.token``.
+
+        Raises:
+            Exception: if the response status is not 200 or the response
+                body does not contain a token.
+        """
+        url = f"{self.base_url}/authenticate"
+        payload = {
+            "username": self.username,
+            "password": self.password
+        }
+        headers = {
+            "Content-Type": "application/json"
+        }
+
+        response = requests.post(url, json=payload, headers=headers, timeout=self.REQUEST_TIMEOUT)
+
+        if response.status_code != 200:
+            raise Exception("Authentication failed")
+
+        # A 200 response with a malformed or token-less body is still a failed
+        # login; report it as such instead of leaking a bare KeyError.
+        try:
+            self.token = response.json()["token"]
+        except (ValueError, KeyError, TypeError) as err:
+            raise Exception("Authentication failed: response did not contain a token") from err
+
+            
+
+class ContentUpload:
+    """Posts content, together with an API token, to ``<base_url>/upload``."""
+
+    # Seconds to wait for the server. Without a timeout requests can block
+    # indefinitely on an unresponsive host.
+    REQUEST_TIMEOUT = 10
+
+    def __init__(self, base_url, token):
+        self.base_url = base_url
+        self.token = token
+
+    def upload_content(self, content):
+        """
+        Uploads the given content to the Wordpress API.
+
+        The content and the current ``self.token`` are sent as a JSON body.
+
+        Raises:
+            Exception: if the response status is not 200.
+        """
+        url = f"{self.base_url}/upload"
+        payload = {
+            "content": content,
+            "token": self.token
+        }
+        headers = {
+            "Content-Type": "application/json"
+        }
+
+        response = requests.post(url, json=payload, headers=headers, timeout=self.REQUEST_TIMEOUT)
+
+        if response.status_code != 200:
+            raise Exception("Content upload failed")
+
+class WordpressAPI:
+    """Entry point that combines authentication and content upload for one site."""
+
+    def __init__(self, base_url, username, password):
+        self.base_url = base_url
+        self.username = username
+        self.password = password
+        self.authentication = Authentication(base_url, username, password)
+        # No token exists yet at this point; authenticate() supplies it later.
+        self.content_upload = ContentUpload(base_url, self.authentication.token)
+
+    def authenticate(self):
+        """
+        Authenticates the user with the Wordpress API.
+
+        On success the freshly obtained token is handed to the uploader.
+        """
+        self.authentication.authenticate()
+        # The uploader captured token=None in __init__. Without this
+        # assignment every upload would be sent without a token.
+        self.content_upload.token = self.authentication.token
+
+    def upload_content(self, content):
+        """
+        Uploads the given content to the Wordpress API.
+        """
+        self.content_upload.upload_content(content)
--- a/lib/wordpress_api_integration/V2/main.py
+++ b/lib/wordpress_api_integration/V2/main.py
@@ -0,0 +1,54 @@
+## main.py
+
+import os
+import requests
+import json
+
+
+class WordPressAPIIntegration:
+    """Validate an image file, authenticate, and upload the file to the API."""
+
+    # Seconds to wait for the server. Without a timeout requests can block
+    # indefinitely on an unresponsive host.
+    REQUEST_TIMEOUT = 30
+
+    def __init__(self, credentials: dict):
+        self.credentials = credentials
+
+    def upload_file(self, file_path: str) -> bool:
+        """Run the three steps in order and stop at the first one that fails.
+
+        Returns:
+            True only if validation, authentication and upload all succeed.
+        """
+        return bool(
+            self._check_file(file_path)
+            and self._authenticate()
+            and self._upload_file_to_api(file_path)
+        )
+
+    def _check_file(self, file_path: str) -> bool:
+        """Return True if the file exists, is at most 10MB and has an image extension."""
+        max_file_size = 10 * 1024 * 1024  # 10MB
+        try:
+            file_size = os.path.getsize(file_path)
+        except OSError:
+            # A missing or unreadable file is simply not a valid upload.
+            return False
+        if file_size > max_file_size:
+            return False
+
+        valid_file_types = ['.jpg', '.jpeg', '.png', '.gif']
+        # Compare case-insensitively so that e.g. "PHOTO.JPG" is accepted.
+        file_extension = os.path.splitext(file_path)[1].lower()
+
+        return file_extension in valid_file_types
+
+    def _authenticate(self) -> bool:
+        """Post the credentials as JSON; return True on an HTTP 200 response."""
+        url = "https://wordpress-api.com/authenticate"
+        headers = {'Content-Type': 'application/json'}
+        data = json.dumps(self.credentials)
+        response = requests.post(url, headers=headers, data=data, timeout=self.REQUEST_TIMEOUT)
+        # NOTE(review): nothing from this response is passed on to the upload
+        # request - confirm whether the API expects a token or session.
+        return response.status_code == 200
+
+    def _upload_file_to_api(self, file_path: str) -> bool:
+        """Upload the file as multipart form data; return True on an HTTP 200 response."""
+        url = "https://wordpress-api.com/upload"
+        # The context manager closes the handle even if the request raises.
+        with open(file_path, 'rb') as file_handle:
+            response = requests.post(url, files={'file': file_handle}, timeout=self.REQUEST_TIMEOUT)
+        return response.status_code == 200
--- a/lib/wordpress_api_integration/V2/test_wordpress_api_integration.py
+++ b/lib/wordpress_api_integration/V2/test_wordpress_api_integration.py
@@ -0,0 +1,86 @@
+## test_wordpress_api_integration.py
+
+import os
+import pytest
+from wordpress_api_integration import WordPressAPIIntegration
+
+
+class TestWordPressAPIIntegration:
+    """Tests for the step sequencing in WordPressAPIIntegration.upload_file."""
+
+    @pytest.fixture
+    def credentials(self):
+        return {
+            "username": "test_user",
+            "password": "test_password"
+        }
+
+    @pytest.fixture
+    def valid_file_path(self):
+        return "path/to/valid/file.jpg"
+
+    @pytest.fixture
+    def invalid_file_path(self):
+        return "path/to/invalid/file.txt"
+
+    @staticmethod
+    def _stub_steps(monkeypatch, *, check=True, auth=True, upload=True):
+        """Replace the three private steps with stubs returning the given results.
+
+        The stubs are installed on the class, so they are called as bound
+        methods and must accept ``self``. Stubs without it raise TypeError
+        as soon as upload_file() calls them.
+        """
+        monkeypatch.setattr(
+            WordPressAPIIntegration, "_check_file", lambda self, file_path: check
+        )
+        monkeypatch.setattr(
+            WordPressAPIIntegration, "_authenticate", lambda self: auth
+        )
+        monkeypatch.setattr(
+            WordPressAPIIntegration, "_upload_file_to_api", lambda self, file_path: upload
+        )
+
+    def test_upload_file_valid_file(self, credentials, valid_file_path, monkeypatch):
+        self._stub_steps(monkeypatch)
+
+        api_integration = WordPressAPIIntegration(credentials)
+        result = api_integration.upload_file(valid_file_path)
+
+        assert result is True
+
+    def test_upload_file_invalid_file(self, credentials, invalid_file_path, monkeypatch):
+        self._stub_steps(monkeypatch, check=False)
+
+        api_integration = WordPressAPIIntegration(credentials)
+        result = api_integration.upload_file(invalid_file_path)
+
+        assert result is False
+
+    def test_upload_file_authentication_failed(self, credentials, valid_file_path, monkeypatch):
+        self._stub_steps(monkeypatch, auth=False)
+
+        api_integration = WordPressAPIIntegration(credentials)
+        result = api_integration.upload_file(valid_file_path)
+
+        assert result is False
+
+    def test_upload_file_upload_failed(self, credentials, valid_file_path, monkeypatch):
+        self._stub_steps(monkeypatch, upload=False)
+
+        api_integration = WordPressAPIIntegration(credentials)
+        result = api_integration.upload_file(valid_file_path)
+
+        assert result is False
--- a/lib/wordpress_api_integration/V2/wordpress_api_integration.py
+++ b/lib/wordpress_api_integration/V2/wordpress_api_integration.py
@@ -0,0 +1,54 @@
+## wordpress_api_integration.py
+
+import os
+import requests
+import json
+
+
+class WordPressAPIIntegration:
+    """Validate an image file, authenticate, and upload the file to the API."""
+
+    # Seconds to wait for the server. Without a timeout requests can block
+    # indefinitely on an unresponsive host.
+    REQUEST_TIMEOUT = 30
+
+    def __init__(self, credentials: dict):
+        self.credentials = credentials
+
+    def upload_file(self, file_path: str) -> bool:
+        """Run the three steps in order and stop at the first one that fails.
+
+        Returns:
+            True only if validation, authentication and upload all succeed.
+        """
+        return bool(
+            self._check_file(file_path)
+            and self._authenticate()
+            and self._upload_file_to_api(file_path)
+        )
+
+    def _check_file(self, file_path: str) -> bool:
+        """Return True if the file exists, is at most 10MB and has an image extension."""
+        max_file_size = 10 * 1024 * 1024  # 10MB
+        try:
+            file_size = os.path.getsize(file_path)
+        except OSError:
+            # A missing or unreadable file is simply not a valid upload.
+            return False
+        if file_size > max_file_size:
+            return False
+
+        valid_file_types = ['.jpg', '.jpeg', '.png', '.gif']
+        # Compare case-insensitively so that e.g. "PHOTO.JPG" is accepted.
+        file_extension = os.path.splitext(file_path)[1].lower()
+
+        return file_extension in valid_file_types
+
+    def _authenticate(self) -> bool:
+        """Post the credentials as JSON; return True on an HTTP 200 response."""
+        url = "https://wordpress-api.com/authenticate"
+        headers = {'Content-Type': 'application/json'}
+        data = json.dumps(self.credentials)
+        response = requests.post(url, headers=headers, data=data, timeout=self.REQUEST_TIMEOUT)
+        # NOTE(review): nothing from this response is passed on to the upload
+        # request - confirm whether the API expects a token or session.
+        return response.status_code == 200
+
+    def _upload_file_to_api(self, file_path: str) -> bool:
+        """Upload the file as multipart form data; return True on an HTTP 200 response."""
+        url = "https://wordpress-api.com/upload"
+        # The context manager closes the handle even if the request raises.
+        with open(file_path, 'rb') as file_handle:
+            response = requests.post(url, files={'file': file_handle}, timeout=self.REQUEST_TIMEOUT)
+        return response.status_code == 200
--- a/main_config.ini
+++ b/main_config.ini
@@ -0,0 +1,27 @@
+###################################################
+#
+# This is the main config file which drives the code.
+# Keeping all settings here avoids the need to modify the
+# code and so improves usability.
+#
+##################################################
+
+
+# Set the OpenAI API key
+openai_api_key=""
+# bard_api=""
+# ms_bing_api=""
+
+# Mention which model to use, default is GPT-3.5
+model_name=""
+
+# Write the prompt for generating TEXT reply from GenAI engine
+txt_prompt=""
+
+# An effective text prompt may have several components, including:
+# Main subject: the who of the prompt, e.g. Small puppy with a fluffy white tail wearing a red collar.
+# Action: the what/how of the prompt, e.g. Joyfully carrying a long wooden stick.
+# Surroundings: the when/where of the prompt, e.g. On a busy street corner at dusk. A small basketball court in the background.
+# Visual aesthetics: how you want the images to look, e.g. Shot from above, soft yellow light, blurred background.
+# Write the prompt for generating IMG reply from GenAI engines
+img_text=""
--- a/4
+++ b/4
@@ -0,0 +1,4 @@
+Move all hard-coded values out of the modules and put them in a config file.
+Suggest functions that can be improved for readability and polymorphism, and remove redundancy.
+Make the code conform to PEP standards.
+Include try and except. Include exception handling wherever possible. Include detailed exceptions and error messages.
--- a/pseo_main.py
+++ b/pseo_main.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python3
+
+#########################################################
+#
+# This is the main module for calling pseo related functions.
+# This is the end user interface and is user driven.
+# TBD: add an argparse CLI and read settings from the config file.
+# For usability, no editing of code should be required.
+#
+#########################################################
+import sys
+
+import json
+import traceback
+from loguru import logger
+logger.add(sys.stdout, colorize=True, format="<green>{time}</green> <level>{message}</level>")
+
+from lib.generate_image_from_prompt import generate_image, gen_new_from_given_img
+from lib.get_text_response import get_prompt_reply, generate_detailed_blog
+
+
+# Script body: generate one detailed blog post. Any failure is logged with its
+# traceback and reported to the caller through a non-zero exit status.
+try:
+    logger.info("Starting homebrew pseo blog generator.")
+    # Prompt for the plain text-reply path; only used by the commented-out
+    # get_prompt_reply() call below.
+    prompt = "Create a detailed and technical blog of best AI tools for text-to-video conversion in 2023, along with features, pricing, pros, cons, and website links and if free or paid version. Summarize this blog in conclusion at the end. Write in markdown."
+    #txt_reply = get_prompt_reply(prompt, 2000)
+
+    # The idea is to 
+    #generate_image(logger)
+    #gen_new_from_given_img(logger)
+
+    # Generate detailed blog by only providing keywords from blog title.
+    # Example: AI text to video tools
+    generate_detailed_blog("text to video AI tools")
+
+except Exception as err:
+    # logger.exception records the same message at ERROR level plus the full
+    # traceback, which logger.error alone would discard.
+    logger.exception(f"Error occured in main::{err}")
+    # Exit non-zero so shells and schedulers can tell the run failed.
+    sys.exit(1)
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+loguru
+openai
+Pillow
+requests
+tqdm
+urllib3
				`@@ -0,0 +1,2 @@`
				`https://github.com/hardikvasa/google-images-download`