Fixed bugs and made changes to the blog generation template and prompts. WIP.

2023-10-10 17:37:26 +05:30
parent 405b81ceaa
commit 2860345aaf
7 changed files with 398 additions and 126 deletions
--- a/lib/get_text_response.py
+++ b/lib/get_text_response.py
@@ -7,72 +7,21 @@
 #
 ########################################################################

+import json
+
 import openai
 from tqdm import tqdm, trange
 import time
 import re

-
-def get_prompt_reply(prompt, max_token, outputs=1):
-    try:
-        # using OpenAI's Completion module that helps execute
-        # any tasks involving text
-        response = openai.Completion.create(
-            # model name used here is text-davinci-003
-            # there are many other models available under the
-            # umbrella of GPT-3
-            model="text-davinci-003",
-            # passing the user input
-            prompt=prompt,
-            # generated output can have "max_tokens" number of tokens
-            max_tokens=max_token,
-            # number of outputs generated in one call
-            n=outputs
-    )
-    except openai.error.Timeout as e:
-       #Handle timeout error, e.g. retry or log
-       print(f"OpenAI API request timed out: {e}")
-       pass
-    except openai.error.APIError as e:
-       #Handle API error, e.g. retry or log
-       print(f"OpenAI API returned an API Error: {e}")
-       pass
-    except openai.error.APIConnectionError as e:
-       #Handle connection error, e.g. check network or log
-       print(f"OpenAI API request failed to connect: {e}")
-       pass
-    except openai.error.InvalidRequestError as e:
-       #Handle invalid request error, e.g. validate parameters or log
-       print(f"OpenAI API request was invalid: {e}")
-       pass
-    except openai.error.AuthenticationError as e:
-       #Handle authentication error, e.g. check credentials or log
-       print(f"OpenAI API request was not authorized: {e}")
-       pass
-    except openai.error.PermissionError as e:
-       #Handle permission error, e.g. check scope or log
-       print(f"OpenAI API request was not permitted: {e}")
-       pass
-    except openai.error.RateLimitError as e:
-       #Handle rate limit error, e.g. wait or log
-       print(f"OpenAI API request exceeded rate limit: {e}")
-       pass
-
-    print(f"Prompt output: {response.choices[0].text.strip()}")
-    # creating a list to store all the outputs
-    output = list()
-    for k in response['choices']:
-        output.append(k['text'].strip())
-    return output
+from .gpt_providers.openai_gpt_provider import openai_chatgpt


-def generate_detailed_blog(blog_keywords):
+def generate_detailed_blog(num_blogs, blog_keywords, niche):
    """
    This function will take a blog Topic to first generate sections for it
    and then generate content for each section.
    """
-
-    # TBD
    # I want you to act as a blogger and you want to write a blog post about [topic], 
    # with a friendly and approachable tone that engages readers. 
    # Your target audience is [define your target audience]. 
@@ -85,101 +34,191 @@ def generate_detailed_blog(blog_keywords):

    # Use to store the blog in a string, to save in a *.md file.
    blog_markdown_str = ""
-    blog_topic_arr = list(generate_blog_topics(blog_keywords).split("\n"))
-    # Remove null values and incomplete results.
-    while('' in blog_topic_arr):
-        blog_topic_arr.remove('')
-
+    
+    blog_topic_arr = generate_blog_topics(blog_keywords, num_blogs, niche)
    print(f"Generated Blog Topics:---- {blog_topic_arr}")
    
    # For each of blog topic, generate content.
    for a_blog_topic in blog_topic_arr:
-        # Error in generating topic content: Rate limit reached for default-global-with-image-limits 
-        # in free account on requests per min. Limit: 3 / min. Please try again in 20s.
-        for i in trange(30):
-            time.sleep(1)
-        # The generated topics usually have 1) or ^\W*\D* . Remove them from prompt.
-        a_topic = re.sub(r"^\W*\D*", "", a_blog_topic)
+        # if md/html
+        blog_markdown_str = "# " + a_blog_topic + "\n"
        
-        tpc_cnt = generate_topic_content(a_topic)
-        print(f"{a_topic} ------ {tpc_cnt}")
+        # Get the introduction specific to blog title and sub topics.
+        tpc_outlines = generate_topic_outline(a_blog_topic)
+        blog_intro = get_blog_intro(a_blog_topic, tpc_outlines)
+        blog_markdown_str = blog_markdown_str + "### Introduction" + "\n" + f"{blog_intro}" + "\n"

-        # We now need to concatenate all the sections and sew it into blog content.
-        tmp_blog_markdown_str = blog_markdown_str + " " + a_blog_topic + " " + f"{tpc_cnt}"
-        blog_markdown_str = blog_markdown_str + a_blog_topic + "\n\n" + f"{tpc_cnt}" + "\n\n"
+        # Now, for each blog we have sub topic. Generate content for each of the sub topic.
+        for a_outline in tpc_outlines:
+            sub_topic_content = generate_topic_content(blog_keywords, a_outline)
+            blog_markdown_str = blog_markdown_str + "\n" + f"\n{sub_topic_content}" + "\n"
+            blog_markdown_str = blog_markdown_str + "\n" + "-------------------------" + "\n"
+
+        # Get the Conclusion of the blog, by passing the generated blog.
+        blog_conclusion = get_blog_conclusion(blog_markdown_str)
+        blog_markdown_str = blog_markdown_str + "# Conclusion" + "\n" + f"{blog_conclusion}" + "\n"
+
+        # print/check the final blog content.
+        print(f"Final blog content: {blog_markdown_str}")
+        # Save the blog content as a .md file. Markdown or HTML ?
+        save_blog_to_file(blog_markdown_str)
+
+        exit(1)

-    # print/check the final blog content.
-    print(f"Final blog content: {blog_markdown_str}")
-    # Save the blog content as a .md file. Markdown or HTML ?
    # Use chatgpt to convert the text into HTML or markdown.

    # Now, we need perform some *basic checks on the blog content, such as:
    # is_content_ai_generated.py, plagiarism_checker_from_known_sources.py
    # seo_analyzer.py . These are present in the lib folder.
-    # prompt: Rewrite, improve and paraphrase [text] and use headings and subheadings to break up the content and make it easier to read using the keyword [keyword].
+    # prompt: Rewrite, improve and paraphrase [text] and use headings and subheadings 
+    # to break up the content and make it easier to read using the keyword [keyword].



-def generate_blog_topics(blog_keywords):
+def generate_blog_topics(blog_keywords, num_blogs, niche):
    """
    For a given prompt, generate blog topics.
    Using the davinci-instruct-beta-v3 model. It’s proven to be an ideal 
    one for generating unique blog content.
-    Ex: Generate SEO optimized blog topics on AI text to image with Python
+    Ex: Generate SEO optimized blog topics on given keywords
    """
-    # Prompt engineering, huh ?
-    # Create a blog post about “{blogPostTopic}” . Write it in a “{tone}” tone. Use transition words. 
-    # Use active voice. Write over 1000 words. The blog post should be in a beginners guide style. 
-    # Add title and subtitle for each section. It should have a minimum of 6 sections. 
-    # Include the following keywords: “{keywords}”. Create a good slug for this post and a 
-    # meta description with a maximum of 100 words. and add it to the end of the blog post
-
-    prompt = f"As an experienced AI scientist and technical writer, generate SEO optimized blog topics about {blog_keywords}."
-    #prompt = "Generate SEO optimized blog topics for" + " " + f"{blog_keywords}"
-    try:
-        response = openai.Completion.create(
-            engine="davinci-instruct-beta-v3",
-            prompt=prompt,
-            temperature=0.7,
-            max_tokens=100,
-            top_p=1,
-            frequency_penalty=0,
-            presence_penalty=0
+    # Get more keywords, based on user given keywords.
+    # Beware of keywords stuffing, clustering, semantic should help avoid.
+    more_keywords = get_related_keywords(num_blogs, blog_keywords, niche)
+    # f"including the following keywords: {more_keywords}." 
+    prompt = ("As an SEO specialist and blog content writer, "
+            f"please write {num_blogs} catchy and SEO-friendly blog topics on {blog_keywords},"
+            f"including the following keywords: {more_keywords}."
        )
-        return response.choices[0].text
+    print(f"prompt used for blog titles: {prompt}")
+    # Calculate the max tokens based on the number of blogs
+    max_tokens = min(1000, num_blogs * 100)
+    try:
+        response = openai_chatgpt(
+                prompt, 
+                model="text-davinci-003", 
+                temperature=0.9, 
+                max_tokens=max_tokens, 
+                top_p=0.9,
+                n=1
+                )
+        topic_list = extract_key_text(response)
+        return(topic_list)
    except Exception as err:
-        print(f"Error in generating blog topics: {err}")
+        SystemError(f"Error in generating blog topics: {err}")


-def generate_topic_content(prompt):
+def generate_topic_outline(blog_title):
+    """
+    Given a blog title generate an outline for it
+    """
+    # TBD: Remove hardcoding, make dynamic
+    prompt = ("As a technical writer and SEO expert, suggest 7 beginner-friendly and helpful sub-topics"
+            f"for the blog title '{blog_title}',"
+            "Include 2 sub topics on related long-tailed keywords and "
+            "2 sub topics on most popular questions."
+            )
+    print(f"prompt used for blog title Outline :{prompt}")
+    # TBD: Add logic for which_provider and which_model
+    response = openai_chatgpt(
+            prompt,
+            model="text-davinci-003",
+            temperature=0.7,
+            max_tokens=1000,
+            top_p=0.9,
+            n=1
+            )
+    text_values = []
+    for choice in response["choices"]:
+        text_values.extend(choice["text"].split("\n"))
+    return ([element for element in text_values if element])
+
+
+def generate_topic_content(blog_keywords, sub_topic):
    """
    For each of given topic generate content for it.
    """
+    # The outline should contain various subheadings and include the starting sentence for each section.
+    prompt = (f"As a professional writer and topic authority on '{blog_keywords}',"
+            f"craft a captivating, inviting and factual (no more than 700 characters) blog content on {sub_topic}."
+            f"Use bulleit points and other readibility enhancers."
+            )
    try:
-        # Generate a blog post outline for the following topic: {topic}. 
-        # The outline should contain various subheadings and include the starting sentence for each section.
-        prompt = f"As an experienced AI researcher and technical writer, blog about {prompt}."
-        response = openai.Completion.create(
-            engine="davinci-instruct-beta-v3",
-            prompt=prompt,
+        response = openai_chatgpt(prompt)
+        response = openai_chatgpt(
+            prompt,
+            model="text-davinci-003",
            temperature=0.7,
-            max_tokens=500,
-            top_p=1,
-            frequency_penalty=0,
-            presence_penalty=0
-        )
+            max_tokens=1000,
+            top_p=0.9,
+            n=1
+            )
+        text_values = []
+        for choice in response["choices"]:
+            text_values.extend(choice["text"].split("\n"))
+        return (' '.join([element for element in text_values if element]))
    except Exception as err:
-        print(f"Error in generating topic content: {err}")
+        SystemError(f"Error in generating topic content: {err}")

    return response.choices[0].text


+def get_blog_intro(blog_title, blog_topics):
+    """
+    Generate blog introduction as per title and sub topics
+    """
+    prompt = (f"As a professional writer, craft a captivating, inviting, and concise (no more than 550 characters)"
+            f"introduction for the blog titled '{blog_title}' with the following sub-topics: '{blog_topics}'"
+            f"The introduction should compel readers to delve deeper into the blog post."
+            )
+    try:
+        # TBD: Add logic for which_provider and which_model
+        response = openai_chatgpt(
+            prompt,
+            model="text-davinci-003",
+            temperature=0.7,
+            max_tokens=1000,
+            top_p=0.9,
+            n=1
+            )
+        text_values = []
+        for choice in response["choices"]:
+            text_values.extend(choice["text"].split("\n"))
+        return (' '.join([element for element in text_values if element]))
+    except Exception as err:
+        SystemError(f"Error in generating topic content: {err}")
+
+
+def get_blog_conclusion(blog_content):
+    """
+    Accepts a blog content and concludes it.
+    """
+    prompt = ("As an expert SEO and blog writer, please conclude the given blog providing vital take aways,"
+            "summarise key points (no more than 300 characters). The blog content: '{blog_content}'"
+            )
+    try:
+        # TBD: Add logic for which_provider and which_model
+        response = openai_chatgpt(
+            prompt,
+            model="text-davinci-003",
+            temperature=0.9,
+            max_tokens=450,
+            top_p=0.7,
+            n=1
+        )
+        text_values = []
+        for choice in response["choices"]:
+            text_values.extend(choice["text"].split("\n"))
+        return (' '.join([element for element in text_values if element]))
+    except Exception as err:
+        SystemError(f"Error in generating blog conclusion: {err}")
+
+
 def generate_blog_description():
    """
        Prompt designed to give SEO optimized blog descripton
    """
    # Suggest keywords that I should include in my meta description for my blog post on [topic]
-
    # I want to generate high CTR meta and keyword rich meta title and meta descriptions in text format. 
    # My keywords are – [keyword 1], [keyword 2], [keyword 3]

@@ -198,5 +237,110 @@ def get_long_tailed_keywords(blog_article):
    """
        Function to get long tailed keywords for the blog article.
    """
-    #  want you to generate a list of long-tail keywords that are related to the following blog post [Enter blog post text here]
+    # Want you to generate a list of long-tail keywords that are related 
+    # to the following blog post [Enter blog post text here]
    pass
+
+
+def save_blog_to_file(blog_content, file_type="md"):
+    """ Common function to save the generated blog to a file.
+    arg: file_type can be md or html
+    """
+    output_path = "../generated_blogs"
+    if not os.path.exists(output_path):
+        # If the directory does not exist, create it
+        os.makedirs(output_path)
+
+    output_today = os.path.join(output_path, f'{datetime.date.today().strftime("%d-%m-%y")}')
+    if not os.path.exists(output_today):
+        os.makedirs(output_today)
+    else:
+        with open(f"{output_today}/{blog_title}.md", "w") as f:
+            f.write(blog_content)
+
+
+def extract_key_text(json_data):
+    """Extracts key text from a given JSON object.
+        Args:json_data: A JSON object.
+        Returns: A list of strings containing the key text.
+        Raises: ValueError: If the JSON object is not valid.
+    """
+
+    try:
+        # Extract the "choices" key from the JSON object.
+        choices = json_data["choices"]
+
+        # Iterate over the "choices" list and extract the "text" key from each item.
+        key_text = []
+        for choice in choices:
+            text = choice["text"]
+
+            # Split the text into a list of sentences.
+            sentences = text.split("\n")
+
+            # Iterate over the list of sentences and extract the first sentence.
+            for sentence in sentences:
+                # The generated topics usually have 1) or ^\W*\D* . Remove them from prompt.
+                new_str = sentence.replace("'", '')
+                new_str = re.sub(r'^(\d*\.)', '', new_str)
+                key_text.append(new_str)
+
+        # Remove duplicate key text.
+        key_text = list(set(key_text))
+        # Remove empty values.
+        key_text = [i for i in key_text if i]
+        return key_text
+    except KeyError as e:
+        raise ValueError(f"Missing key in JSON object: {e.args[0]}")
+    except TypeError as e:
+        raise ValueError(f"Invalid JSON object: {e.args[0]}")
+
+
+def get_related_keywords(num_blogs, keywords, niche):
+    """
+    Helper function to get more keywords from GPTs.
+    """
+    # Check if niche: use long tailed, else use popular keywords.
+    if niche:
+        prompt = (f"Generate a list without description of the top {num_blogs} most popular and semantically"
+                f"related long-tailed keywords and entities for the topic of {keywords} that are used in"
+                "high-quality content and relevant to my competitors."
+                )
+    else:
+        prompt = (f"Generate a list without description of the top {num_blogs} most popular and"
+                f" semantically related keywords and entities for the topic of {keywords} that are used"
+                " in high-quality content and relevant to my competitors."
+                )
+    # TBD: Add logic for which_provider and which_model
+    response = openai_chatgpt(
+            prompt,
+            model="text-davinci-003",
+            temperature=0.7,
+            max_tokens=100,
+            top_p=0.9,
+            n=10 
+            )
+
+    # Extract the keywords from the response
+    keywords = []
+    for choice in response.choices:
+        # Split the response into words
+        words = choice.text.split(" ")
+
+    # Add the words to the list of keywords
+    for text in words:
+        # Remove digits
+        text = re.sub(r'\d', '', text)
+
+        # Remove special characters
+        text = re.sub(r'[^\w\s]', '', text)
+        # Remove newline characters
+        text = text.replace('\n', '')
+
+        keywords.append(text)
+
+    # Remove any duplicate keywords
+    keywords = set(keywords)
+
+    # Return the list of keywords
+    return (' '.join(keywords))