From 2860345aaf8f7561e38f2a5a026023e3e65fb521 Mon Sep 17 00:00:00 2001 From: AjaySi Date: Tue, 10 Oct 2023 17:37:26 +0530 Subject: [PATCH] Fixed bugs and changes in Blog generation template and prompts. WIP. --- README.md | 15 +- lib/get_text_response.py | 368 ++++++++++++++++------- lib/gpt_providers/README.md | 19 ++ lib/gpt_providers/openai_gpt_provider.py | 57 ++++ main_config.ini | 15 + prompts/blog_ideas_prompts.md | 20 ++ pseo_main.py | 30 +- 7 files changed, 398 insertions(+), 126 deletions(-) create mode 100644 lib/gpt_providers/README.md create mode 100644 lib/gpt_providers/openai_gpt_provider.py diff --git a/README.md b/README.md index db406f29..032423f8 100644 --- a/README.md +++ b/README.md @@ -42,10 +42,23 @@ options: --niche NICHE Whether the blog is a niche blog (default: False). *Example: -python3 pseo_main.py --num_blogs "10" --keywords Python, programming, data science --niche True +python3 pseo_main.py --num_blogs "10" --keywords "Python, programming, data science" --niche True ---------------------------------- +The generated blogs are saved in the generated_blogs folder. Presently, the blog template is rigid and follows the +below pattern: +[Blog Title] +[Introduction of n chars] +[Body] +[Body][topic][content of n chars on sub-topic] +[Conclusion] + +TBD: More templates and an easy way to change prompts are in the pipeline. 
+ +----------------------------------- + + # The detailed SEO checks are as follows: - Keyword Density diff --git a/lib/get_text_response.py b/lib/get_text_response.py index d05b4e6e..f889b696 100644 --- a/lib/get_text_response.py +++ b/lib/get_text_response.py @@ -7,72 +7,21 @@ # ######################################################################## +import json + import openai from tqdm import tqdm, trange import time import re - -def get_prompt_reply(prompt, max_token, outputs=1): - try: - # using OpenAI's Completion module that helps execute - # any tasks involving text - response = openai.Completion.create( - # model name used here is text-davinci-003 - # there are many other models available under the - # umbrella of GPT-3 - model="text-davinci-003", - # passing the user input - prompt=prompt, - # generated output can have "max_tokens" number of tokens - max_tokens=max_token, - # number of outputs generated in one call - n=outputs - ) - except openai.error.Timeout as e: - #Handle timeout error, e.g. retry or log - print(f"OpenAI API request timed out: {e}") - pass - except openai.error.APIError as e: - #Handle API error, e.g. retry or log - print(f"OpenAI API returned an API Error: {e}") - pass - except openai.error.APIConnectionError as e: - #Handle connection error, e.g. check network or log - print(f"OpenAI API request failed to connect: {e}") - pass - except openai.error.InvalidRequestError as e: - #Handle invalid request error, e.g. validate parameters or log - print(f"OpenAI API request was invalid: {e}") - pass - except openai.error.AuthenticationError as e: - #Handle authentication error, e.g. check credentials or log - print(f"OpenAI API request was not authorized: {e}") - pass - except openai.error.PermissionError as e: - #Handle permission error, e.g. check scope or log - print(f"OpenAI API request was not permitted: {e}") - pass - except openai.error.RateLimitError as e: - #Handle rate limit error, e.g. 
wait or log - print(f"OpenAI API request exceeded rate limit: {e}") - pass - - print(f"Prompt output: {response.choices[0].text.strip()}") - # creating a list to store all the outputs - output = list() - for k in response['choices']: - output.append(k['text'].strip()) - return output +from .gpt_providers.openai_gpt_provider import openai_chatgpt -def generate_detailed_blog(blog_keywords): +def generate_detailed_blog(num_blogs, blog_keywords, niche): """ This function will take a blog Topic to first generate sections for it and then generate content for each section. """ - - # TBD # I want you to act as a blogger and you want to write a blog post about [topic], # with a friendly and approachable tone that engages readers. # Your target audience is [define your target audience]. @@ -85,101 +34,191 @@ def generate_detailed_blog(blog_keywords): # Use to store the blog in a string, to save in a *.md file. blog_markdown_str = "" - blog_topic_arr = list(generate_blog_topics(blog_keywords).split("\n")) - # Remove null values and incomplete results. - while('' in blog_topic_arr): - blog_topic_arr.remove('') - + + blog_topic_arr = generate_blog_topics(blog_keywords, num_blogs, niche) print(f"Generated Blog Topics:---- {blog_topic_arr}") # For each of blog topic, generate content. for a_blog_topic in blog_topic_arr: - # Error in generating topic content: Rate limit reached for default-global-with-image-limits - # in free account on requests per min. Limit: 3 / min. Please try again in 20s. - for i in trange(30): - time.sleep(1) - # The generated topics usually have 1) or ^\W*\D* . Remove them from prompt. - a_topic = re.sub(r"^\W*\D*", "", a_blog_topic) + # if md/html + blog_markdown_str = "# " + a_blog_topic + "\n" - tpc_cnt = generate_topic_content(a_topic) - print(f"{a_topic} ------ {tpc_cnt}") + # Get the introduction specific to blog title and sub topics. 
+ tpc_outlines = generate_topic_outline(a_blog_topic) + blog_intro = get_blog_intro(a_blog_topic, tpc_outlines) + blog_markdown_str = blog_markdown_str + "### Introduction" + "\n" + f"{blog_intro}" + "\n" - # We now need to concatenate all the sections and sew it into blog content. - tmp_blog_markdown_str = blog_markdown_str + " " + a_blog_topic + " " + f"{tpc_cnt}" - blog_markdown_str = blog_markdown_str + a_blog_topic + "\n\n" + f"{tpc_cnt}" + "\n\n" + # Now, for each blog we have sub topic. Generate content for each of the sub topic. + for a_outline in tpc_outlines: + sub_topic_content = generate_topic_content(blog_keywords, a_outline) + blog_markdown_str = blog_markdown_str + "\n" + f"\n{sub_topic_content}" + "\n" + blog_markdown_str = blog_markdown_str + "\n" + "-------------------------" + "\n" + + # Get the Conclusion of the blog, by passing the generated blog. + blog_conclusion = get_blog_conclusion(blog_markdown_str) + blog_markdown_str = blog_markdown_str + "# Conclusion" + "\n" + f"{blog_conclusion}" + "\n" + + # print/check the final blog content. + print(f"Final blog content: {blog_markdown_str}") + # Save the blog content as a .md file. Markdown or HTML ? + save_blog_to_file(blog_markdown_str) + + exit(1) - # print/check the final blog content. - print(f"Final blog content: {blog_markdown_str}") - # Save the blog content as a .md file. Markdown or HTML ? # Use chatgpt to convert the text into HTML or markdown. # Now, we need perform some *basic checks on the blog content, such as: # is_content_ai_generated.py, plagiarism_checker_from_known_sources.py # seo_analyzer.py . These are present in the lib folder. - # prompt: Rewrite, improve and paraphrase [text] and use headings and subheadings to break up the content and make it easier to read using the keyword [keyword]. + # prompt: Rewrite, improve and paraphrase [text] and use headings and subheadings + # to break up the content and make it easier to read using the keyword [keyword]. 
-def generate_blog_topics(blog_keywords): +def generate_blog_topics(blog_keywords, num_blogs, niche): """ For a given prompt, generate blog topics. Using the davinci-instruct-beta-v3 model. It’s proven to be an ideal one for generating unique blog content. - Ex: Generate SEO optimized blog topics on AI text to image with Python + Ex: Generate SEO optimized blog topics on given keywords """ - # Prompt engineering, huh ? - # Create a blog post about “{blogPostTopic}” . Write it in a “{tone}” tone. Use transition words. - # Use active voice. Write over 1000 words. The blog post should be in a beginners guide style. - # Add title and subtitle for each section. It should have a minimum of 6 sections. - # Include the following keywords: “{keywords}”. Create a good slug for this post and a - # meta description with a maximum of 100 words. and add it to the end of the blog post - - prompt = f"As an experienced AI scientist and technical writer, generate SEO optimized blog topics about {blog_keywords}." - #prompt = "Generate SEO optimized blog topics for" + " " + f"{blog_keywords}" - try: - response = openai.Completion.create( - engine="davinci-instruct-beta-v3", - prompt=prompt, - temperature=0.7, - max_tokens=100, - top_p=1, - frequency_penalty=0, - presence_penalty=0 + # Get more keywords, based on user given keywords. + # Beware of keywords stuffing, clustering, semantic should help avoid. + more_keywords = get_related_keywords(num_blogs, blog_keywords, niche) + # f"including the following keywords: {more_keywords}." + prompt = ("As an SEO specialist and blog content writer, " + f"please write {num_blogs} catchy and SEO-friendly blog topics on {blog_keywords}," + f"including the following keywords: {more_keywords}." 
) - return response.choices[0].text + print(f"prompt used for blog titles: {prompt}") + # Calculate the max tokens based on the number of blogs + max_tokens = min(1000, num_blogs * 100) + try: + response = openai_chatgpt( + prompt, + model="text-davinci-003", + temperature=0.9, + max_tokens=max_tokens, + top_p=0.9, + n=1 + ) + topic_list = extract_key_text(response) + return(topic_list) except Exception as err: - print(f"Error in generating blog topics: {err}") + SystemError(f"Error in generating blog topics: {err}") -def generate_topic_content(prompt): +def generate_topic_outline(blog_title): + """ + Given a blog title generate an outline for it + """ + # TBD: Remove hardcoding, make dynamic + prompt = ("As a technical writer and SEO expert, suggest 7 beginner-friendly and helpful sub-topics" + f"for the blog title '{blog_title}'," + "Include 2 sub topics on related long-tailed keywords and " + "2 sub topics on most popular questions." + ) + print(f"prompt used for blog title Outline :{prompt}") + # TBD: Add logic for which_provider and which_model + response = openai_chatgpt( + prompt, + model="text-davinci-003", + temperature=0.7, + max_tokens=1000, + top_p=0.9, + n=1 + ) + text_values = [] + for choice in response["choices"]: + text_values.extend(choice["text"].split("\n")) + return ([element for element in text_values if element]) + + +def generate_topic_content(blog_keywords, sub_topic): """ For each of given topic generate content for it. """ + # The outline should contain various subheadings and include the starting sentence for each section. + prompt = (f"As a professional writer and topic authority on '{blog_keywords}'," + f"craft a captivating, inviting and factual (no more than 700 characters) blog content on {sub_topic}." + f"Use bulleit points and other readibility enhancers." + ) try: - # Generate a blog post outline for the following topic: {topic}. 
- # The outline should contain various subheadings and include the starting sentence for each section. - prompt = f"As an experienced AI researcher and technical writer, blog about {prompt}." - response = openai.Completion.create( - engine="davinci-instruct-beta-v3", - prompt=prompt, + response = openai_chatgpt(prompt) + response = openai_chatgpt( + prompt, + model="text-davinci-003", temperature=0.7, - max_tokens=500, - top_p=1, - frequency_penalty=0, - presence_penalty=0 - ) + max_tokens=1000, + top_p=0.9, + n=1 + ) + text_values = [] + for choice in response["choices"]: + text_values.extend(choice["text"].split("\n")) + return (' '.join([element for element in text_values if element])) except Exception as err: - print(f"Error in generating topic content: {err}") + SystemError(f"Error in generating topic content: {err}") return response.choices[0].text +def get_blog_intro(blog_title, blog_topics): + """ + Generate blog introduction as per title and sub topics + """ + prompt = (f"As a professional writer, craft a captivating, inviting, and concise (no more than 550 characters)" + f"introduction for the blog titled '{blog_title}' with the following sub-topics: '{blog_topics}'" + f"The introduction should compel readers to delve deeper into the blog post." + ) + try: + # TBD: Add logic for which_provider and which_model + response = openai_chatgpt( + prompt, + model="text-davinci-003", + temperature=0.7, + max_tokens=1000, + top_p=0.9, + n=1 + ) + text_values = [] + for choice in response["choices"]: + text_values.extend(choice["text"].split("\n")) + return (' '.join([element for element in text_values if element])) + except Exception as err: + SystemError(f"Error in generating topic content: {err}") + + +def get_blog_conclusion(blog_content): + """ + Accepts a blog content and concludes it. + """ + prompt = ("As an expert SEO and blog writer, please conclude the given blog providing vital take aways," + "summarise key points (no more than 300 characters). 
The blog content: '{blog_content}'" + ) + try: + # TBD: Add logic for which_provider and which_model + response = openai_chatgpt( + prompt, + model="text-davinci-003", + temperature=0.9, + max_tokens=450, + top_p=0.7, + n=1 + ) + text_values = [] + for choice in response["choices"]: + text_values.extend(choice["text"].split("\n")) + return (' '.join([element for element in text_values if element])) + except Exception as err: + SystemError(f"Error in generating blog conclusion: {err}") + + def generate_blog_description(): """ Prompt designed to give SEO optimized blog descripton """ # Suggest keywords that I should include in my meta description for my blog post on [topic] - # I want to generate high CTR meta and keyword rich meta title and meta descriptions in text format. # My keywords are – [keyword 1], [keyword 2], [keyword 3] @@ -198,5 +237,110 @@ def get_long_tailed_keywords(blog_article): """ Function to get long tailed keywords for the blog article. """ - # want you to generate a list of long-tail keywords that are related to the following blog post [Enter blog post text here] + # Want you to generate a list of long-tail keywords that are related + # to the following blog post [Enter blog post text here] pass + + +def save_blog_to_file(blog_content, file_type="md"): + """ Common function to save the generated blog to a file. + arg: file_type can be md or html + """ + output_path = "../generated_blogs" + if not os.path.exists(output_path): + # If the directory does not exist, create it + os.makedirs(output_path) + + output_today = os.path.join(output_path, f'{datetime.date.today().strftime("%d-%m-%y")}') + if not os.path.exists(output_today): + os.makedirs(output_today) + else: + with open(f"{output_today}/{blog_title}.md", "w") as f: + f.write(blog_content) + + +def extract_key_text(json_data): + """Extracts key text from a given JSON object. + Args:json_data: A JSON object. + Returns: A list of strings containing the key text. 
+ Raises: ValueError: If the JSON object is not valid. + """ + + try: + # Extract the "choices" key from the JSON object. + choices = json_data["choices"] + + # Iterate over the "choices" list and extract the "text" key from each item. + key_text = [] + for choice in choices: + text = choice["text"] + + # Split the text into a list of sentences. + sentences = text.split("\n") + + # Iterate over the list of sentences and extract the first sentence. + for sentence in sentences: + # The generated topics usually have 1) or ^\W*\D* . Remove them from prompt. + new_str = sentence.replace("'", '') + new_str = re.sub(r'^(\d*\.)', '', new_str) + key_text.append(new_str) + + # Remove duplicate key text. + key_text = list(set(key_text)) + # Remove empty values. + key_text = [i for i in key_text if i] + return key_text + except KeyError as e: + raise ValueError(f"Missing key in JSON object: {e.args[0]}") + except TypeError as e: + raise ValueError(f"Invalid JSON object: {e.args[0]}") + + +def get_related_keywords(num_blogs, keywords, niche): + """ + Helper function to get more keywords from GPTs. + """ + # Check if niche: use long tailed, else use popular keywords. + if niche: + prompt = (f"Generate a list without description of the top {num_blogs} most popular and semantically" + f"related long-tailed keywords and entities for the topic of {keywords} that are used in" + "high-quality content and relevant to my competitors." + ) + else: + prompt = (f"Generate a list without description of the top {num_blogs} most popular and" + f" semantically related keywords and entities for the topic of {keywords} that are used" + " in high-quality content and relevant to my competitors." 
+ ) + # TBD: Add logic for which_provider and which_model + response = openai_chatgpt( + prompt, + model="text-davinci-003", + temperature=0.7, + max_tokens=100, + top_p=0.9, + n=10 + ) + + # Extract the keywords from the response + keywords = [] + for choice in response.choices: + # Split the response into words + words = choice.text.split(" ") + + # Add the words to the list of keywords + for text in words: + # Remove digits + text = re.sub(r'\d', '', text) + + # Remove special characters + text = re.sub(r'[^\w\s]', '', text) + # Remove newline characters + text = text.replace('\n', '') + + keywords.append(text) + + # Remove any duplicate keywords + keywords = set(keywords) + + # Return the list of keywords + return (' '.join(keywords)) diff --git a/lib/gpt_providers/README.md b/lib/gpt_providers/README.md new file mode 100644 index 00000000..79df7ed4 --- /dev/null +++ b/lib/gpt_providers/README.md @@ -0,0 +1,19 @@ +gpt_providers are companies providing commercial/free GPT pre-trained models as SaaS. +These include openai, Azure, Google, FB, Anthropic etc + +- If you want to use chatgpt and its models, then use openai as gpt_provider +- We plan to integrate the most accurate, widely used models as gpt providers. +- These will also include text to image and video generations as blogging artifacts. + +gpt_provider=openai + +------------------------------------ + +Here are some tips for using LLMs to generate ideas: + +- Be as specific as possible in your prompts. The more specific you are, the better the LLM will +be able to understand what you are asking for. +- Use keywords in your prompts. This will help the LLM to generate ideas that are relevant to your topic. +- Try different temperatures and top_p values. These parameters control the creativity and diversity of the generated ideas. +- Experiment with different prompts and settings to see what works best for you. 
+ diff --git a/lib/gpt_providers/openai_gpt_provider.py b/lib/gpt_providers/openai_gpt_provider.py new file mode 100644 index 00000000..2aa4dcb8 --- /dev/null +++ b/lib/gpt_providers/openai_gpt_provider.py @@ -0,0 +1,57 @@ +######################################################## +# +# openai chatgpt integration for blog generation. +# Choosing a model from openai and fine tuning its various paramters. +# +######################################################## + +from tqdm import tqdm, trange +import openai +import time # I wish + + +def openai_chatgpt(prompt, model="text-davinci-003", temperature=0.5, max_tokens=2048, top_p=0.9, n=10): + try: + # Error in generating topic content: Rate limit reached for default-global-with-image-limits + # in free account on requests per min. Limit: 3 / min. Please try again in 20s. + for i in trange(21): + time.sleep(1) + # using OpenAI's Completion module that helps execute + # any tasks involving text + response = openai.Completion.create( + # model name used here is text-davinci-003 + # there are many other models available under the + # umbrella of GPT-3 + model="text-davinci-003", + # passing the user input + prompt=prompt, + # generated output can have "max_tokens" number of tokens + max_tokens=max_tokens, + # number of outputs generated in one call + n=n, + top_p=top_p, + #frequency_penalty=0, + #presence_penalty=0 + ) + return(response) + except openai.error.Timeout as e: + #Handle timeout error, e.g. retry or log + SystemError(f"OpenAI API request timed out: {e}") + except openai.error.APIError as e: + #Handle API error, e.g. retry or log + SystemError(f"OpenAI API returned an API Error: {e}") + except openai.error.APIConnectionError as e: + #Handle connection error, e.g. check network or log + SystemError(f"OpenAI API request failed to connect: {e}") + except openai.error.InvalidRequestError as e: + #Handle invalid request error, e.g. 
validate parameters or log + SystemError(f"OpenAI API request was invalid: {e}") + except openai.error.AuthenticationError as e: + #Handle authentication error, e.g. check credentials or log + SystemError(f"OpenAI API request was not authorized: {e}") + except openai.error.PermissionError as e: + #Handle permission error, e.g. check scope or log + SystemError(f"OpenAI API request was not permitted: {e}") + except openai.error.RateLimitError as e: + #Handle rate limit error, e.g. wait or log + SystemError(f"OpenAI API request exceeded rate limit: {e}") diff --git a/main_config.ini b/main_config.ini index 21eb3a10..6bdf206d 100644 --- a/main_config.ini +++ b/main_config.ini @@ -17,6 +17,21 @@ openai_api_key="" model_name="" +################################################### +# +# Define Blog Content characteristics +# +################################################### + +blog_tone="professional" +blog_character="Use transition words. Use active voice." +blog_tempo="???" +blog_audience="begginer style" +search_intent = [informational, commercial, transactional] +buyer_stage= [awareness, consideration, decision] +target_audience = "small businesses in the United States" + + ################################################### # # Wordpress and WIX integration and details diff --git a/prompts/blog_ideas_prompts.md b/prompts/blog_ideas_prompts.md index 7d002413..f52f5159 100644 --- a/prompts/blog_ideas_prompts.md +++ b/prompts/blog_ideas_prompts.md @@ -8,6 +8,26 @@ # We can craft prompts to get an idea on what to generate blogs on. # Divide them in topic and write for most searched ones, as below: +When using GPT to generate content, it is important to provide it with clear and concise instructions. +For example, if you are asking GPT to generate a blog post outline, you should provide it with the following information: + +- Topic: What is the topic of the blog post? +- Audience: Who is the target audience for the blog post? 
+- Purpose: What is the purpose of the blog post? (To inform, entertain, sell, etc.) +- Keywords: What keywords do you want the blog post to rank for? + +------------------------------------------------------------------- + +Generate a list of the top {X} most popular and semantically related keywords and entities for the topic of {X}, categorized by search intent (informational, commercial, transactional). + +Generate a list of the top {X} most popular and semantically related long-tail keywords and entities for the topic of {X}, categorized by buyer stage (awareness, consideration, decision). + +Generate a list of the top {X} most popular and semantically related keywords and entities for the topic of {X} that are relevant to my target audience (e.g., small businesses in the United States). + +Generate a list of the top {X} most popular and semantically related keywords and entities for the topic of {X} that are used in high-quality content. + +Generate a list of the top {X} most popular and semantically related keywords and entities for the topic of {X} that are relevant to my competitors. + ------------------------------------------------------------------- --- Write seven subheadings for the blog article with the title [title]; the titles should be catchy and 60 characters max. 
diff --git a/pseo_main.py b/pseo_main.py index 6f6e7b80..8e25389b 100644 --- a/pseo_main.py +++ b/pseo_main.py @@ -14,10 +14,14 @@ import argparse import json import traceback from loguru import logger -logger.add(sys.stdout, colorize=True, format="{time} {message}") +logger.remove() +logger.add(sys.stdout, + colorize=True, + format="{level}|{file}:{line}:{function}| {message}" + ) from lib.generate_image_from_prompt import generate_image, gen_new_from_given_img -from lib.get_text_response import get_prompt_reply, generate_detailed_blog +from lib.get_text_response import generate_detailed_blog def main(): @@ -31,24 +35,24 @@ def main(): parser = argparse.ArgumentParser( description="Accepts user input for the number of blogs, keywords, and niche." ) - parser.add_argument("--num_blogs", type=int, default=1, help="The number of blogs (default: 1).") - parser.add_argument("--keywords", type=str, required=True, help="The keywords.") - parser.add_argument("--niche", type=bool, default=False, help="Whether the blog is a niche blog (default: False).") + parser.add_argument("--num_blogs", type=int, default=1, help="The number of blogs (default: 5).") + parser.add_argument("--keywords", type=str, required=True, help="The keywords.A broad idea to write multiple blogs on.") + parser.add_argument("--niche", type=bool, default=False, help="Written blogs on long tailed search topics (default: False).") args = parser.parse_args() # Check if the user input is valid if not isinstance(args.num_blogs, int) or not isinstance(args.keywords, str) or not isinstance(args.niche, bool): - raise TypeError("Invalid user input.") + raise TypeError("Invalid: So, int, str, quotes should be present in command.") # Check if the number of blogs is less than 1 if args.num_blogs < 1: raise ValueError("The number of blogs must be at least 1.") # Print the user input to the console - print(f"Number of blogs: {args.num_blogs}") - print(f"Keywords: {args.keywords}") - print(f"Niche blog: {args.niche}") + 
logger.info(f"Number of blogs: {args.num_blogs}") + logger.info(f"Keywords: {args.keywords}") + logger.info(f"Niche blog: {args.niche}") return args.num_blogs, args.keywords, args.niche @@ -57,11 +61,11 @@ if __name__ == "__main__": # Check if we have everything, we need to start writing blogs. try: num_blogs, keywords, niche = main() - print(f"returned value: {num_blogs} {keywords}") + logger.info(f"returned value: {num_blogs} {keywords}") except TypeError as e: - print(e) + logger.error(e) except ValueError as e: - print(e) + logger.error(e) else: - print(f"Starting to write {num_blogs} on {keywords}") + logger.info(f"Starting to write {num_blogs} blogs on {keywords}") generate_detailed_blog(num_blogs, keywords, niche)