From b431bfcbd8cb064ec3d552bab61b645db4fc7ba5 Mon Sep 17 00:00:00 2001 From: ajaysi Date: Sun, 19 May 2024 14:03:16 +0530 Subject: [PATCH] long-form, AI social, copywriter, prompt config --- alwrity.py | 3 +- lib/ai_writers/long_form_ai_writer.py | 182 +++++++----------- lib/utils/alwrity_utils.py | 4 +- lib/workspace/prompts/README.md | 27 +++ .../prompts/long_form_ai_writer.prompts | 86 +++++++++ main_config | 12 +- 6 files changed, 197 insertions(+), 117 deletions(-) create mode 100644 lib/workspace/prompts/README.md create mode 100644 lib/workspace/prompts/long_form_ai_writer.prompts diff --git a/alwrity.py b/alwrity.py index a2a70aa5..adc1f330 100644 --- a/alwrity.py +++ b/alwrity.py @@ -210,7 +210,7 @@ def check_llm_environs(): def check_internet(): try: - response = requests.get("http://www.google.com", timeout=20) + response = requests.get("http://www.google.com", timeout=5) if not response.status_code == 200: print("💥🤯 WTFish, Internet is NOT available. Enjoy the wilderness..") exit(1) @@ -253,6 +253,7 @@ if __name__ == "__main__": f"web_research_report_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}") os.environ["IMG_SAVE_DIR"] = os.path.join(os.getcwd(), "lib", "workspace") os.environ["CONTENT_SAVE_DIR"] = os.path.join(os.getcwd(), "lib", "workspace") + os.environ["PROMPTS_DIR"] = os.path.join(os.getcwd(), "lib", "workspace", "prompts") load_dotenv(Path('.env')) app() diff --git a/lib/ai_writers/long_form_ai_writer.py b/lib/ai_writers/long_form_ai_writer.py index d650bc93..ae35a534 100644 --- a/lib/ai_writers/long_form_ai_writer.py +++ b/lib/ai_writers/long_form_ai_writer.py @@ -8,8 +8,11 @@ import os import time #iwish import sys +import yaml from pathlib import Path from dotenv import load_dotenv +from configparser import ConfigParser + from google.api_core import retry import google.generativeai as genai from pprint import pprint @@ -23,7 +26,7 @@ logger.add(sys.stdout, ) from ..utils.read_main_config_params import read_return_config_section -from 
..ai_web_researcher.gpt_online_researcher import do_google_serp_search +from ..ai_web_researcher.gpt_online_researcher import do_metaphor_ai_research from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search, do_tavily_ai_search from ..blog_metadata.get_blog_metadata import blog_metadata from ..blog_postprocessing.save_blog_to_file import save_blog_to_file @@ -62,109 +65,53 @@ def long_form_generator(content_keywords): logger.error(f"Failed to Read config params from main_config: {err}") return - writing_guidelines = f'''\ - Writing Guidelines + try: + filepath = os.path.join(os.environ["PROMPTS_DIR"], "long_form_ai_writer.prompts") + # Check if file exists + if not os.path.exists(filepath): + raise FileNotFoundError(f"File {filepath} does not exist") + with open(filepath, 'r') as file: + prompts = yaml.safe_load(file) + except Exception as err: + logger.error(f"Exit: Failed to read prompts from {filepath}: {err}") + exit(1) - As an expert Content writer and web researcher, demostrate your world class {content_type} content writing skills. + writing_guidelines = prompts.get('writing_guidelines').format( + content_language=content_language, + content_tone=content_tone, + content_type=content_type, + output_format=output_format, + content_keywords=content_keywords, + target_audience=target_audience + ) + + content_title = prompts.get('content_title').format( + content_language=content_language, + content_keywords=content_keywords, + target_audience=target_audience + ) - Follow the below writing guidelines for writing your content: - 1). You must write in {content_language} language. - 2). Your content should appeal to target audience of {target_audience}. - 3). The tone of your content should be consistent for {content_tone}. - 4). Always ensure orignality and human-like content. - 5). Use simple {content_language} words, to appeal to all readers. - 6). Your content must be well formatted using {output_format} language. - 7). 
Do not use words like: Unleash, ultimate, Uncover, Discover, Elevate, Revolutionizing, Unveiling, Harnessing, Dive, Delve into, Embrace. - - Remember, your main goal is to write as much as you can. If you get through the content too fast, that is bad. - Expand, never summarize. - ''' - - - # Generate prompts - content_title = f'''\ - As an expert {content_language} content writer, specilizing in SEO writing. - Your task is to write a blog title following guidelines below: - - 1). Write a blog title for given keywords {content_keywords}. - 2). The title should appeal to audience level of {target_audience}. - 3). Review the given web research result for {content_keywords}. Your title should compete against them. - 4). Do not use words like: Unleash, ultimate, Uncover, Discover, Elevate, Revolutionizing, Unveiling, Harnessing, Dive, Delve into, Embrace. - - Web research Result: - - """{{web_research_result}}""" - - ''' - - content_outline = f'''\ - As an expert {content_language} content outliner, specilizing in SEO optimised content. - The title of my content is {{content_title}}. I will provide you with its web research results, as context. - Your task is write a detailed content outline for the given 'Title', based on the given context. - - Instructions: - 1). Make sure the outline includes most of the topics from the below given web research results, as context. - 2). The outline should appeal to audience of {target_audience}. - \n - web research results is: + content_outline = prompts.get('content_outline').format( + content_language=content_language, + content_title='{content_title}', + target_audience=target_audience + ) - """{{web_research_result}}""" - - ''' - - starting_prompt = f'''\ - As an expert {content_language} content writer, specilizing in writing SEO optimised content. 
+ starting_prompt = prompts.get('starting_prompt').format( + content_language=content_language, + content_title='{content_title}', + content_outline='{content_outline}', + writing_guidelines=writing_guidelines + ) - Your Content title is: - - """{{content_title}}""" - - The outline of the content is: - - """{{content_outline}}""" - - First, silently review the given content outline and the title. Consider how to start writing your content. - Start to write the very beginning of the outline. You are not expected to finish the whole content now. - Your writing should be detailed enough that you are only scratching the surface of the first bullet of your outline. - Try to write AT MINIMUM 600 WORDS. - - """{writing_guidelines}""" - ''' - - continuation_prompt = f'''\ - As an expert {content_language} content writer & web researcher, specilizing in writing SEO optimised content. - - Your Content title is: - - """{{content_title}}""" - - The outline of the content is: - - """{{content_outline}}""" - - Relevant web research results: - - """{{web_research_result}}""" - - ============\n - - You've begun to write the content and continue to do so. - Here's what you've written so far: - - """{{content_text}}""" - - ===== - - First, take your time and silently review the content outline and what you have written so far. - Identify what the single next part of your outline you should write. - - Your task is to continue where you left off and write only the next parts of given outline. - You are not expected to finish the whole content now. - Try to write AT MINIMUM 600 WORDS. However, only once the content - is COMPLETELY finished, write IAMDONE. Remember, do NOT write a whole sections right now. 
- \n\n - {writing_guidelines} - ''' + continuation_prompt = prompts.get('continuation_prompt').format( + content_language=content_language, + content_title='{content_title}', + content_outline='{content_outline}', + content_text='{content_text}', + web_research_result='{web_research_result}', + writing_guidelines=writing_guidelines + ) # Configure generative AI load_dotenv(Path('../.env')) @@ -172,8 +119,10 @@ def long_form_generator(content_keywords): # Initialize the generative model model = genai.GenerativeModel('gemini-pro') model_pro = genai.GenerativeModel('gemini-1.5-flash-latest') + # Do SERP web research for given keywords to generate title and outline. web_research_result, g_titles = do_google_serp_search(content_keywords) + # Generate prompts try: content_title = generate_with_retry(model_pro, content_title.format(web_research_result=web_research_result)).text @@ -190,8 +139,11 @@ def long_form_generator(content_keywords): logger.error(f"Failed to generate content outline: {err}") try: - starting_draft = generate_with_retry(model_pro, - starting_prompt.format(content_title=content_title, content_outline=content_outline)).text + starting_draft = generate_with_retry(model_pro, starting_prompt.format( + content_title=content_title, + content_outline=content_outline, + web_research_result=web_research_result, + writing_guidelines=writing_guidelines)).text except Exception as err: logger.error(f"Failed to Generate Starting draft: {err}") return @@ -199,9 +151,12 @@ def long_form_generator(content_keywords): try: logger.info(f"Starting to write on the outline introduction.") draft = starting_draft - continuation = generate_with_retry(model, - continuation_prompt.format(content_title=content_title, - content_outline=content_outline, content_text=draft, web_research_result=web_research_result)).text + continuation = generate_with_retry(model, continuation_prompt.format( + content_title=content_title, + content_outline=content_outline, + content_text=draft, + 
web_research_result=web_research_result, + writing_guidelines=writing_guidelines)).text except Exception as err: logger.error(f"Failed to write the initial draft: {err}") @@ -227,11 +182,22 @@ def long_form_generator(content_keywords): logger.info(f"Writing in progress... Current draft length: {len(draft)} characters") while 'IAMDONE' not in continuation: try: - continuation = generate_with_retry(model, - continuation_prompt.format(content_title=content_title, - content_outline=content_outline, content_text=draft, web_research_result=web_research_result)).text + web_research_result, m_titles = do_metaphor_ai_research(content_keywords) + continuation = generate_with_retry(model, continuation_prompt.format( + content_title=content_title, + content_outline=content_outline, + content_text=draft, + web_research_result=web_research_result, + writing_guidelines=writing_guidelines)).text + draft += '\n\n' + continuation logger.info(f"Writing in progress... Current draft length: {len(draft)} characters") + + # At this point, the context is little stale. We should more web research on + # related queries as per the content outline, to augment the LLM context. 
+ # web_research_result, m_titles = do_metaphor_ai_research(content_keywords) + #logger.info(f"Doing Tavily Search Again, Should mix with Exa.ai") + #web_research_result, m_titles, t_titles = do_tavily_ai_search(content_title) except Exception as err: logger.error(f"Failed to continually write the Essay: {err}") return diff --git a/lib/utils/alwrity_utils.py b/lib/utils/alwrity_utils.py index c5022f62..f5a49ee4 100644 --- a/lib/utils/alwrity_utils.py +++ b/lib/utils/alwrity_utils.py @@ -103,13 +103,13 @@ def blog_from_keyword(): try: write_blog_from_keywords(content_keywords) except Exception as err: - print(f"🚫 Failed to write blog on {blog_keywords}, Error: {err}\n") + print(f"🚫 Failed to write blog on {content_keywords}, Error: {err}\n") exit(1) elif choice == "long": try: long_form_generator(content_keywords) except Exception as err: - print(f"🚫 Failed to write blog on {blog_keywords}, Error: {err}\n") + print(f"🚫 Failed to write blog on {content_keywords}, Error: {err}\n") exit(1) elif choice == "Experimental": try: diff --git a/lib/workspace/prompts/README.md b/lib/workspace/prompts/README.md new file mode 100644 index 00000000..f0101f54 --- /dev/null +++ b/lib/workspace/prompts/README.md @@ -0,0 +1,27 @@ +# Alwrity Prompts Directory + +Welcome to the Alwrity Prompts directory! This folder contains the LLM prompts used by Alwrity AI writers. +By editing these prompts, you can customize the output of the AI to better suit your specific needs. + +## Overview + +1. **Purpose**: This directory provides an easy way to modify prompts for your use cases without hard-coding them into the main code. +2. **Intuitive Naming**: Each prompt file is named intuitively to help you identify which one to edit. If you're unsure, refer to the log messages when Alwrity executes. + +## Instructions + +### Important Guidelines + +- **Do Not Modify Variables**: Do not change the `{}` strings and other variable names inside the prompts. 
These placeholders are crucial for the AI to generate the correct output. +- **Do Not Rename Files**: Keep the file names unchanged to ensure the system can locate and use them correctly. +- **Maintain YAML Structure**: Follow the YAML file structure to avoid errors. Incorrect formatting can cause the AI to malfunction. + +### How to Edit Prompts + +1. **Open the YAML File**: Locate and open the prompt file you wish to edit. For example, `long_form_ai_writer.prompts`. + +2. **Modify Prompt Content**: Update the text within the prompts as needed. Be careful to retain the overall YAML structure and placeholders. + +3. **Save Changes**: After making your edits, save the file. + +**Different language models may require different prompting strategies. The quality of the content generated by the AI is directly influenced by the quality of the prompts you provide.** diff --git a/lib/workspace/prompts/long_form_ai_writer.prompts b/lib/workspace/prompts/long_form_ai_writer.prompts new file mode 100644 index 00000000..fb4b48c6 --- /dev/null +++ b/lib/workspace/prompts/long_form_ai_writer.prompts @@ -0,0 +1,86 @@ +writing_guidelines: | + As an expert content writer and web researcher, demonstrate your world-class {content_type} content writing skills. + Follow these writing guidelines: + 1. Write in {content_language} language. + 2. Ensure your content appeals to the target audience of {target_audience}. + 3. Maintain a consistent tone of {content_tone} throughout. + 4. Ensure originality and human-like content. + 5. Use simple {content_language} words to appeal to all readers. + 6. Format your content using {output_format}. + 7. Avoid words like: Unleash, ultimate, uncover, discover, elevate, revolutionizing, unveiling, harnessing, dive, delve into, embrace. + Remember, your main goal is to write as much as you can. Expanding content is good; summarizing is bad. + 8). Always use the given web research results, in your writing. 
+ + + +content_title: | + As an expert {content_language} content writer specializing in SEO writing, your task is to create a blog title following these guidelines: + 1. Write a blog title for the given keywords: {content_keywords}. + 2. Ensure the title appeals to the target audience of {target_audience}. + 3. Review the provided web research results for {content_keywords}. Ensure your title competes effectively against them. + 4. Avoid words like: Unleash, ultimate, uncover, discover, elevate, revolutionizing, unveiling, harnessing, dive, delve into, embrace. + Web research results: + """{{web_research_result}}""" + + + +content_outline: | + As an expert {content_language} content outliner specializing in SEO-optimized content, create a detailed content outline for the given title based on the provided context. + Title: {{content_title}} + Instructions: + 1. Include most of the topics from the given web research results as context. + 2. Ensure the outline appeals to the target audience of {target_audience}. + Web research results: + """{{web_research_result}}""" + + + +starting_prompt: | + As an expert {content_language} content writer specializing in SEO-optimized content, begin writing the content for the given title and outline. + + Title: + """{{content_title}}""" + + Outline: + """{{content_outline}}""" + + Relevant web research results: + """{{web_research_result}}""" + + ------------ + + First, silently review the content outline and title. Consider how to begin writing your content. + Start by writing the very beginning of the outline. You are not expected to finish the entire content now. + Your writing should be detailed, only scratching the surface of the first bullet point of your outline. + Write a minimum of 700 words. + + """{{writing_guidelines}}""" + + + +continuation_prompt: | + As an expert {content_language} content writer and web researcher specializing in SEO-optimized content, continue writing the content for the given title and outline. 
+ + Title: + """{{content_title}}""" + + Outline: + """{{content_outline}}""" + + Relevant web research results: + """{{web_research_result}}""" + + =========== + + You've begun writing the content. Continue from where you left off. + Here's what you've written so far: + """{{content_text}}""" + + ===== + First, silently review the content outline and what you've written so far. + Identify the next part of your outline to write. + Continue from where you left off, focusing only on the next parts of the outline. + You are not expected to finish the entire content now. + Write a minimum of 700 words. Once the content is completely finished, write IAMDONE. Remember, do NOT write entire sections right now. + + """{{writing_guidelines}}""" diff --git a/main_config b/main_config index 2de63913..cdbf825d 100644 --- a/main_config +++ b/main_config @@ -9,15 +9,15 @@ [blog_characteristics] # Length of blogs Or word count. Note: It wont be exact and depends on GPT providers and Max token count. -blog_length = 1200 +blog_length = 2000 # company/brand-name # professional, how-to, begginer, research, programming, casual, etc -blog_tone = "Professional" +blog_tone = "Casual" # Target Audience, Gen-Z, Tech-savvy, Working professional, students, kids etc -blog_demographic = "Students" +blog_demographic = "Content creators & Digital marketing" # informational, commercial, company, news, finance, competitor, programming, scholar etc blog_type = "Informational" @@ -59,12 +59,12 @@ num_images = 1 gpt_provider = google # Mention which model of the above provider to use. -model = gpt-3.5-turbo-0125 +model = gemini-1.5-flash-latest # Temperature is a parameter that controls the “creativity” or randomness of the text generated by GPT. # greater determinism and higher values indicating more randomness. # while a lower temperature (e.g., 0.2) makes the output more deterministic and focused (thus, getting flagged as AI content). 
-temperature = 0.6 +temperature = 0.7 # Top-p sampling is particularly useful in scenarios where you want to control the level of diversity in the generated text. # By adjusting the threshold p, you can influence the diversity of the generated sequences. @@ -124,7 +124,7 @@ time_range = anytime # include_domains (Give Full URLs, separate by comma): A list of domains to specifically include in the search results. # Default is None, which includes all domains. Example: https://wikipedia.com,https://stackoverflow.com,google schalor,reddit etc -include_domains = +include_domains = https://alwrity.com # similar_url : A single URL, this will instruct search engines to give results similar to the given URL. similar_url =