From b431bfcbd8cb064ec3d552bab61b645db4fc7ba5 Mon Sep 17 00:00:00 2001
From: ajaysi <ajay.calsoft@gmail.com>
Date: Sun, 19 May 2024 14:03:16 +0530
Subject: [PATCH] long-form, AI social, copywriter, prompt config

---
 alwrity.py                                    |   3 +-
 lib/ai_writers/long_form_ai_writer.py         | 182 +++++++-----------
 lib/utils/alwrity_utils.py                    |   4 +-
 lib/workspace/prompts/README.md               |  27 +++
 .../prompts/long_form_ai_writer.prompts       |  86 +++++++++
 main_config                                   |  12 +-
 6 files changed, 197 insertions(+), 117 deletions(-)
 create mode 100644 lib/workspace/prompts/README.md
 create mode 100644 lib/workspace/prompts/long_form_ai_writer.prompts

diff --git a/alwrity.py b/alwrity.py
index a2a70aa5..adc1f330 100644
--- a/alwrity.py
+++ b/alwrity.py
@@ -210,7 +210,7 @@ def check_llm_environs():
 
 def check_internet():
     try:
-        response = requests.get("http://www.google.com", timeout=20)
+        response = requests.get("http://www.google.com", timeout=5)
         if not response.status_code == 200:
             print("💥🤯 WTFish, Internet is NOT available. Enjoy the wilderness..")
             exit(1)
@@ -253,6 +253,7 @@ if __name__ == "__main__":
                                                   f"web_research_report_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}")
     os.environ["IMG_SAVE_DIR"] = os.path.join(os.getcwd(), "lib", "workspace")
     os.environ["CONTENT_SAVE_DIR"] = os.path.join(os.getcwd(), "lib", "workspace")
+    os.environ["PROMPTS_DIR"] = os.path.join(os.getcwd(), "lib", "workspace", "prompts")
 
     load_dotenv(Path('.env'))
     app()
diff --git a/lib/ai_writers/long_form_ai_writer.py b/lib/ai_writers/long_form_ai_writer.py
index d650bc93..ae35a534 100644
--- a/lib/ai_writers/long_form_ai_writer.py
+++ b/lib/ai_writers/long_form_ai_writer.py
@@ -8,8 +8,11 @@
 import os
 import time #iwish
 import sys
+import yaml
 from pathlib import Path
 from dotenv import load_dotenv
+from configparser import ConfigParser
+
 from google.api_core import retry
 import google.generativeai as genai
 from pprint import pprint
@@ -23,7 +26,7 @@ logger.add(sys.stdout,
     )
 
 from ..utils.read_main_config_params import read_return_config_section
-from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search
+from ..ai_web_researcher.gpt_online_researcher import do_metaphor_ai_research
 from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search, do_tavily_ai_search
 from ..blog_metadata.get_blog_metadata import blog_metadata
 from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
@@ -62,109 +65,53 @@ def long_form_generator(content_keywords):
         logger.error(f"Failed to Read config params from main_config: {err}")
         return
 
-    writing_guidelines = f'''\
-    Writing Guidelines
+    try:
+        filepath = os.path.join(os.environ["PROMPTS_DIR"], "long_form_ai_writer.prompts")
+        # Check if file exists
+        if not os.path.exists(filepath):
+            raise FileNotFoundError(f"File {filepath} does not exist")
+        with open(filepath, 'r') as file:
+            prompts = yaml.safe_load(file)
+    except Exception as err:
+        logger.error(f"Exit: Failed to read prompts from {filepath}: {err}")
+        exit(1)
 
-    As an expert Content writer and web researcher, demostrate your world class {content_type} content writing skills.
+    writing_guidelines = prompts.get('writing_guidelines').format(
+        content_language=content_language,
+        content_tone=content_tone,
+        content_type=content_type,
+        output_format=output_format,
+        content_keywords=content_keywords,
+        target_audience=target_audience
+    )
+
+    content_title = prompts.get('content_title').format(
+        content_language=content_language,
+        content_keywords=content_keywords,
+        target_audience=target_audience
+    )
     
-    Follow the below writing guidelines for writing your content:
-    1). You must write in {content_language} language.
-    2). Your content should appeal to target audience of {target_audience}.
-    3). The tone of your content should be consistent for {content_tone}.
-    4). Always ensure orignality and human-like content.
-    5). Use simple {content_language} words, to appeal to all readers.
-    6). Your content must be well formatted using {output_format} language.
-    7). Do not use words like: Unleash, ultimate, Uncover, Discover, Elevate, Revolutionizing, Unveiling, Harnessing, Dive, Delve into, Embrace.
-
-    Remember, your main goal is to write as much as you can. If you get through the content too fast, that is bad. 
-    Expand, never summarize.
-    '''
-
-
-    # Generate prompts
-    content_title = f'''\
-    As an expert {content_language} content writer, specilizing in SEO writing.
-    Your task is to write a blog title following guidelines below:
-
-    1). Write a blog title for given keywords {content_keywords}.
-    2). The title should appeal to audience level of {target_audience}.
-    3). Review the given web research result for {content_keywords}. Your title should compete against them.
-    4). Do not use words like: Unleash, ultimate, Uncover, Discover, Elevate, Revolutionizing, Unveiling, Harnessing, Dive, Delve into, Embrace.
-
-    Web research Result:
-
-    """{{web_research_result}}"""
-
-    '''
-
-    content_outline = f'''\
-    As an expert {content_language} content outliner, specilizing in SEO optimised content.
-    The title of my content is {{content_title}}. I will provide you with its web research results, as context.
-    Your task is write a detailed content outline for the given 'Title', based on the given context.
-
-    Instructions:
-    1). Make sure the outline includes most of the topics from the below given web research results, as context.
-    2). The outline should appeal to audience of {target_audience}.
-    \n
-    web research results is:
+    content_outline = prompts.get('content_outline').format(
+        content_language=content_language,
+        content_title='{content_title}',
+        target_audience=target_audience
+    )
     
-    """{{web_research_result}}"""
-
-    '''
-
-    starting_prompt = f'''\
-    As an expert {content_language} content writer, specilizing in writing SEO optimised content.
+    starting_prompt = prompts.get('starting_prompt').format(
+        content_language=content_language,
+        content_title='{content_title}',
+        content_outline='{content_outline}',
+        writing_guidelines=writing_guidelines
+    )
     
-    Your Content title is:
-
-    """{{content_title}}"""
-
-    The outline of the content is:
-
-    """{{content_outline}}"""
-
-    First, silently review the given content outline and the title. Consider how to start writing your content.
-    Start to write the very beginning of the outline. You are not expected to finish the whole content now. 
-    Your writing should be detailed enough that you are only scratching the surface of the first bullet of your outline. 
-    Try to write AT MINIMUM 600 WORDS.
-
-    """{writing_guidelines}"""
-    '''
-
-    continuation_prompt = f'''\
-    As an expert {content_language} content writer & web researcher, specilizing in writing SEO optimised content.
-
-    Your Content title is:
-
-    """{{content_title}}"""
-
-    The outline of the content is:
-
-    """{{content_outline}}"""
-
-    Relevant web research results:
-
-    """{{web_research_result}}"""
-
-    ============\n
-
-    You've begun to write the content and continue to do so.
-    Here's what you've written so far:
-
-    """{{content_text}}"""
-
-    =====
-
-    First, take your time and silently review the content outline and what you have written so far. 
-    Identify what the single next part of your outline you should write.
-
-    Your task is to continue where you left off and write only the next parts of given outline.
-    You are not expected to finish the whole content now. 
-    Try to write AT MINIMUM 600 WORDS. However, only once the content
-    is COMPLETELY finished, write IAMDONE. Remember, do NOT write a whole sections right now.
-    \n\n
-    {writing_guidelines}
-    '''
+    continuation_prompt = prompts.get('continuation_prompt').format(
+        content_language=content_language,
+        content_title='{content_title}',
+        content_outline='{content_outline}',
+        content_text='{content_text}',
+        web_research_result='{web_research_result}',
+        writing_guidelines=writing_guidelines
+    )
 
     # Configure generative AI
     load_dotenv(Path('../.env'))
@@ -172,8 +119,10 @@ def long_form_generator(content_keywords):
     # Initialize the generative model
     model = genai.GenerativeModel('gemini-pro')
     model_pro = genai.GenerativeModel('gemini-1.5-flash-latest')
+    
     # Do SERP web research for given keywords to generate title and outline.
     web_research_result, g_titles = do_google_serp_search(content_keywords)
+
     # Generate prompts
     try:
         content_title = generate_with_retry(model_pro, content_title.format(web_research_result=web_research_result)).text
@@ -190,8 +139,11 @@ def long_form_generator(content_keywords):
         logger.error(f"Failed to generate content outline: {err}")
 
     try:
-        starting_draft = generate_with_retry(model_pro, 
-                starting_prompt.format(content_title=content_title, content_outline=content_outline)).text
+        starting_draft = generate_with_retry(model_pro, starting_prompt.format(
+                content_title=content_title, 
+                content_outline=content_outline,
+                web_research_result=web_research_result,
+                writing_guidelines=writing_guidelines)).text
     except Exception as err:
         logger.error(f"Failed to Generate Starting draft: {err}")
         return
@@ -199,9 +151,12 @@ def long_form_generator(content_keywords):
     try:
         logger.info(f"Starting to write on the outline introduction.")
         draft = starting_draft
-        continuation = generate_with_retry(model, 
-                continuation_prompt.format(content_title=content_title, 
-                            content_outline=content_outline, content_text=draft, web_research_result=web_research_result)).text
+        continuation = generate_with_retry(model, continuation_prompt.format(
+                content_title=content_title, 
+                content_outline=content_outline, 
+                content_text=draft, 
+                web_research_result=web_research_result,
+                writing_guidelines=writing_guidelines)).text
     except Exception as err:
         logger.error(f"Failed to write the initial draft: {err}")
 
@@ -227,11 +182,22 @@ def long_form_generator(content_keywords):
     logger.info(f"Writing in progress... Current draft length: {len(draft)} characters")
     while 'IAMDONE' not in continuation:
         try:
-            continuation = generate_with_retry(model,
-                    continuation_prompt.format(content_title=content_title,
-                    content_outline=content_outline, content_text=draft, web_research_result=web_research_result)).text
+            web_research_result, m_titles = do_metaphor_ai_research(content_keywords)
+            continuation = generate_with_retry(model, continuation_prompt.format(
+                    content_title=content_title,
+                    content_outline=content_outline, 
+                    content_text=draft, 
+                    web_research_result=web_research_result,
+                    writing_guidelines=writing_guidelines)).text
+
             draft += '\n\n' + continuation
             logger.info(f"Writing in progress... Current draft length: {len(draft)} characters")
+
+            # At this point the context is a little stale. We should do more web research on
+            # related queries from the content outline, to augment the LLM context.
+            # web_research_result, m_titles = do_metaphor_ai_research(content_keywords)
+            #logger.info(f"Doing Tavily Search Again, Should mix with Exa.ai")
+            #web_research_result, m_titles, t_titles = do_tavily_ai_search(content_title)
         except Exception as err:
             logger.error(f"Failed to continually write the Essay: {err}")
             return
diff --git a/lib/utils/alwrity_utils.py b/lib/utils/alwrity_utils.py
index c5022f62..f5a49ee4 100644
--- a/lib/utils/alwrity_utils.py
+++ b/lib/utils/alwrity_utils.py
@@ -103,13 +103,13 @@ def blog_from_keyword():
         try:
             write_blog_from_keywords(content_keywords)
         except Exception as err:
-            print(f"🚫 Failed to write blog on {blog_keywords}, Error: {err}\n")
+            print(f"🚫 Failed to write blog on {content_keywords}, Error: {err}\n")
             exit(1)
     elif choice == "long":
         try:
             long_form_generator(content_keywords)
         except Exception as err:
-            print(f"🚫 Failed to write blog on {blog_keywords}, Error: {err}\n")
+            print(f"🚫 Failed to write blog on {content_keywords}, Error: {err}\n")
             exit(1)
     elif choice == "Experimental":
         try:
diff --git a/lib/workspace/prompts/README.md b/lib/workspace/prompts/README.md
new file mode 100644
index 00000000..f0101f54
--- /dev/null
+++ b/lib/workspace/prompts/README.md
@@ -0,0 +1,27 @@
+# Alwrity Prompts Directory
+
+Welcome to the Alwrity Prompts directory! This folder contains the LLM prompts used by Alwrity AI writers. 
+By editing these prompts, you can customize the output of the AI to better suit your specific needs.
+
+## Overview
+
+1. **Purpose**: This directory provides an easy way to modify prompts for your use cases without hard-coding them into the main code.
+2. **Intuitive Naming**: Each prompt file is named intuitively to help you identify which one to edit. If you're unsure, refer to the log messages when Alwrity executes.
+
+## Instructions
+
+### Important Guidelines
+
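+- **Do Not Modify Variables**: Do not change, rename, or remove the placeholders written in curly braces (for example `{content_language}` or `{{web_research_result}}`). Alwrity substitutes real values into these placeholders when it builds each prompt, so altering them breaks the prompt.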
+- **Do Not Rename Files**: Keep the file names unchanged to ensure the system can locate and use them correctly.
+- **Maintain YAML Structure**: Follow the YAML file structure to avoid errors. If the file cannot be parsed as valid YAML, the prompts fail to load and the writer exits with an error.
+
+### How to Edit Prompts
+
+1. **Open the YAML File**: Locate and open the prompt file you wish to edit. For example, `long_form_ai_writer.prompts`.
+
+2. **Modify Prompt Content**: Update the text within the prompts as needed. Be careful to retain the overall YAML structure and placeholders.
+
+3. **Save Changes**: After making your edits, save the file.
+
+**Different language models may require different prompting strategies. The quality of the content generated by the AI is directly influenced by the quality of the prompts you provide.**
diff --git a/lib/workspace/prompts/long_form_ai_writer.prompts b/lib/workspace/prompts/long_form_ai_writer.prompts
new file mode 100644
index 00000000..fb4b48c6
--- /dev/null
+++ b/lib/workspace/prompts/long_form_ai_writer.prompts
@@ -0,0 +1,86 @@
+writing_guidelines: |
+  As an expert content writer and web researcher, demonstrate your world-class {content_type} content writing skills.
+  Follow these writing guidelines:
+  1. Write in {content_language} language.
+  2. Ensure your content appeals to the target audience of {target_audience}.
+  3. Maintain a consistent tone of {content_tone} throughout.
+  4. Ensure originality and human-like content.
+  5. Use simple {content_language} words to appeal to all readers.
+  6. Format your content using {output_format}.
+  7. Avoid words like: Unleash, ultimate, uncover, discover, elevate, revolutionizing, unveiling, harnessing, dive, delve into, embrace.
+  Remember, your main goal is to write as much as you can. Expanding content is good; summarizing is bad.
+  8. Always use the given web research results in your writing.
+
+
+
+content_title: |
+  As an expert {content_language} content writer specializing in SEO writing, your task is to create a blog title following these guidelines:
+  1. Write a blog title for the given keywords: {content_keywords}.
+  2. Ensure the title appeals to the target audience of {target_audience}.
+  3. Review the provided web research results for {content_keywords}. Ensure your title competes effectively against them.
+  4. Avoid words like: Unleash, ultimate, uncover, discover, elevate, revolutionizing, unveiling, harnessing, dive, delve into, embrace.
+  Web research results:
+  """{{web_research_result}}"""
+
+
+
+content_outline: |
+  As an expert {content_language} content outliner specializing in SEO-optimized content, create a detailed content outline for the given title based on the provided context.
+  Title: {{content_title}}
+  Instructions:
+  1. Include most of the topics from the given web research results as context.
+  2. Ensure the outline appeals to the target audience of {target_audience}.
+  Web research results:
+  """{{web_research_result}}"""
+
+
+
+starting_prompt: |
+  As an expert {content_language} content writer specializing in SEO-optimized content, begin writing the content for the given title and outline.
+
+  Title:
+  """{{content_title}}"""
+
+  Outline:
+  """{{content_outline}}"""
+
+  Relevant web research results:
+  """{{web_research_result}}"""
+
+  ------------
+
+  First, silently review the content outline and title. Consider how to begin writing your content.
+  Start by writing the very beginning of the outline. You are not expected to finish the entire content now.
+  Your writing should be detailed, only scratching the surface of the first bullet point of your outline.
+  Write a minimum of 700 words.
+
+  """{{writing_guidelines}}"""
+
+
+
+continuation_prompt: |
+  As an expert {content_language} content writer and web researcher specializing in SEO-optimized content, continue writing the content for the given title and outline.
+
+  Title:
+  """{{content_title}}"""
+
+  Outline:
+  """{{content_outline}}"""
+
+  Relevant web research results:
+  """{{web_research_result}}"""
+
+  ===========
+
+  You've begun writing the content. Continue from where you left off.
+  Here's what you've written so far:
+  """{{content_text}}"""
+
+  =====
+  First, silently review the content outline and what you've written so far.
+  Identify the next part of your outline to write.
+  Continue from where you left off, focusing only on the next parts of the outline.
+  You are not expected to finish the entire content now.
+  Write a minimum of 700 words. Once the content is completely finished, write IAMDONE. Remember, do NOT write entire sections right now.
+
+  """{{writing_guidelines}}"""
diff --git a/main_config b/main_config
index 2de63913..cdbf825d 100644
--- a/main_config
+++ b/main_config
@@ -9,15 +9,15 @@
 [blog_characteristics]
 
 # Length of blogs Or word count. Note: It wont be exact and depends on GPT providers and Max token count.
-blog_length = 1200
+blog_length = 2000
 
 # company/brand-name
 
 # professional, how-to, begginer, research, programming, casual, etc
-blog_tone = "Professional"
+blog_tone = "Casual"
 
 # Target Audience, Gen-Z, Tech-savvy, Working professional, students, kids etc
-blog_demographic = "Students"
+blog_demographic = "Content creators & Digital marketing"
 
 # informational, commercial, company, news, finance, competitor, programming, scholar etc
 blog_type = "Informational"
@@ -59,12 +59,12 @@ num_images = 1
 gpt_provider = google
 
 # Mention which model of the above provider to use.
-model = gpt-3.5-turbo-0125
+model = gemini-1.5-flash-latest
 
 # Temperature is a parameter that controls the “creativity” or randomness of the text generated by GPT.
 # greater determinism and higher values indicating more randomness.
 # while a lower temperature (e.g., 0.2) makes the output more deterministic and focused (thus, getting flagged as AI content).
-temperature = 0.6
+temperature = 0.7
 
 # Top-p sampling is particularly useful in scenarios where you want to control the level of diversity in the generated text. 
 # By adjusting the threshold p, you can influence the diversity of the generated sequences. 
@@ -124,7 +124,7 @@ time_range = anytime
 
 # include_domains (Give Full URLs, separate by comma): A list of domains to specifically include in the search results. 
 # Default is None, which includes all domains. Example: https://wikipedia.com,https://stackoverflow.com,google schalor,reddit etc
-include_domains =
+include_domains = https://alwrity.com
 
 # similar_url : A single URL, this will instruct search engines to give results similar to the given URL.
 similar_url =