Blogen-V0.1 Added features. WIP

2023-12-21 21:21:09 +05:30
parent eaf13c2d16
commit 8f89de7b69
21 changed files with 775 additions and 471 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -16,4 +16,4 @@ pseo-experiments/lib/python3.10/
 pseo-experiments/bin/
 blog_images/
 blogs/
-pseo_website/
+.env
--- a/lib/.gpte_consent
+++ b/lib/.gpte_consent
@@ -1 +0,0 @@
 true
--- a/lib/combine_research_and_blog.py
+++ b/lib/combine_research_and_blog.py
@@ -1,31 +1,35 @@
 from .gpt_providers.openai_chat_completion import openai_chatgpt
 def blog_with_research(report, blog):
    """Combine the given online research and gpt blog content"""
    prompt = f"""
        You are an expert copywriter specializing in content optimization for SEO.
-        I will provide you with a research report and a blog content on the same topic.
+        I will provide you with a 'research report' and a 'blog content' on the same topic.
        Treat the research report as the context for the blog and better it accordingly.
        Your task is to transform and combine the given research and blog content into a well-structured, unique
-        and engaging blog article. 
+        and engaging blog article.
        Your objectives include:
        1. Master the report and blog content: Understand main ideas, key points, and the core message.
        2. Sentence Structure: Rephrase while preserving logical flow and coherence.
-        3. Identify Main Keyword: Determine the primary topic and combine the articles on the main topic.
+        3. Identify Main Keywords: Determine the primary topic and combine the articles on the main topic.
-        4. Keyword Integration: Naturally integrate keywords in headings, subheadings, and body text, avoiding overuse.
+        4. REMEMBER: From the research report, include links and cititations to make your article more authoratative.
-        5. Write Unique Content: Avoid direct copying from given report and blog; rewrite in your own words and style.
+        5. Write Code snippets: Check if given report is on programming, then write code snippets where applicable.
-        6. Optimize for SEO: Generate high quality informative content. 
+        6. Optimize for SEO: Generate high quality informative content.
        Implement SEO best practises with appropriate keyword density.
        7. Craft Engaging and Informative Article: Provide value and insight to readers.
        8. Proofread: Important to Check for grammar, spelling, and punctuation errors.
-        9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, 
+        9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases,
        interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures.
-        10. Structuring: Include an Introduction, subtopics and use bullet points or 
+        10. Blog Structuring: Include an Introduction, subtopics and use bullet points or
-        numbered lists if appropriate. Important to include FAQs, and Conclusion.
+        numbered lists if appropriate. Important to include FAQs, Conclusion and Referances.
        11. Ensure Uniqueness: Guarantee the article is plagiarism-free. Write in unique, informative style.
        12. Punctuation: Use appropriate question marks at the end of questions.
        13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
-        14. REMEMBER to give final response as complete HTML.
+        14. REMEMBER: Use the formatting style of given research report and include citations, referances in combined article.
-        Follow these guidelines to create a well-optimized, unique, and informative article 
+
                Follow these guidelines to combine and write a new, unique, and informative blog article
        that will rank well in search engine results and engage readers effectively.
        Create a blog post from the given research report and blog content below.
@@ -37,4 +41,4 @@ def blog_with_research(report, blog):
        response = openai_chatgpt(prompt)
        return response
    except Exception as err:
-        SystemError(f"Error in combining research report and blog content.")
+        SystemError(f"Error in combining blog and research report.")
--- a/lib/convert_content_to_markdown.py
+++ b/lib/convert_content_to_markdown.py
@@ -1,3 +1,6 @@
 from .gpt_providers.openai_chat_completion import openai_chatgpt
 def convert_tomarkdown_format(blog_content):
    """ Helper for converting content to markdown format for static sites. """
    prompt = f"""
@@ -17,6 +20,7 @@ def convert_tomarkdown_format(blog_content):
    Specify a language for syntax highlighting.
    6). Horizontal Lines: Create a horizontal line using three or more asterisks, dashes, or underscores (---, ***).
    7). Table Formatting: Use pipes | and dashes - to create tables. Align text with colons.
    8). Remember to use suitable emojis for the given blog content.
    Convert the given blog content in well organised markdown content: {blog_content}"""
    try:
--- a/lib/convert_markdown_to_html.py
+++ b/lib/convert_markdown_to_html.py
@@ -1,3 +1,5 @@
 from .gpt_providers.openai_chat_completion import openai_chatgpt
 def convert_markdown_to_html(md_content):
    """ Helper function to convert given text to HTML
    """
--- a/lib/get_blog_meta_desc.py
+++ b/lib/get_blog_meta_desc.py
@@ -1,17 +1,31 @@
-def generate_blog_description(blog_content):
+from .gpt_providers.openai_chat_completion import openai_chatgpt
 import google.generativeai as genai
 def generate_blog_description(blog_content, gpt_providers):
    """
        Prompt designed to give SEO optimized blog descripton
    """
    prompt = f"""As an expert SEO and blog writer, Compose a compelling meta description for the given blog content, 
-        adhering to SEO best practices. Keep it between 150-160 characters, incorporating active verbs, 
+        adhering to SEO best practices. Keep it between 150-160 characters. 
-        avoiding all caps and excessive punctuation. Ensure relevance, engage users, and encourage clicks.
+        Provide a glimpse of the content's value to entice readers.
        Use keywords naturally and provide a glimpse of the content's value to entice readers.
        Respond with only one of your best effort and do not include your explanations. 
        Blog Content: {blog_content}"""
-
+    if 'gemini' in gpt_providers:
-    try:
+        try:
-        # TBD: Add logic for which_provider and which_model
+            genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
-        response = openai_chatgpt(prompt)
+        except Exception as err:
-        return response
+            logger.error("Failed in getting GEMINI_API_KEY")
-    except Exception as err:
+        # Use gemini-pro model for text and image.
-        SystemError(f"Error in generating blog description: {err}")
+        model = genai.GenerativeModel('gemini-pro')
        try:
            response = model.generate_content(prompt)
            return response.text
        except Exception as err:
            logger.error("Failed to get response from gemini.")
    elif 'openai' in gpt_providers:
        try:
            response = openai_chatgpt(prompt)
            return response
        except Exception as err:
            SystemError(f"Error in generating blog summary: {err}")
--- a/lib/get_blog_tags.py
+++ b/lib/get_blog_tags.py
@@ -1,16 +0,0 @@
 def get_blog_tags(blog_article):
    """
        Suggest tags for the given blog content.

        Wraps `blog_article` in an SEO-oriented prompt that asks for two
        comma separated tags and sends it to `openai_chatgpt`.

        Args:
            blog_article: Blog text (or a summary of it) to derive tags from.

        Returns:
            The value returned by `openai_chatgpt` for the prompt.

        Raises:
            SystemError: If the call to `openai_chatgpt` fails; the original
                exception is chained as the cause.
    """
    # The prompt deliberately limits the answer to 2 tags.
    prompt = f"""As an expert SEO and blog writer, suggest only 2 relevant and specific blog tags
         for the given blog content. Only reply with comma separated values. 
         Blog content:  {blog_article}."""
    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
    except Exception as err:
        # The exception used to be constructed and dropped, which hid the
        # failure and made the function return None. Raise it instead.
        raise SystemError(f"Error in generating blog tags: {err}") from err
    else:
        return response
--- a/lib/get_blog_title.py
+++ b/lib/get_blog_title.py
@@ -1,20 +1,33 @@
-def generate_blog_title(blog_meta_desc):
+from .gpt_providers.openai_chat_completion import openai_chatgpt
 import google.generativeai as genai
 def generate_blog_title(blog_meta_desc, gpt_providers):
    """
    Given a blog title generate an outline for it
    """
    # TBD: Remove hardcoding, make dynamic
    prompt = f"""As a SEO expert and content writer, I will provide you with meta description of blog. 
        Your task is write a SEO optimized, call to action and engaging blog title for it.
        Follows SEO best practises to suggest the blog title. 
        Please keep the titles concise, not exceeding 60 words, and ensure to maintain their meaning. 
        Respond with only one title and no description or keyword like Title: 
-        Generate blog title for this given blog content: {blog_meta_desc}
+        Generate blog title for this given meta description: {blog_meta_desc}
        """
-    # The suggested {num_subtopics} outline should include few long-tailed keywords and most popular questions.
+   if 'gemini' in gpt_providers:
-    # TBD: Include --niche
+        try:
-    logger.info(f"Prompt used for blog title :{prompt}")
+            genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
-    try:
+        except Exception as err:
-        response = openai_chatgpt(prompt)
+            logger.error("Failed in getting GEMINI_API_KEY")
-    except Exception as err:
+        # Use gemini-pro model for text and image.
-        SystemError(f"Error in generating Blog Title: {err}")
+        model = genai.GenerativeModel('gemini-pro')
-    return response
+        try:
            response = model.generate_content(prompt)
            return response.text
        except Exception as err:
            logger.error("Failed to get response from gemini.")
    elif 'openai' in gpt_providers:
        try:
            response = openai_chatgpt(prompt)
            return response
        except Exception as err:
            SystemError(f"Error in generating blog summary: {err}") 
--- a/lib/get_code_examples.py
+++ b/lib/get_code_examples.py
@@ -0,0 +1,47 @@
 """
 At the command line, only need to run once to install the package via pip:
 $ pip install google-generativeai
 """
 import os
 import google.generativeai as genai
 # Read the key from the environment (same variable the other provider
 # helpers in this project use) instead of a hard-coded placeholder string.
 genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
 # Set up the model
 generation_config = {
  "temperature": 1,
  "top_p": 1,
  "top_k": 1,
  "max_output_tokens": 2048,
 }
 # Block medium-and-above content in each of the four harm categories.
 safety_settings = [
  {
    "category": "HARM_CATEGORY_HARASSMENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_HATE_SPEECH",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  },
  {
    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
  }
 ]
 model = genai.GenerativeModel(model_name="gemini-pro",
                              generation_config=generation_config,
                              safety_settings=safety_settings)
 # NOTE(review): the prompt below is a plain string, so "{blog_content}" is
 # sent to the model literally; nothing in this script substitutes real blog
 # text yet. TODO: confirm where the blog content is meant to come from.
 prompt_parts = [
  "As an expert programmer and web researcher, I will provide you with blog content. Your task is to understand the blog content and do web research around the main keywords. Check if the blog content is about programming then provide me with original code examples, relevant to the blog content. The provided code example should be of high coding standards, include docstring and follow pep8 standards. Do not provide explanations for your response.\nBlog content: \"\"\" {blog_content} \"\"\"\n ",
 ]
 # Runs at import time: sends the prompt and prints the model's text reply.
 response = model.generate_content(prompt_parts)
 print(response.text)
--- a/lib/get_tags.py
+++ b/lib/get_tags.py
@@ -0,0 +1,31 @@
 from .gpt_providers.openai_chat_completion import openai_chatgpt
 import google.generativeai as genai
 def get_blog_tags(blog_article, gpt_providers):
    """
        Suggest tags for the given blog content using the selected provider.

        Args:
            blog_article: Blog text (or a summary of it) to derive tags from.
            gpt_providers: Provider selector. It is tested with `in`, so a
                string containing 'gemini' / 'openai' or a collection holding
                one of those names both match. 'gemini' is checked first.

        Returns:
            The text of the Gemini response, or the value returned by
            `openai_chatgpt`. None when the Gemini request fails or when no
            known provider is selected.

        Raises:
            SystemError: If the call to `openai_chatgpt` fails; the original
                exception is chained as the cause.
    """
    # Imported here because this module's import block provides neither name;
    # loguru is the logger used elsewhere in this project.
    import os
    from loguru import logger
    # The prompt deliberately limits the answer to 2 tags.
    prompt = f"""As an expert SEO and blog writer, suggest only 2 relevant and specific blog tags
         for the given blog content. Only reply with comma separated values. 
         Blog content:  {blog_article}."""
    if 'gemini' in gpt_providers:
        try:
            genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
        except Exception as err:
            logger.error(f"Failed in getting GEMINI_API_KEY: {err}")
        # Use gemini-pro model for text and image.
        model = genai.GenerativeModel('gemini-pro')
        try:
            response = model.generate_content(prompt)
            return response.text
        except Exception as err:
            # Best effort on this path: log and fall through to return None.
            logger.error(f"Failed to get response from gemini: {err}")
    elif 'openai' in gpt_providers:
        try:
            response = openai_chatgpt(prompt)
            return response
        except Exception as err:
            # Previously the exception object was built but never raised.
            raise SystemError(f"Error in generating blog tags: {err}") from err
    return None
--- a/lib/get_text_response.py
+++ b/lib/get_text_response.py
@@ -17,10 +17,6 @@ from tqdm import tqdm, trange
 import time
 import re
 from textwrap import dedent
 import nltk
 nltk.download('punkt', quiet=True)
 from nltk.corpus import stopwords
 nltk.download('stopwords', quiet=True)
 from .gpt_providers.openai_gpt_provider import gen_new_from_given_img
 from .gpt_providers.openai_chat_completion import openai_chatgpt
@@ -29,6 +25,15 @@ from .generate_image_from_prompt import generate_image
 from .write_blogs_from_youtube_videos import youtube_to_blog
 from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
 from .gpt_online_researcher import do_online_research
 from .save_blog_to_file import save_blog_to_file
 from .optimize_images_for_upload import optimize_image
 from .combine_research_and_blog import blog_with_research
 from .get_blog_meta_desc import generate_blog_description
 from .get_blog_title import generate_blog_title
 from .get_tags import get_blog_tags
 from .get_blog_category import get_blog_categories
 from .convert_content_to_markdown import convert_tomarkdown_format
 from .convert_markdown_to_html import convert_markdown_to_html
 from loguru import logger
 logger.remove()
@@ -49,8 +54,8 @@ image_dir = os.path.join(os.getcwd(), image_dir)
 output_path = "blogs"
 output_path = os.path.join(os.getcwd(), output_path)
 wordpress_url = ''
-wordpress_username = 'upaudel750'
+wordpress_username = ''
-wordpress_password = 'YvCS VbzQ QSp8 4XZe 0DUw Myys'
+wordpress_password = ''
 def generate_youtube_blog(yt_url_list, output_format="markdown"):
@@ -61,8 +66,11 @@ def generate_youtube_blog(yt_url_list, output_format="markdown"):
    for a_yt_url in yt_url_list:
        try:
            logger.info(f"Starting to write blog on URL: {a_yt_url}")
-            yt_blog, yt_title = youtube_to_blog(a_yt_url)
+            blog_markdown_str, yt_title = youtube_to_blog(a_yt_url)
-            if not yt_title or not yt_blog:
+            logger.warning("\n\n--------------- First Draft of the Blog: --------\n\n")
            logger.info(f"{blog_markdown_str}\n")
            logger.warning("--------------------END of First draft----------\n\n")
            if not yt_title or not blog_markdown_str:
                logger.error("No content or title for audio to proceed.")
                sys.exit(1)
        except Exception as e:
@@ -75,24 +83,32 @@ def generate_youtube_blog(yt_url_list, output_format="markdown"):
            if not research_report:
                logger.error(f"Error in do_online_research returned no report: {e}")
                sys.exit(1)
            logger.warning(f"\n\n---------------Online Research Report: {yt_title} --------\n\n")
            logger.info(f"{research_report}\n")
            logger.warning("--------------------END of Research Report----------\n\n")
        except Exception as e:
            logger.error(f"Error in do_online_research: {e}")
            sys.exit(1)
        try:
            # Note: Check if the order of input matters for your function
            logger.info("Preparing a blog content from audio script and online research content...")
-            blog_with_research(research_report, yt_blog)
+            blog_markdown_str = blog_with_research(research_report, blog_markdown_str)
            logger.warning("\n\n--------------- Second Blog Draft after online research: --------\n\n")
            logger.info(f"{blog_markdown_str}\n")
            logger.warning("--------------------END of Second draft----------\n\n")
        except Exception as e:
            logger.error(f"Error in blog_with_research: {e}")
            sys.exit(1)
        try:
            # Get the title and meta description of the blog.
-            blog_meta_desc = generate_blog_description(yt_blog)
+            blog_meta_desc = generate_blog_description(blog_markdown_str, "gemini")
-            title = generate_blog_title(blog_meta_desc)
+            title = generate_blog_title(blog_meta_desc, "gemini")
            logger.info(f"Title is {title} and description is {blog_meta_desc}")
-            blog_markdown_str = "# " + title.replace('"', '') + "\n\n"
+            # Regex pattern to match 'Title:', 'title:', 'TITLE:', etc., followed by optional whitespace
            title = re.sub(re.compile(r'(?i)title:\s*'), '', title)
            #blog_markdown_str = "# " + title.replace('"', '') + "\n\n"
            # Get blog tags and categories.
            blog_tags = get_blog_tags(blog_meta_desc)
            logger.info(f"Blog tags are: {blog_tags}")
@@ -100,47 +116,58 @@ def generate_youtube_blog(yt_url_list, output_format="markdown"):
            logger.info(f"Blog categories are: {blog_categories}")
            # Generate an introduction for the blog
-            blog_intro = get_blog_intro(title, yt_blog)
+            #blog_intro = get_blog_intro(title, blog_markdown_str)
-            logger.info(f"The Blog intro is:\n {blog_intro}")
+            #logger.info(f"The Blog intro is:\n {blog_intro}")
-            blog_markdown_str = blog_markdown_str + "\n\n" + f"{blog_intro}" + "\n\n"
+            #blog_markdown_str = blog_markdown_str + "\n\n" + f"{blog_intro}" + "\n\n"
            # Generate an image based on meta description
            logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
            main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
            main_img_path = optimize_image(main_img_path)
            # Get a variation of the yt url screenshot to use in the blog.
            #varied_img_path = gen_new_from_given_img(yt_img_path, image_dir)
            #logger.info(f"Image path: {main_img_path} and varied path: {varied_img_path}")
-            #blog_markdown_str = blog_markdown_str + f'![img-description]({os.path.basename(varied_img_path)})' + '_Image Caption_'
+            #blog_markdown_str = blog_markdown_str + f'![img-description]({os.path.basename(varied_img_path)})' + f'_{yt_title}_'
            #stbdiff_img_path = generate_image(yt_img_path, image_dir, "stable_diffusion")
            #logger.info(f"Image path: {main_img_path} from stable diffusion: {stbdiff_img_path}")
-            #blog_markdown_str = blog_markdown_str + f'![img-description]({os.path.basename(stbdiff_img_path)})' + f'_{title}_'
+            #blog_markdown_str = blog_markdown_str + f'![img-description]({os.path.basename(stbdiff_img_path)})' + f'_{yt_title}_'
            # Add the body of the blog content.
-            blog_markdown_str = blog_markdown_str + "\n\n" + f'{yt_blog}' + "\n\n"
+            #blog_markdown_str = blog_markdown_str + "\n\n" + f'{yt_blog}' + "\n\n"
            # Get the Conclusion of the blog, by passing the generated blog.
-            blog_conclusion = get_blog_conclusion(blog_markdown_str)
+            #blog_conclusion = get_blog_conclusion(blog_markdown_str)
            # TBD: Add another image.
-            blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n\n" + f"{blog_conclusion}" + "\n"
+            #blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n\n" + f"{blog_conclusion}" + "\n"
            # Proofread the blog, edit and remove dubplicates and refine it further.
            # Presently, fixing the blog keywords to be tags and categories.
-            blog_keywords = f"{blog_tags} + {blog_categories}"
+            #blog_keywords = f"{blog_tags} + {blog_categories}"
-            blog_markdown_str = blog_proof_editor(blog_markdown_str, blog_keywords)
+            #blog_markdown_str = blog_proof_editor(blog_markdown_str, blog_keywords)
            #logger.warning("\n\n--------------- 3rd draft after proofreading: --------\n\n")
            #logger.info(f"{blog_markdown_str}\n")
            #logger.warning("--------------------END of 3rd draft----------\n\n")
            # Check the type of blog format needed by the user.
            if 'html' in output_format:
-                blog_markdown_str = convert_tomarkdown_format(blog_markdown_str)
+                logger.info("Converting final blog to HTML format.")
            elif 'markdown' in output_path:
                blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
            elif 'markdown' in output_format:
                logger.info("Converting final blog to Markdown style.")
                blog_markdown_str = convert_tomarkdown_format(blog_markdown_str)
            logger.warning("\n\n--------------- Final Blog Content: --------\n\n")
            logger.info(f"{blog_markdown_str}\n")
            logger.warning("--------------------END of Blog Content----------\n\n")
            # Try to save the blog content in a file, in whichever format. Just dump it.
            try:
-                save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
+                save_blog_to_file(blog_markdown_str, title, blog_meta_desc, 
                        blog_tags, blog_categories, main_img_path, output_path)
            except Exception as err:
-                logger.error("Failed to Save blog content: {blog_markdown_str}")
+                logger.error(f"Failed to Save blog content: {err}")
        except Exception as e:
            # raise assertionerror
@@ -288,29 +315,6 @@ def generate_blog_topics(blog_keywords, num_blogs, niche):
        SystemError(f"Error in generating blog topics: {err}")
 def generate_blog_title(blog_meta_desc):
    """
    Generate a blog title from the blog's meta description.

    Wraps `blog_meta_desc` in an SEO-oriented prompt asking for a single
    title, logs the prompt, and sends it to `openai_chatgpt`.

    Args:
        blog_meta_desc: Meta description of the blog to be titled.

    Returns:
        The value returned by `openai_chatgpt` for the prompt.

    Raises:
        SystemError: If the call to `openai_chatgpt` fails; the original
            exception is chained as the cause.
    """
    # TBD: Remove hardcoding, make dynamic
    prompt = f"""As a SEO expert and content writer, I will provide you with meta description of blog. 
        Your task is write a SEO optimized, call to action and engaging blog title for it.
        Follows SEO best practises to suggest the blog title. 
        Please keep the titles concise, not exceeding 60 words, and ensure to maintain their meaning. 
        Respond with only one title and no description or keyword like Title: 
        Generate blog title for this given meta description: {blog_meta_desc}
        """
    # TBD: Include --niche
    logger.info(f"Prompt used for blog title :{prompt}")
    try:
        response = openai_chatgpt(prompt)
    except Exception as err:
        # The exception used to be built and discarded, after which
        # `return response` failed with UnboundLocalError. Raise the intended
        # error instead.
        raise SystemError(f"Error in generating Blog Title: {err}") from err
    return response
 def generate_topic_outline(blog_title, num_subtopics):
    """
    Given a blog title generate an outline for it
    """
@@ -386,122 +390,6 @@ def get_blog_conclusion(blog_content):
        return response
 def generate_blog_description(blog_content):
    """
        Generate an SEO-oriented meta description for the given blog content.

        Wraps `blog_content` in a prompt asking for one 150-160 character
        description and sends it to `openai_chatgpt`.

        Args:
            blog_content: Text of the blog to describe.

        Returns:
            The value returned by `openai_chatgpt` for the prompt.

        Raises:
            SystemError: If the call to `openai_chatgpt` fails; the original
                exception is chained as the cause.
    """
    prompt = f"""As an expert SEO and blog writer, Compose a compelling meta description for the given blog content, 
        adhering to SEO best practices. Keep it between 150-160 characters, incorporating active verbs, 
        avoiding all caps and excessive punctuation. Ensure relevance, engage users, and encourage clicks.
        Use keywords naturally and provide a glimpse of the content's value to entice readers.
        Respond with only one of your best effort and do not include your explanations. 
        Blog Content: {blog_content}"""
    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
        return response
    except Exception as err:
        # Previously the exception object was created but never raised, so
        # failures were swallowed and None was returned.
        raise SystemError(f"Error in generating blog description: {err}") from err
 def get_blog_tags(blog_article):
    """
        Suggest tags for the given blog content.

        Wraps `blog_article` in an SEO-oriented prompt that asks for two
        comma separated tags and sends it to `openai_chatgpt`.

        Args:
            blog_article: Blog text (or a summary of it) to derive tags from.

        Returns:
            The value returned by `openai_chatgpt` for the prompt.

        Raises:
            SystemError: If the call to `openai_chatgpt` fails; the original
                exception is chained as the cause.
    """
    # The prompt deliberately limits the answer to 2 tags.
    prompt = f"""As an expert SEO and blog writer, suggest only 2 relevant and specific blog tags
         for the given blog content. Only reply with comma separated values. 
         Blog content:  {blog_article}."""
    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
    except Exception as err:
        # Raise the error that was previously only constructed, so a provider
        # failure is no longer hidden behind a None return.
        raise SystemError(f"Error in generating blog tags: {err}") from err
    else:
        return response
 def get_blog_categories(blog_article):
    """
    Suggest blog categories for the given blog content.

    Wraps `blog_article` in a prompt asking for two comma separated
    categories and sends it to `openai_chatgpt`.

    Args:
        blog_article: Blog text (or a summary of it) to categorise.

    Returns:
        The value returned by `openai_chatgpt` for the prompt.

    Raises:
        SystemError: If the call to `openai_chatgpt` fails; the original
            exception is chained as the cause.
    """
    prompt = f"""As an expert SEO and content writer, I will provide you with blog content.
            Suggest only 2 blog categories which are most relevant to provided blog content,
            by identifying the main topic. Also consider the target audience and the
            blog's category taxonomy. Only reply with comma separated values. The blog content is: {blog_article}"
            """
    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
    except Exception as err:
        # Raise the error that was previously only constructed, so a provider
        # failure is no longer hidden behind a None return.
        raise SystemError(f"Error in generating blog categories: {err}") from err
    else:
        return response
 def save_blog_to_file(blog_content, blog_title,
        blog_meta_desc, blog_tags, blog_categories, main_img_path, file_type="md"):
    """ Common function to save the generated blog to a file.

    For markdown output, writes a front matter block followed by
    `blog_content` to `<output_path>/<YYYY-MM-DD>-<slug>.md`, where
    `output_path` is the module-level output directory and the slug is
    derived from `blog_title`.

    Args:
        blog_content: Body of the blog, written after the front matter.
        blog_title: Title; colons are removed before it is written to the
            front matter, and a dashed slug of it is used in the file name.
        blog_meta_desc: Meta description for the front matter.
        blog_tags: Tags, inserted inside `[...]` in the front matter.
        blog_categories: Categories, inserted inside `[...]`.
        main_img_path: Path of the main image; only its base name is written.
        file_type: can be md or html. Only "md" is handled here; any other
            value results in nothing being written.

    Raises:
        Exception: If the blog file cannot be written.
    """
    logger.info(f"The blog will be saved at: {output_path}")
    logger.debug(f"Blog Title is: {blog_title}")
    # Build the file-name slug. Stop words must be removed while the words are
    # still separated by spaces; once they are joined by dashes the tokenizer
    # no longer sees individual words.
    cleaned_title = re.sub('[^a-zA-Z0-9- ]', '', blog_title)
    blog_title_md = remove_stop_words(cleaned_title)
    blog_title_md = re.sub('[- ]+', '-', blog_title_md).strip('-')
    if not blog_title_md:
        # Title consisted only of stop words: fall back to the unfiltered slug.
        blog_title_md = re.sub('[- ]+', '-', cleaned_title).strip('-')
    # Colons are dropped from the title written to the front matter.
    blog_title = blog_title.replace(":", "")
    if ':' in blog_meta_desc:
        # Keep only the text between the first and second colon, so the value
        # written to the front matter contains no colon.
        # NOTE(review): presumably meant to drop a leading "Label:" prefix;
        # text after a second colon is discarded too — confirm this is wanted.
        blog_meta_desc = blog_meta_desc.split(':')[1].strip()
    if not os.path.exists(output_path):
        logger.error(f"Error: Blog output directory is set to {output_path}, which Does Not Exist.")
    # Different output formats are plaintext, html and markdown.
    if file_type == "md":
        logger.info("Writing/Saving the resultant blog content in Markdown format.")
        # fill the Front Matter as below at the top of the post: https://jekyllrb.com/docs/front-matter/
        # date: YYYY-MM-DD HH:MM:SS +/-TTTT
        from zoneinfo import ZoneInfo
        dtobj = datetime.datetime.now(tz=ZoneInfo('Asia/Kolkata'))
        formatted_date = dtobj.strftime('%Y-%m-%d %H:%M:%S %z')
        blog_frontmatter = f"""\
                        ---
                        title: {blog_title}
                        date: {formatted_date}
                        categories: [{blog_categories}]
                        tags: [{blog_tags}]
                        description: {blog_meta_desc}
                        img_path: '/assets/'
                        image:
                            path: {os.path.basename(main_img_path)}
                            alt: {blog_title}
                        ---\n\n"""
        # Create a new file named YYYY-MM-DD-TITLE.EXTENSION and put it in the _posts of the root directory.
        # Please note that the EXTENSION must be one of md or markdown
        # The same timestamp is used for the file name and the front matter so
        # the two dates cannot disagree.
        blog_output_path = os.path.join(
                output_path,
                f"{dtobj.strftime('%Y-%m-%d')}-{blog_title_md}.md"
                )
        # Save the generated blog content to a file. UTF-8 is set explicitly so
        # non-ASCII content does not depend on the platform default.
        try:
            with open(blog_output_path, "w", encoding="utf-8") as f:
                f.write(dedent(blog_frontmatter))
                f.write(blog_content)
        except Exception as e:
            raise Exception(f"Failed to write blog content: {e}") from e
        logger.info(f"\nSuccessfully saved and Posted blog at: {blog_output_path}\n")
 def get_related_keywords(num_blogs, keywords, niche):
    """
    Helper function to get more keywords from GPTs.
@@ -525,131 +413,6 @@ def get_related_keywords(num_blogs, keywords, niche):
        SystemError(f"Error in getting related keywords.")
 # Helper function
 def remove_stop_words(sentence):
    """Return `sentence` with English stop words stripped out.

    The sentence is tokenized with NLTK; every token whose lowercase form
    appears in NLTK's English stop-word list is dropped and the surviving
    tokens are re-joined with single spaces.
    """
    tokens = nltk.word_tokenize(sentence)
    english_stop_words = set(stopwords.words('english'))
    kept_tokens = []
    for token in tokens:
        if token.lower() in english_stop_words:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)
 def convert_tomarkdown_format(blog_content):
    """Ask the LLM to reformat blog content as markdown for static sites.

    Args:
        blog_content (str): Blog text to convert; it is interpolated at the
            end of the prompt.

    Returns:
        Whatever `openai_chatgpt` returns for the conversion prompt.

    Raises:
        SystemError: If the call to `openai_chatgpt` fails. The underlying
            exception is chained as the cause.
    """
    prompt = f"""
    As an expert in markdown language format and font matter, used for static webpages.
    Your task is to convert and improve formatting of given blog content.
    Do Not modify the content, only modify to convert it into highly readable blog content.
    Use below guidelines and include other best practises:
    1). Headers for Structure: Use # for main headings and increase the number of # for 
    subheadings (##, ###, etc.). Organize given content into clear, hierarchical sections.
    2). Emphasizing Text: Use single asterisks or underscores for italic (*italic* or _italic_), 
    double for bold (**bold** or __bold__), and triple for bold italic (***bold italic***).
    3). Lists: For unordered lists, use dashes, asterisks, or plus signs (-, *, +). 
    For ordered lists, use numbers followed by periods (1., 2., etc.).
    4). Blockquotes: Use > for blockquotes, and add additional > for nested blockquotes.
    5). Code Blocks: Use backticks for inline code (code) and triple backticks for code blocks. 
    Specify a language for syntax highlighting.
    6). Horizontal Lines: Create a horizontal line using three or more asterisks, dashes, or underscores (---, ***).
    7). Table Formatting: Use pipes | and dashes - to create tables. Align text with colons.
    Convert the given blog content in well organised markdown content: {blog_content}"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # The exception used to be constructed but never raised, so a failed
        # conversion silently returned None to the caller.
        raise SystemError("Error in converting to Markdown format.") from err
 def convert_markdown_to_html(md_content):
    """Ask the LLM to convert markdown text to HTML.

    Args:
        md_content (str): Markdown text; it is interpolated at the end of
            the prompt.

    Returns:
        Whatever `openai_chatgpt` returns for the conversion prompt.

    Raises:
        SystemError: If the call to `openai_chatgpt` fails. The underlying
            exception is chained as the cause.
    """
    prompt =f"""
 			You are a skilled web developer tasked with converting a Markdown-formatted text to HTML. 
            You will be given text in markdown format. Follow these steps to perform the conversion:
 			1. Parse User's Markdown Input: You will receive a Markdown-formatted text as input from the user. 
            Carefully analyze the provided Markdown text, paying attention to different elements such as headings (#), 
            lists (unordered and ordered), bold and italic text, links, images, and code blocks.
 			2. Generate and Validate HTML: Generate corresponding HTML code for each Markdown element following 
            the conversion guidelines below. Ensure the generated HTML is well-structured and syntactically correct.
 			3. Preserve Line Breaks: Markdown line breaks (soft breaks) represented by two spaces at the end of a 
            line should be converted to <br> tags in HTML to preserve the line breaks.
 			4. REMEMBER to generate complete, valid HTML response only.
 			Follow below Conversion Guidelines:
 			- Headers: Convert Markdown headers (#, ##, ###, etc.) to corresponding HTML header tags (<h1>, <h2>, <h3>, etc.).
 			- Lists: Convert unordered lists (*) and ordered lists (1., 2., 3., etc.) to <ul> and <ol> HTML tags, respectively. 
            List items should be enclosed in <li> tags.
 			- Emphasis: Convert bold (**) and italic (*) text to <strong> and <em> HTML tags, respectively.
 			- Links: Convert Markdown links ([text](url)) to HTML anchor (<a>) tags. Ensure the href attribute contains the correct URL.
 			- Images: Convert Markdown image tags (![alt text](image_url)) to HTML image (<img>) tags. 
            Include the alt attribute for accessibility.
 			- Code: Convert inline code (`code`) to <code> HTML tags. Convert code blocks (```) to <pre> HTML tags 
            for preserving formatting.
 			- Blockquotes: Convert blockquotes (>) to <blockquote> HTML tags.
 			Convert the following Markdown text to HTML:  {md_content}
            """
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # The exception used to be constructed but never raised, so a failed
        # conversion silently returned None to the caller.
        raise SystemError("Error in convert to HTML") from err
 def blog_with_research(report, blog):
    """Combine the given online research and gpt blog content.

    Args:
        report (str): Research report used as context for the blog.
        blog (str): Draft blog content to be merged with the report.

    Returns:
        Whatever `openai_chatgpt` returns for the combination prompt (the
        prompt asks for a complete HTML response).

    Raises:
        SystemError: If the call to `openai_chatgpt` fails. The underlying
            exception is chained as the cause.
    """
    prompt = f"""
        You are an expert copywriter specializing in content optimization for SEO.
        I will provide you with a research report and a blog content on the same topic.
        Treat the research report as the context for the blog and better it accordingly.
        Your task is to transform and combine the given research and blog content into a well-structured, unique
        and engaging blog article. 
        Your objectives include:
        1. Master the report and blog content: Understand main ideas, key points, and the core message.
        2. Sentence Structure: Rephrase while preserving logical flow and coherence.
        3. Identify Main Keyword: Determine the primary topic and combine the articles on the main topic.
        4. Keyword Integration: Naturally integrate keywords in headings, subheadings, and body text, avoiding overuse.
        5. Write Unique Content: Avoid direct copying from given report and blog; rewrite in your own words and style.
        6. Optimize for SEO: Generate high quality informative content. 
        Implement SEO best practises with appropriate keyword density.
        7. Craft Engaging and Informative Article: Provide value and insight to readers.
        8. Proofread: Important to Check for grammar, spelling, and punctuation errors.
        9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, 
        interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures.
        10. Structuring: Include an Introduction, subtopics and use bullet points or 
        numbered lists if appropriate. Important to include FAQs, and Conclusion.
        11. Ensure Uniqueness: Guarantee the article is plagiarism-free. Write in unique, informative style.
        12. Punctuation: Use appropriate question marks at the end of questions.
        13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
        14. REMEMBER to give final response as complete HTML.
        Follow these guidelines to create a well-optimized, unique, and informative article 
        that will rank well in search engine results and engage readers effectively.
        Create a blog post from the given research report and blog content below.
        Research report: {report}
        Blog content: {blog}
        """
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Two fixes: the exception was built but never raised (failures
        # returned None), and its message was copy-pasted from the
        # related-keywords helper.
        raise SystemError("Error in combining research report and blog content.") from err
 def blog_proof_editor(blog_content, blog_keywords):
    """
        Helper for blog proof reading.
@@ -659,12 +422,12 @@ def blog_proof_editor(blog_content, blog_keywords):
        exit(1)
    prompt = f"""I am looking for detailed editing and enhancement of the given blog post, 
-        with a particular focus on maintaining originality. 
+        with a particular focus on originality. I will provide you with a blog content and its keywords. 
-        The topic of the content is [{blog_keywords}]. Please go through the blog and make direct edits to improve it, 
+        The keywords for the blog are [{blog_keywords}]. Please go through the blog and make direct edits to improve it, 
        ensuring the final output is both high-quality and original. 
        Note: There are duplicates headings and corresponding paragraphs, rewrite into one subheading.
-        Here are the specific areas to focus on:
+        Here are the specific guidelines to focus on:
        1). Ensure Originality: Edit any sections that lack originality, replacing them with unique and creative content.
        2). Eliminate Repetitive Language: Rewrite repetitive phrases with varied and engaging language.
@@ -677,8 +440,11 @@ def blog_proof_editor(blog_content, blog_keywords):
        7). Remove Redundancies: Important, Cut out any redundant information or overly complex jargon.
        8). Refine Overall Structure: Make structural changes to improve the overall impact of the content.
        9). Remember, rewrite all content that repeated, while maintaining the formatting of the given blog text.
        10). Remember Not to include SEO meta description and Title in your final response.
        11). REMEMBER to maintain the formatting style of the provided blog.
        12). Judge if the given blog is about technology then provide code snippets and examples for it.
-        Please apply these changes directly to the following blog text and provide the edited version: 
+        Please make direct changes as per above guideline to the provided blog text below: 
        [{blog_content}]. """
    try:
--- a/lib/gpt_online_researcher.py
+++ b/lib/gpt_online_researcher.py
@@ -68,11 +68,11 @@ def do_online_research(query):
                        f'Please use MLA format and markdown syntax.')
        }]
        # Run GPT-4
-        logging.info("Generating report with GPT-4...")
+        logging.info("Generating Research report with GPT-4...")
        lc_messages = convert_openai_messages(prompt)
        try:
            report = ChatOpenAI(model='gpt-4', openai_api_key=openai_api_key).invoke(lc_messages).content
-            logging.info(f"\n Below is the online research report for given keywords/title: \n\n{report}")
+            #logging.info(f"\n Below is the online research report for given keywords/title: \n\n{report}")
            return report
        except Exception as err:
            logging.error("Failed to generate do_online_research with ChatOpenAI")
--- a/lib/gpt_providers/stt_audio_blog.py
+++ b/lib/gpt_providers/stt_audio_blog.py
@@ -71,7 +71,7 @@ def speech_to_text(video_url, output_path='.'):
                file=open(audio_file, "rb"),
                response_format="text"
            )
-            logger.info("\nYouTube video transcription:\n\n{transcript}\n")
+            logger.info(f"\nYouTube video transcription:\n\n{transcript}\n")
            return transcript, yt.title
        except Exception as e:
--- a/lib/online_research_agent.py
+++ b/lib/online_research_agent.py
@@ -0,0 +1,201 @@
 import os
 import requests
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain import PromptTemplate
 from langchain.chains.summarize import load_summarize_chain
 from bs4 import BeautifulSoup
 from langchain.chat_models import ChatOpenAI
 from dotenv import load_dotenv
 import json
 from autogen import config_list_from_json
 from autogen.agentchat.contrib.gpt_assistant_agent import GPTAssistantAgent
 from autogen import UserProxyAgent
 import autogen
 # Load variables from a .env file into the process environment before reading keys.
 load_dotenv()
 # NOTE: the name is misspelled ("brwoserless") but web_scraping() references it
 # under exactly this spelling, so it must not be renamed in isolation.
 brwoserless_api_key = os.getenv("BROWSERLESS_API_KEY")
 # Key sent as the X-API-KEY header by google_search().
 serper_api_key = os.getenv("SERP_API_KEY")
 # Bearer token used by the Airtable helper functions.
 airtable_api_key = os.getenv("AIRTABLE_API_KEY")
 # Model configuration shared by all agents created below.
 config_list = config_list_from_json("OAI_CONFIG_LIST")
 # ------------------ Create functions ------------------ #
 # Function for google search
 def google_search(search_keyword, timeout=30):
    """Run a search through the Serper API and return the raw response body.

    Args:
        search_keyword (str): Query sent as the "q" field of the request.
        timeout (float): Seconds to wait for the HTTP request. Without a
            timeout `requests` can block indefinitely on an unresponsive
            server.

    Returns:
        str: The response body text exactly as returned by the API.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    url = "https://google.serper.dev/search"
    payload = json.dumps({
        "q": search_keyword
    })
    headers = {
        'X-API-KEY': serper_api_key,
        'Content-Type': 'application/json'
    }
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
    print("RESPONSE:", response.text)
    return response.text
 # Function for summarizing long content (used by the scraping function below)
 def summary(objective, content):
    """Summarize `content` with respect to `objective` via a map-reduce chain.

    The content is split into chunks of up to 10000 characters (500
    overlap), and the same prompt template is used for both the map step
    and the combine step.

    Returns:
        The output of the summarize chain's `run` call.
    """
    chat_model = ChatOpenAI(temperature = 0, model = "gpt-3.5-turbo-16k-0613")
    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n"],
        chunk_size = 10000,
        chunk_overlap=500,
    )
    chunks = splitter.create_documents([content])
    map_prompt = """
    Write a summary of the following text for {objective}:
    "{text}"
    SUMMARY:
    """
    template = PromptTemplate(template=map_prompt, input_variables=["text", "objective"])
    chain = load_summarize_chain(
        llm=chat_model,
        chain_type='map_reduce',
        map_prompt = template,
        combine_prompt = template,
        verbose = False
    )
    return chain.run(input_documents=chunks, objective=objective)
 def web_scraping(objective: str, url: str, timeout: float = 30):
    """Fetch a page through browserless.io and return its text or a summary.

    Args:
        objective: The original task given to the agent; passed to
            `summary` when the page text is too long.
        url: Address of the website to scrape.
        timeout: Seconds to wait for the HTTP request. Without a timeout
            `requests` can block indefinitely.

    Returns:
        The page text, a summary of it when the text exceeds 10000
        characters, or None when the HTTP status is not 200.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    print("Scraping website...")
    headers = {
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/json',
    }
    # The content endpoint expects a JSON body naming the page to render.
    data_json = json.dumps({"url": url})
    response = requests.post(
        f"https://chrome.browserless.io/content?token={brwoserless_api_key}",
        headers=headers,
        data=data_json,
        timeout=timeout,
    )
    if response.status_code != 200:
        print(f"HTTP request failed with status code {response.status_code}")
        # Explicit so callers can see that failure yields None.
        return None
    soup = BeautifulSoup(response.content, "html.parser")
    text = soup.get_text()
    print("CONTENTTTTTT:", text)
    if len(text) > 10000:
        return summary(objective, text)
    return text
 # Function for getting Airtable records
 def get_airtable_records(base_id, table_id, timeout=30):
    """Fetch records of an Airtable table and return the decoded JSON.

    Args:
        base_id (str): Airtable base identifier used in the URL.
        table_id (str): Airtable table identifier used in the URL.
        timeout (float): Seconds to wait for the HTTP request. Without a
            timeout `requests` can block indefinitely.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the body is not valid JSON.
    """
    url = f"https://api.airtable.com/v0/{base_id}/{table_id}"
    headers = {
        'Authorization': f'Bearer {airtable_api_key}',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    data = response.json()
    print(data)
    return data
 # Function for updating a single Airtable record
 def update_single_airtable_record(base_id, table_id, id, fields, timeout=30):
    """Patch one Airtable record and return the decoded JSON response.

    Args:
        base_id (str): Airtable base identifier used in the URL.
        table_id (str): Airtable table identifier used in the URL.
        id (str): Identifier of the record to update. The name shadows the
            builtin but is kept because callers pass it by this name.
        fields (dict): Field values to write to the record.
        timeout (float): Seconds to wait for the HTTP request. Without a
            timeout `requests` can block indefinitely.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the body is not valid JSON.
    """
    url = f"https://api.airtable.com/v0/{base_id}/{table_id}"
    headers = {
        'Authorization': f'Bearer {airtable_api_key}',
        "Content-Type": "application/json"
    }
    # The request body wraps the single record in a "records" list.
    data = {
        "records": [{
            "id": id,
            "fields": fields
        }]
    }
    response = requests.patch(url, headers=headers, data=json.dumps(data), timeout=timeout)
    return response.json()
 # ------------------ Create agent ------------------ #
 # NOTE: everything below runs at import time, including the chat started at
 # the bottom of the file; importing this module kicks off the conversation.
 # Create user proxy agent
 # A message counts as terminating when its content contains "TERMINATE".
 # NOTE(review): the lambda assumes msg["content"] supports `in` (e.g. is a
 # string) - confirm it can never be None.
 user_proxy = UserProxyAgent(name="user_proxy",
    is_termination_msg=lambda msg: "TERMINATE" in msg["content"],
    human_input_mode="ALWAYS",
    max_consecutive_auto_reply=1
    )
 # Create researcher agent
 # Bound to an existing assistant by its hard-coded assistant_id.
 researcher = GPTAssistantAgent(
    name = "researcher",
    llm_config = {
        "config_list": config_list,
        "assistant_id": "asst_qyvioid5My8K3SdFClaEnwmB"
    }
 )
 # The researcher can call the scraping and search helpers defined above.
 researcher.register_function(
    function_map={
        "web_scraping": web_scraping,
        "google_search": google_search
    }
 )
 # Create research manager agent
 # No functions are registered for this agent.
 research_manager = GPTAssistantAgent(
    name="research_manager",
    llm_config = {
        "config_list": config_list,
        "assistant_id": "asst_C1Ta5XmmEcYD6vnOSVflnwG9"
    }
 )
 # Create director agent
 director = GPTAssistantAgent(
    name = "director",
    llm_config = {
        "config_list": config_list,
        "assistant_id": "asst_zVBJGch5mOyCYl9H1J3L9Ime",
    }
 )
 # The director can read and update Airtable records.
 director.register_function(
    function_map={
        "get_airtable_records": get_airtable_records,
        "update_single_airtable_record": update_single_airtable_record
    }
 )
 # Create group chat
 # All four agents share one chat, limited to 15 rounds.
 groupchat = autogen.GroupChat(agents=[user_proxy, researcher, research_manager, director], messages=[], max_round=15)
 group_chat_manager = autogen.GroupChatManager(groupchat=groupchat, llm_config={"config_list": config_list})
 # ------------------ start conversation ------------------ #
 # Opening task for the group chat; the Airtable URL is hard-coded.
 message = """
 Research the funding stage/amount & pricing for each company in the list: https://airtable.com/appj0J4gFpvLrQWjI/tblF4OmG6oLjYtgZl/viwmFx2ttAVrJm0E3?blocks=hide
 """
 user_proxy.initiate_chat(group_chat_manager, message=message)
--- a/lib/optimize_images_for_upload.py
+++ b/lib/optimize_images_for_upload.py
@@ -0,0 +1,112 @@
 import sys
 import os
 import tinify
 from PIL import Image
 from loguru import logger
 from tqdm import tqdm
 from dotenv import load_dotenv 
 # By default load_dotenv() looks for the .env file in the current directory.
 # If the .env file lives elsewhere, pass its path: load_dotenv("path/to/.env")
 load_dotenv()
 # Retrieve the Tinify API key from the TINIFY_API_KEY environment variable
 tinify.key = os.getenv('TINIFY_API_KEY')
 # Configure logger: drop the existing handlers and log to stdout with
 # level, file:line:function and message.
 logger.remove()
 logger.add(sys.stdout, colorize=True, format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}")
 def compress_image(image_path, quality=45, resize=None, preserve_exif=False):
    """
    Compress and optionally resize an image, and overwrite the original image.

    Errors while opening or saving the image are logged, not raised.

    Args:
        image_path (str): Path to the original image.
        quality (int): Quality of the output image (1-100).
        resize (tuple): Tuple (width, height) to resize image.
        preserve_exif (bool): Preserve EXIF data if True.

    Returns:
        None. The file at `image_path` is rewritten in place.
    """
    if not os.path.exists(image_path):
        logger.error(f"Image path does not exist: {image_path}")
        return
    original_size = os.path.getsize(image_path)
    try:
        with Image.open(image_path) as img:
            save_kwargs = {"format": img.format, "quality": quality, "optimize": True}
            # Only pass `exif` when there is EXIF data to keep. Passing
            # exif=None makes some format writers fail, because they expect
            # bytes.
            if preserve_exif and 'exif' in img.info:
                save_kwargs["exif"] = img.info['exif']
            if resize:
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name.
                img = img.resize(resize, Image.LANCZOS)
            img.save(image_path, **save_kwargs)
            compressed_size = os.path.getsize(image_path)
            reduction = (1 - (compressed_size / original_size)) * 100
            logger.info(f"Compressed {image_path}, Reduction: {reduction:.2f}%")
    except Exception as e:
        logger.error(f"Error compressing image {image_path}: {e}")
 def is_image_file(filename):
    """
    Tell whether a filename carries a known image extension.

    The comparison is case-insensitive and looks only at the end of the name.

    Args:
        filename (str): Name of the file to check.
    Returns:
        bool: True if the file is an image, False otherwise.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp')
    lowered = filename.lower()
    return lowered.endswith(image_suffixes)
 def convert_to_webp(image_path):
    """
    Save a WebP copy of an image next to the original.

    The original file is left in place. Failures are logged, in which case
    the function returns None.

    Args:
        image_path (str): Path to the original image.
    Returns:
        str: Path to the WebP image.
    """
    if os.path.exists(image_path):
        try:
            with Image.open(image_path) as source_image:
                base_name, _ = os.path.splitext(image_path)
                target_path = base_name + '.webp'
                source_image.save(target_path, 'WEBP')
                logger.info(f"Converted {image_path} to WebP")
                return target_path
        except Exception as e:
            logger.error(f"Error converting image to WebP: {e}")
    else:
        logger.error(f"Image path does not exist: {image_path}")
 def compress_image_tinyfy(image_path):
    """
    Compress an image in place through the Tinify API.

    A missing file or a Tinify error is logged rather than raised.

    Args:
        image_path (str): Path to the original image.
    """
    if os.path.exists(image_path):
        try:
            tinify.from_file(image_path).to_file(image_path)
            logger.info(f"Compressed {image_path} using Tinyfy API")
        except tinify.Error as e:
            logger.error(f"Tinyfy API error: {e}")
    else:
        logger.error(f"Image path does not exist: {image_path}")
 def optimize_image(image_path):
    """
    Convert an image to WebP and compress the result.

    The WebP copy is compressed first through the Tinify API and then
    locally with `compress_image`.

    Args:
        image_path (str): Path to the original image.
    Returns:
        str: Path to the optimized WebP image, or None when the WebP
        conversion did not produce a file.
    """
    webp_path = convert_to_webp(image_path)
    if webp_path is None:
        # convert_to_webp already logged the reason. Passing None on would
        # raise a TypeError in os.path.exists() inside the compress helpers.
        return None
    compress_image_tinyfy(webp_path)
    compress_image(webp_path)
    return webp_path
--- a/lib/save_blog_to_file.py
+++ b/lib/save_blog_to_file.py
@@ -1,76 +1,101 @@
-def save_blog_to_file(blog_content, blog_title, 
+import sys
-        blog_meta_desc, blog_tags, blog_categories, main_img_path, file_type="md"):
+import os
-    """ Common function to save the generated blog to a file.
+import re
-    arg: file_type can be md or html
+import datetime
 from textwrap import dedent
 import logging
 from zoneinfo import ZoneInfo
 import nltk
 from nltk.corpus import stopwords
 from loguru import logger
 logger.remove()
 logger.add(sys.stdout,
        colorize=True,
        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
    )
 def save_blog_to_file(blog_content, blog_title, blog_meta_desc, blog_tags, blog_categories, main_img_path, output_path, file_type="md"):
    """
-    # Convert the spaces in blog_title with dash
+    Saves the provided blog content to a file in the specified format.
-    logger.info(f"The blog will be saved at: {output_path}")
+
-    logger.debug(f"Blog Title is: {blog_title}")
+    Args:
-    blog_title_md = blog_title
+        blog_content (str): The main content of the blog.
-    regex = re.compile('[^a-zA-Z0-9- ]')
+        blog_title (str): Title of the blog.
-    blog_title_md = regex.sub('', blog_title_md)
+        blog_meta_desc (str): Meta description of the blog.
-    blog_title= blog_title.replace(":", "")
+        blog_tags (list): List of tags associated with the blog.
-    blog_title_md = re.sub('--+', '-', blog_title_md)
+        blog_categories (list): List of categories associated with the blog.
-    blog_title_md = blog_title_md.replace(' ', '-')
+        main_img_path (str): Path to the main image of the blog.
        output_path (str): Path to the directory where the blog will be saved.
        file_type (str, optional): The file format for saving the blog ('md' for Markdown or 'html' for HTML). Defaults to 'md'.
    Raises:
        FileNotFoundError: If the output_path does not exist.
        Exception: If the blog content cannot be written to the file.
    """
    # Sanitize and prepare the blog title
    # Remove colon and ampersand
    blog_title_md = blog_title.replace(":", "").replace("&", "")
    # Replace spaces with hyphens
    blog_title_md = blog_title_md.replace(" ", "-")
    blog_title_md = re.sub('[^A-Za-z0-9-]', '', blog_title_md)
    # Replace multiple consecutive dashes with a single dash
    blog_title_md = re.sub('-+', '-', blog_title_md)
    blog_title_md = remove_stop_words(blog_title_md)
    logger.debug(f"Blog Title is: {blog_title_md}")
-    if ':' in blog_meta_desc:
+    # Check if output path exists
        blog_meta_desc  = blog_meta_desc.split(':')[1].strip()
    if not os.path.exists(output_path):
-        logger.error("Error: Blog output directory is set to {output_path}, which Does Not Exist.")
+        logger.error(f"Error: Blog output directory is set to {output_path}, which does not exist.")
        raise FileNotFoundError(f"Output directory does not exist: {output_path}")
-    # Different output formats are plaintext, html and markdown.
+    # Handle Markdown file type
-    if file_type in "md":
+    if file_type == "md":
-        logger.info(f"Writing/Saving the resultant blog content in Markdown format.")
+        logger.info("Writing/Saving the resultant blog content in Markdown format.")
-        # fill the Front Matter as below at the top of the post: https://jekyllrb.com/docs/front-matter/
+        dtobj = datetime.datetime.now(ZoneInfo('Asia/Kolkata'))
-        # date: YYYY-MM-DD HH:MM:SS +/-TTTT
+        formatted_date = dtobj.strftime('%Y-%m-%d %H:%M:%S %z')
-        from zoneinfo import ZoneInfo
+        blog_title = blog_title.replace(":", "-").replace('"', '')
-        tz=ZoneInfo('Asia/Kolkata')
+        blog_frontmatter = dedent(f"""\
        dtobj = datetime.datetime.now(tz=ZoneInfo('Asia/Kolkata'))
        formatted_date = f"{dtobj.strftime('%Y-%m-%d %H:%M:%S %z')}"
        blog_frontmatter = f"""\
                        ---
                        title: {blog_title}
                        date: {formatted_date}
                        categories: [{blog_categories}]
                        tags: [{blog_tags}]
-                        description: {blog_meta_desc}
+                        description: {blog_meta_desc.replace(":", "-")}
                        img_path: '/assets/'
                        image:
                            path: {os.path.basename(main_img_path)}
                            alt: {blog_title}
-                        ---\n\n"""
+                        ---\n\n""")
        # Create a new file named YYYY-MM-DD-TITLE.EXTENSION and put it in the _posts of the root directory. 
        # Please note that the EXTENSION must be one of md or markdown
        blog_output_path = os.path.join(
-                output_path,
+            output_path,
-                f"{datetime.date.today().strftime('%Y-%m-%d')}-{blog_title_md}.md"
+            f"{datetime.date.today().strftime('%Y-%m-%d')}-{blog_title_md}.md"
-                )
+        )
-        # Save the generated blog content to a file.
+
        # Write to the file
        try:
            with open(blog_output_path, "w") as f:
-                f.write(dedent(blog_frontmatter))
+                f.write(blog_frontmatter)
                f.write(blog_content)
        except Exception as e:
            raise Exception(f"Failed to write blog content: {e}")
-        logger.info(f"\nSuccessfully saved and Posted blog at: {blog_output_path,}\n")
+
        logger.info(f"Successfully saved and posted blog at: {blog_output_path}")
 # Helper function
 def remove_stop_words(sentence):
-    # Tokenize the sentence into words
+    """
    Removes stop words from a given sentence.
    Args:
        sentence (str): The sentence from which to remove stop words.
    Returns:
        str: The sentence after removing stop words.
    """
    words = nltk.word_tokenize(sentence)
    # Get the list of English stop words
    stop_words = set(stopwords.words('english'))
    # Remove stop words from the sentence
    filtered_words = [word for word in words if word.lower() not in stop_words]
-
+    return ' '.join(filtered_words)
    # Join the filtered words back into a sentence
    filtered_sentence = ' '.join(filtered_words)
    return filtered_sentence
--- a/lib/seo_module/is_content_ai_generated.py
+++ b/lib/seo_module/is_content_ai_generated.py
--- a/lib/seo_module/plagiarism_checker_from_known_sources.py
+++ b/lib/seo_module/plagiarism_checker_from_known_sources.py
--- a/lib/toc_example.py
+++ b/lib/toc_example.py
@@ -1,44 +0,0 @@
 from bs4 import BeautifulSoup
 import re
 def create_table_of_contents(html_content):
    """
    Prepend a linked table of contents to an HTML document.

    Every h1-h6 element receives an id of the form "toc_<n>" (numbered from
    1 in document order) and a matching list entry linking to it.

    Args:
    html_content (str): HTML content of the blog post.
    Returns:
    str: HTML content with a table of contents.
    """
    document = BeautifulSoup(html_content, 'html.parser')
    headings = document.find_all(re.compile('^h[1-6]$'))
    # Skeleton of the table of contents; entries go into its <ul>.
    toc = BeautifulSoup('<div id="table-of-contents"><h2>Table of Contents</h2><ul></ul></div>', 'html.parser')
    entries = toc.find('ul')
    position = 0
    for heading in headings:
        position += 1
        anchor = f"toc_{position}"
        heading['id'] = anchor
        item = document.new_tag('li')
        link = document.new_tag('a', href=f"#{anchor}")
        link.string = heading.get_text()
        item.append(link)
        entries.append(item)
    # The table of contents goes before all existing content.
    document.insert(0, toc)
    return str(document)
 # Example usage
 # NOTE: these statements run at import time and print the resulting HTML.
 html_content = "<h1>Title</h1><p>Some text</p><h2>Subtitle 1</h2><p>Text under subtitle 1</p><h2>Subtitle 2</h2><p>Text under subtitle 2</p>"
 html_with_toc = create_table_of_contents(html_content)
 print(html_with_toc)
--- a/lib/write_blogs_from_youtube_videos.py
+++ b/lib/write_blogs_from_youtube_videos.py
@@ -7,6 +7,8 @@ import tempfile
 import openai
 from html2image import Html2Image
 from tqdm import tqdm, trange
 import google.generativeai as genai
 from loguru import logger
 logger.remove()
 logger.add(sys.stdout,
@@ -36,7 +38,8 @@ def youtube_to_blog(video_url):
    try:
        # Summarizing the content of the YouTube video
-        audio_blog_content = summarize_youtube_video(audio_text)
+        audio_blog_content = summarize_youtube_video_openai(audio_text, "gemini")
        logger.info("Successfully converted given URL to blog article.")
        return audio_blog_content, audio_title
    except Exception as e:
        logger.error(f"Error in summarize_youtube_video: {e}")
@@ -44,41 +47,51 @@ def youtube_to_blog(video_url):
    return audio_blog_content
-def summarize_youtube_video(user_content):
+def summarize_youtube_video(user_content, gpt_providers):
-    """Generates a summary of a YouTube video using OpenAI GPT-3 and displays a progress bar.
+    """Generate a blog article from a YouTube video transcript using Gemini or OpenAI.
    Args:
      user_content: Transcript text of the video; it is interpolated into the prompt.
      gpt_providers: Provider selector; 'gemini' is checked first, then 'openai' (both via `in`).
    Returns:
      The generated article (Gemini's response text or the openai_chatgpt result),
      or None when the Gemini request fails or no known provider is selected.
    Raises:
      SystemError: If the OpenAI request fails.
    """
    logger.info("Start summarize_youtube_video..")
    prompt = f"""
-        You are an expert copywriter specializing in content optimization for SEO. 
+        You are an expert copywriter specializing in digital content writing. I will provide you with a transcript. 
-        Your task is to transform a given transcript into a well-structured and engaging blog article. Your objectives include:
+        Your task is to transform a given transcript into a well-structured and informative blog article. 
        Please follow the below objectives:
        1. Master the Transcript: Understand main ideas, key points, and the core message.
        2. Sentence Structure: Rephrase while preserving logical flow and coherence. Dont quote anyone from video.
-        3. Identify Main Keywords: Determine the primary video topic.
+        3. Note: Check if the transcript is about programming, then include code examples and snippets in your article.
-        4. Keyword Integration: Naturally integrate keywords in headings, subheadings, and body text, avoiding overuse.
+        4. Write Unique Content: Avoid direct copying; rewrite in your own words. 
-        5. Write Unique Content: Avoid direct copying; rewrite in your own words. 
+        5. REMEMBER to avoid direct quoting and maintain uniqueness.
-           REMEMBER to avoid direct quoting and maintain uniqueness.
+        6. Proofread: Check for grammar, spelling, and punctuation errors.
-        6. Optimize for SEO: Implement meta tags, header tags, and appropriate keyword density.
+        7. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, interjections, and colloquialisms.        8. Avoid repetitive phrases and unnatural sentence structures.
-        7. Craft Engaging and Informative Article: Provide value and insight to readers.
+        9. Ensure Uniqueness: Guarantee the article is plagiarism-free.
-        8. Proofread: Check for grammar, spelling, and punctuation errors.
+        10. Punctuation: Use appropriate question marks at the end of questions.
-        9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures.
+        11. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
-        10. Structuring: Include a Creative Title, SEO Meta-description, ## Introduction ##, bullet points or numbered lists if appropriate, FAQs, and Conclusion.
+        12. Rephrase words like 'video, youtube, channel' with 'article, blog' and such suitable words.
        11. Ensure Uniqueness: Guarantee the article is plagiarism-free.
        12. Punctuation: Use appropriate question marks at the end of questions.
        13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
        14. Rephrase words like 'video, youtube, channel' with 'article, blog' and such suitable words.
-        Follow the above guidelines to create a well-optimized, unique, and informative article that will rank well in search engine results and engage readers effectively.
+        Follow the above guidelines to create a well-optimized, unique, and informative article,
-        Craft a blog content from the following transcript:\n{user_content}
+        that will rank well in search engine results and engage readers effectively.
        Follow above guidelines to craft a blog content from the following transcript:\n{user_content}
        """
-    try:
+    if 'gemini' in gpt_providers:
-        response = openai_chatgpt(prompt)
+        try:
-        return response
+            genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
-    except Exception as err:
+        except Exception as err:
-        SystemError(f"Error in generating blog summary: {err}")
+            logger.error("Failed in getting GEMINI_API_KEY")
        # Use the gemini-pro model; only a text prompt is sent here.
        model = genai.GenerativeModel('gemini-pro')
        try:
            response = model.generate_content(prompt)
            return response.text
        except Exception as err:
            logger.error("Failed to get response from gemini.")
    elif 'openai' in gpt_providers:
        try:
            response = openai_chatgpt(prompt)
            return response
        except Exception as err:
            # Propagate the failure instead of silently returning None.
            raise SystemError(f"Error in generating blog summary: {err}") from err
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,133 @@
 aiofiles
 aiohttp
 aiosignal
 annotated-types
 anyio
 args
 async-timeout
 asyncio
 attrs
 beautifulsoup4
 blinker
 blis
 Brotli
 catalogue
 certifi
 cffi
 charset-normalizer
 chromedriver-autoinstaller
 click
 clint
 cloudpathlib
 colorama
 confection
 cssselect2
 cymem
 dataclasses-json
 decorator
 distro
 docopt
 duckduckgo-search
 exceptiongroup
 fastapi
 Flask
 fonttools
 frozenlist
 greenlet
 grpcio
 grpcio-tools
 h11
 h2
 hpack
 html2image
 html5lib
 httpcore
 httpx
 hyperframe
 idna
 imageio
 imageio-ffmpeg
 itsdangerous
 Jinja2
 joblib
 jsonpatch
 jsonpointer
 langchain
 langchain-core
 langcodes
 langsmith
 loguru
 lxml
 Markdown
 markdown2
 MarkupSafe
 marshmallow
 md2pdf
 moviepy
 multidict
 murmurhash
 mypy-extensions
 nltk
 numpy
 openai
 outcome
 packaging
 param
 permchain
 Pillow
 playwright
 preshed
 proglog
 protobuf
 pycparser
 pydantic
 pydantic_core
 pydub
 pydyf
 pyee
 pyphen
 PySocks
 python-dotenv
 python-multipart
 pytube
 PyYAML
 regex
 requests
 selenium
 serpapi
 six
 smart-open
 sniffio
 socksio
 sortedcontainers
 soupsieve
 spacy-legacy
 spacy-loggers
 SQLAlchemy
 srsly
 stability-sdk
 starlette
 tavily-python
 tenacity
 thinc
 tiktoken
 tinycss2
 tqdm
 trio
 trio-websocket
 typer
 typing-inspect
 typing_extensions
 urllib3
 uvicorn
 wasabi
 weasel
 weasyprint
 webdriver-manager
 webencodings
 websocket-client
 Werkzeug
 wsproto
 yarl
 youtube-transcript-api
 zopfli