From 3ec74d22b58a1b3cd17a26be58ff429fbe7d758d Mon Sep 17 00:00:00 2001
From: AjaySi <ajay.calsoft@gmail.com>
Date: Fri, 24 Nov 2023 15:15:27 +0530
Subject: [PATCH] Blogen-V0.1 Added features. WIP

---
 .gitignore                                    |   2 +
 lib/get_text_response.py                      | 257 +++++++--------
 lib/gpt-researcher                            |   1 -
 lib/gpt_online_researcher.py                  |  69 ++++
 lib/gpt_providers/openai_gpt_provider.py      |  80 +++++
 lib/gpt_vision_image_details.py               |  91 ++++++
 lib/plagiarism_checker/main.py                |  20 --
 .../plagiarism_checker_from_known_sources.py  |   0
 lib/stabl_diff_img2html.py                    |  10 +-
 .../wix_integration_bard.py                   |  78 -----
 .../wordpress_blog_uploader.py                | 101 ------
 lib/wordpress_blog_uploader.py                | 303 ++++++++++++++++++
 12 files changed, 681 insertions(+), 331 deletions(-)
 delete mode 160000 lib/gpt-researcher
 create mode 100644 lib/gpt_online_researcher.py
 create mode 100644 lib/gpt_vision_image_details.py
 delete mode 100644 lib/plagiarism_checker/main.py
 rename lib/{plagiarism_checker => }/plagiarism_checker_from_known_sources.py (100%)
 delete mode 100644 lib/webhosting_integrations/wix_integration_bard.py
 delete mode 100644 lib/webhosting_integrations/wordpress_blog_uploader.py
 create mode 100644 lib/wordpress_blog_uploader.py

diff --git a/.gitignore b/.gitignore
index e3e01753..4e8cad2d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,5 @@ pseo-experiments/lib/python3.10/
 pseo-experiments/bin/
 
 blog_images/
+
+pseo_website/
diff --git a/lib/get_text_response.py b/lib/get_text_response.py
index 784d9a43..45de7941 100644
--- a/lib/get_text_response.py
+++ b/lib/get_text_response.py
@@ -18,13 +18,16 @@ import time
 import re
 from textwrap import dedent
 import nltk
-nltk.download('punkt')
+nltk.download('punkt', quiet=True)
 from nltk.corpus import stopwords
-nltk.download('stopwords')
+nltk.download('stopwords', quiet=True)
 
 from .gpt_providers.openai_gpt_provider import openai_chatgpt, gen_new_from_given_img
+from .gpt_providers.openai_gpt_provider import analyze_and_extract_details_from_image
 from .generate_image_from_prompt import generate_image
 from .write_blogs_from_youtube_videos import youtube_to_blog
+from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
+
 from loguru import logger
 logger.remove()
 logger.add(sys.stdout,
@@ -38,7 +41,9 @@ image_dir = os.path.join(os.getcwd(), image_dir)
 # TBD: This can come from config file.
 output_path = "pseo_website/_posts/"
 output_path = os.path.join(os.getcwd(), output_path)
-
+wordpress_url = os.environ.get('WORDPRESS_URL', 'https://latestaitools.in/')
+wordpress_username = os.environ.get('WORDPRESS_USERNAME')  # never commit credentials
+wordpress_password = os.environ.get('WORDPRESS_PASSWORD')  # WordPress application password
 
 
 def generate_youtube_blog(yt_url_list):
@@ -70,9 +75,9 @@ def generate_youtube_blog(yt_url_list):
             #logger.info(f"Image path: {main_img_path} and varied path: {varied_img_path}")
             #blog_markdown_str = blog_markdown_str + f'![img-description]({os.path.basename(varied_img_path)})' + '_Image Caption_'
 
-            stbdiff_img_path = generate_image(yt_img_path, image_dir, "stable_diffusion")
-            logger.info(f"Image path: {main_img_path} from stable diffusion: {stbdiff_img_path}")
-            blog_markdown_str = blog_markdown_str + f'![img-description]({os.path.basename(stbdiff_img_path)})' + f'_{title}_'
+            #stbdiff_img_path = generate_image(yt_img_path, image_dir, "stable_diffusion")
+            #logger.info(f"Image path: {main_img_path} from stable diffusion: {stbdiff_img_path}")
+            #blog_markdown_str = blog_markdown_str + f'![img-description]({os.path.basename(stbdiff_img_path)})' + f'_{title}_'
             
             # Add the body of the blog content.
             blog_markdown_str = blog_markdown_str + "\n\n" + f'{yt_blog}' + "\n\n"
@@ -90,7 +95,10 @@ def generate_youtube_blog(yt_url_list):
             logger.info(f"Blog categories are: {blog_categories}")
 
             save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
-            #html_blog = convert_markdown_to_html(blog_markdown_str)
+            if 'html' in output_format:
+                blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
+
+            save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
             #print(html_blog)
 
         except Exception as e:
@@ -99,7 +107,8 @@ def generate_youtube_blog(yt_url_list):
             exit(1)
 
 
-def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics):
+def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
+        wordpress=False, output_format="HTML"):
     """
     This function will take a blog Topic to first generate sections for it
     and then generate content for each section.
@@ -109,27 +118,34 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics):
 
     # TBD: Check if the generated topics are equal to what user asked.
     blog_topic_arr = generate_blog_topics(blog_keywords, num_blogs, niche)
-    logger.info(f"Generated Blog Topics:---- {blog_topic_arr}\n")
-    
+    logger.info(f"Generated Blog Topics:---- \n{blog_topic_arr}\n")
+    # Split the string at newlines
+    blog_topic_arr = blog_topic_arr.split('\n')
+
     # For each of blog topic, generate content.
     for a_blog_topic in blog_topic_arr:
         # if md/html
-        blog_markdown_str = "# " + a_blog_topic.replace('"', '') + "\n\n"
+        a_blog_topic = a_blog_topic.replace('"', '')
+        a_blog_topic = re.sub(r'^[\d.\s]+', '', a_blog_topic)
+        blog_markdown_str = "# " + a_blog_topic + "\n\n"
+        
         # Get the introduction specific to blog title and sub topics.
         tpc_outlines = generate_topic_outline(a_blog_topic, num_subtopics)
+        tpc_outlines = tpc_outlines.split("\n")
         
         blog_intro = get_blog_intro(a_blog_topic, tpc_outlines)
-        logger.info(f"The intro is:\n {blog_intro}")
+        logger.info(f"The intro is:\n{blog_intro}")
         blog_markdown_str = blog_markdown_str + "### Introduction" + "\n\n" + f"{blog_intro}" + "\n\n"
-
+        print(f"\n\n 1 -- BLOG_STR : {blog_markdown_str}\n\n")
         # Now, for each blog we have sub topic. Generate content for each of the sub topic.
         for a_outline in tpc_outlines:
-            sub_topic_content = generate_topic_content(blog_keywords, a_outline)
+            a_outline = a_outline.replace('"', '')
             logger.info(f"Generating content for sub-topic: {a_outline}")
+            sub_topic_content = generate_topic_content(blog_keywords, a_outline)
             # a_outline is sub topic heading, hence part ToC also.
-            blog_markdown_str = blog_markdown_str + "\n\n" + f"### {a_outline}" + "\n\n"
+            #blog_markdown_str = blog_markdown_str + "\n\n" + f"### {a_outline}" + "\n\n"
             blog_markdown_str = blog_markdown_str + "\n" + f"\n {sub_topic_content}" + "\n\n"
-            blog_markdown_str = blog_markdown_str + "\n" + "-------------------------" + "\n"
+            print(f"\n\n 3 -- BLOG_STR : {blog_markdown_str}\n\n")
 
         # Get the Conclusion of the blog, by passing the generated blog.
         blog_conclusion = get_blog_conclusion(blog_markdown_str)
@@ -139,7 +155,11 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics):
         logger.info(f"Final blog content: {blog_markdown_str}")
 
         blog_meta_desc = generate_blog_description(blog_markdown_str)
-        logger.info(f"\nGet the blog meta description:{blog_meta_desc}")
+        logger.info(f"\nThe blog meta description is:{blog_meta_desc}\n")
+
+        # Generate an image based on meta description
+        logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
+        main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
         
         blog_tags = get_blog_tags(blog_markdown_str)
         logger.info(f"\nBlog tags for generated content: {blog_tags}")
@@ -147,10 +167,46 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics):
         blog_categories = get_blog_categories(blog_markdown_str)
         logger.info(f"Generated blog categories: {blog_categories}")
 
-        # TBD: Save the blog content as a .md file. Markdown or HTML ?
-        save_blog_to_file(blog_markdown_str, a_blog_topic, blog_meta_desc, blog_tags, blog_categories)
+        # Convert to HTML when asked (case-insensitive, so the default "HTML" matches) or when posting to WordPress.
+        if wordpress or 'html' in output_format.lower():
+            blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
 
-    # Use chatgpt to convert the text into HTML or markdown.
+        # Check if blog needs to be posted on wordpress.
+        if wordpress:
+            # Fixme: Fetch all existing tags and categories; reuse the ones that match and
+            # create new ones otherwise. It's better to use chatgpt than string comparison,
+            # because similar tags and categories would be missed.
+            # blog_categories = 
+            # blog_tags = 
+            main_img_path = compress_image(main_img_path, quality=85)
+            try:
+                img_details = analyze_and_extract_details_from_image(main_img_path)
+                alt_text = img_details.get('alt_text')
+                img_description = img_details.get('description')
+                img_title = img_details.get('title')
+                caption = img_details.get('caption')
+                try:
+                    media = upload_media(wordpress_url, wordpress_username, wordpress_password, 
+                        main_img_path, alt_text, img_description, img_title, caption)
+                except Exception as err:
+                    sys.exit(f"Error occurred in upload_media: {err}")
+            except Exception as e:
+                sys.exit(f"Error occurred in analyze_and_extract_details_from_image: {e}")
+
+            # Then create the post with the uploaded media as the featured image
+            media_id = media['id']
+            # blog_markdown_str is already HTML here: it was converted once above, so it is not converted again.
+            try:
+                upload_blog_post(wordpress_url, wordpress_username, wordpress_password, a_blog_topic, 
+                        blog_markdown_str, media_id, blog_meta_desc, blog_categories, blog_tags, status='publish')
+            except Exception as err:
+                sys.exit(f"Failed to upload blog to wordpress.Error: {err}")
+
+        # TBD: Save the blog content as a .md file. Markdown or HTML ?
+        save_blog_to_file(blog_markdown_str,
+                a_blog_topic,
+                blog_meta_desc, blog_tags,
+                blog_categories, main_img_path)
 
     # Now, we need perform some *basic checks on the blog content, such as:
     # is_content_ai_generated.py, plagiarism_checker_from_known_sources.py
@@ -167,18 +223,18 @@ def generate_blog_topics(blog_keywords, num_blogs, niche):
     one for generating unique blog content.
     Ex: Generate SEO optimized blog topics on given keywords
     """
-    prompt = f"""As an SEO specialist and blog content writer, please write {num_blogs} catchy 
+    prompt = f"""As an SEO specialist and blog writer, write {num_blogs} catchy
     and SEO-friendly blog topics on {blog_keywords}. The blog title must be less than 80 characters.
-    """
+    The blog titles must follow best SEO practises, be engaging and invite/tempt users to read full blog.
+    Do not include descriptions, explanations. Do not number the result."""
+
     # Beware of keywords stuffing, clustering, semantic should help avoid.
     if num_blogs > 5:
         # Get more keywords, based on user given keywords.
         more_keywords = get_related_keywords(num_blogs, blog_keywords, niche)
         prompt = prompt + """Use the following keywords wisely, without keyword stuffing: {more_keywords}"""
 
-    logger.info(f"prompt used for blog topics: {prompt}\n")
-    # Calculate the max tokens based on the number of blogs
-    max_tokens = min(1000, num_blogs * 100)
+    logger.info(f"Prompt used for generating blog topics: \n{prompt}\n")
     try:
         response = openai_chatgpt(prompt)
         return response
@@ -211,12 +267,13 @@ def generate_topic_outline(blog_title, num_subtopics):
     Given a blog title generate an outline for it
     """
     # TBD: Remove hardcoding, make dynamic
-    prompt = f"""As a SEO expert, suggest only {num_subtopics} 
-        beginner-friendly and insightful sub topic for the blog title: {blog_title}.
-        """
+    prompt = f"""As a SEO expert, suggest only {num_subtopics} beginner-friendly and 
+        insightful sub topics for the blog title: {blog_title}.
+        Respond with only answer and no description, explanations."""
+
     # The suggested {num_subtopics} outline should include few long-tailed keywords and most popular questions.
     # TBD: Include --niche
-    logger.info(f"\nPrompt used for blog title Outline :{prompt}\n\n")
+    logger.info(f"Prompt used for blog title Outline :\n{prompt}\n")
     # TBD: Add logic for which_provider and which_model
     try:
         response = openai_chatgpt(prompt)
@@ -231,41 +288,30 @@ def generate_topic_content(blog_keywords, sub_topic):
     """
     # The outline should contain various subheadings and include the starting sentence for each section.
     # TBD: Depending on the usecase 'Voice and style' will change to professional etc.
-    prompt = (f"As a professional blogger and topic authority on '{blog_keywords}',"
-            f"craft factual (no more than 700 characters) blog content on {sub_topic}."
-            "Your response should reflect Experience, Expertise, Authoritativeness, and Trustworthiness from content."
-            "Voice and style guide: Write in a professional manner, giving enlightening details and reasons."
-            "Use natural language and phrases that a real person would use: in normal conversations."
-            "Format your response using markdown. Use headings, subheadings, bullet points, and bold to organize the information."
-            )
+    prompt = f"""As a professional blogger and topic authority on {blog_keywords},
+            craft factual (no more than 200 characters) subtopic content on {sub_topic}.
+            Your response should reflect Experience, Expertise, Authoritativeness and Trustworthiness from content.
+            Voice and style guide: Write in a professional manner, giving enlightening details and reasons.
+            Use natural language and phrases that a real person would use: in normal conversations.
+            Format your response using markdown. REMEMBER Not to include introduction or conclusion in your response.
+            Use headings(h3 to h6 only), subheadings, bullet points, and bold to organize the information."""
+    logger.info(f"Generate topic content using prompt:\n{prompt}\n")
     try:
-        response = openai_chatgpt(
-            prompt,
-            model="gpt-3.5-turbo",
-            temperature=0.2,
-            max_tokens=1000,
-            top_p=0.9,
-            n=1
-            )
-        text_values = []
-        for choice in response["choices"]:
-            text_values.extend(choice["text"].split("\n"))
-        return (' '.join([element for element in text_values if element]))
+        response = openai_chatgpt(prompt)
+        return response
     except Exception as err:
         SystemError(f"Error in generating topic content: {err}")
 
-    return response.choices[0].text
-
 
 def get_blog_intro(blog_title, blog_topics):
     """
     Generate blog introduction as per title and sub topics
     """
-    prompt = f"""As a skilled wordsmith, I'll equip you with a blog title and relevant topics, tasking you with crafting an engaging introduction. Your challenge: Create a brief, compelling entry that entices readers to explore the entire post. This introduction must be concise (under 250 characters) yet powerful, clearly stating the blog's purpose and what readers stand to gain.
+    prompt = f"""As a skilled wordsmith, I'll equip you with a blog title and relevant topics, tasking you with crafting an engaging introduction. Your challenge: Create a brief, compelling entry that entices readers to explore the entire post. This introduction must be concise (under 250 characters) yet powerful, clearly stating the blog's purpose and what readers stand to gain. Reply with only the introduction.
 
 Intrigue your audience from the start with vibrant language, employing strong verbs and vivid descriptions. Address a common challenge your readers face, demonstrating empathy and positioning yourself as their go-to expert. Pose thought-provoking questions that prompt reader engagement and contemplation.
 
-Remember, your words matter. This introduction serves as the cornerstone of the blog post. It should not only captivate attention but also encourage deeper exploration. Additionally, strategically integrate relevant keywords to enhance visibility on search engine results pages (SERPs). Your mission: Craft an introduction that resonates, leaving readers eager to delve further into the titled piece: '{blog_title}', covering these intriguing sub-topics: {blog_topics}."""
+Remember, your words matter. This introduction serves as the cornerstone of the blog post. It should not only captivate attention but also encourage deeper exploration. Additionally, strategically integrate relevant keywords to enhance visibility on search engine results pages (SERPs). Your mission: Craft a blog introduction that resonates, leaving readers eager to delve further into the titled piece: '{blog_title}', covering these sub-topics: {blog_topics}."""
 
     try:
         # TBD: Add logic for which_provider and which_model
@@ -431,48 +477,6 @@ def get_related_keywords(num_blogs, keywords, niche):
         SystemError(f"Error in getting related keywords.")
 
 
-def convert_markdown_to_html(md_content):
-    """ Helper function to convert given text to HTML 
-    """
-    html_response = openai.ChatCompletion.create(
-          model="gpt-3.5-turbo-16k",
-          messages=[
-            {"role": "system", "content": """
-Convert Markdown to HTML:
-You are a skilled developer tasked with converting a Markdown-formatted text to HTML. You will be given text in markdown format. Follow these steps to perform the conversion:
-
-1. Parse User's Markdown Input: You will receive a Markdown-formatted text as input from the user. Carefully analyze the provided Markdown text, paying attention to different elements such as headings (#), lists (unordered and ordered), bold and italic text, links, images, and code blocks.
-2. Generate and Validate HTML: Generate corresponding HTML code for each Markdown element following the conversion guidelines below. Ensure the generated HTML is well-structured and syntactically correct.
-3. Preserve Line Breaks: Markdown line breaks (soft breaks) represented by two spaces at the end of a line should be converted to <br> tags in HTML to preserve the line breaks.
-4. REMEMBER to generate complete, valid HTML response only.
-
-Follow below Conversion Guidelines:
-- Headers: Convert Markdown headers (#, ##, ###, etc.) to corresponding HTML header tags (<h1>, <h2>, <h3>, etc.).
-- Lists: Convert unordered lists (*) and ordered lists (1., 2., 3., etc.) to <ul> and <ol> HTML tags, respectively. List items should be enclosed in <li> tags.
-- Emphasis: Convert bold (**) and italic (*) text to <strong> and <em> HTML tags, respectively.
-- Links: Convert Markdown links ([text](url)) to HTML anchor (<a>) tags. Ensure the href attribute contains the correct URL.
-- Images: Convert Markdown image tags (![alt text](image_url)) to HTML image (<img>) tags. Include the alt attribute for accessibility.
-- Code: Convert inline code (`code`) to <code> HTML tags. Convert code blocks (```) to <pre> HTML tags for preserving formatting.
-- Blockquotes: Convert blockquotes (>) to <blockquote> HTML tags.
-
-"""
-},
-            {"role": "user", "content": f"Convert the following Markdown text to HTML:\n\n{md_content}"}
-        ],
-          max_tokens=8192,
-          temperature=1,
-          n=1,
-          stream=True
-    )
-    for chunk in response:
-        print(chunk)
-    logger.info("Finished converting markdown to html.")
-    if "choices" in html_response and len(html_response["choices"]) > 0:
-      return html_response["choices"][0]["message"]["content"]
-    else:
-      return None
-
-
 # Helper function
 def remove_stop_words(sentence):
     # Tokenize the sentence into words
@@ -493,36 +497,35 @@ def remove_stop_words(sentence):
 def convert_markdown_to_html(md_content):
     """ Helper function to convert given text to HTML
     """
-    html_response = openai.ChatCompletion.create(
-          model="gpt-3.5-turbo-16k",
-          messages=[
-            {"role": "system", "content": """
-Convert Markdown to HTML:
-You are a skilled developer tasked with converting a Markdown-formatted text to HTML. You will be given text in markdown format. Follow these steps to perform the conversion:
-
-1. Parse User's Markdown Input: You will receive a Markdown-formatted text as input from the user. Carefully analyze the provided Markdown text, paying attention to different elements such as headings (#), lists (unordered and ordered), bold and italic text, links, images, and code blocks.
-2. Generate and Validate HTML: Generate corresponding HTML code for each Markdown element following the conversion guidelines below. Ensure the generated HTML is well-structured and syntactically correct.
-3. Preserve Line Breaks: Markdown line breaks (soft breaks) represented by two spaces at the end of a line should be converted to <br> tags in HTML to preserve the line breaks.
-4. REMEMBER to generate complete, valid HTML response only.
-
-Follow below Conversion Guidelines:
-- Headers: Convert Markdown headers (#, ##, ###, etc.) to corresponding HTML header tags (<h1>, <h2>, <h3>, etc.).
-- Lists: Convert unordered lists (*) and ordered lists (1., 2., 3., etc.) to <ul> and <ol> HTML tags, respectively. List items should be enclosed in <li> tags.
-- Emphasis: Convert bold (**) and italic (*) text to <strong> and <em> HTML tags, respectively.
-- Links: Convert Markdown links ([text](url)) to HTML anchor (<a>) tags. Ensure the href attribute contains the correct URL.
-- Images: Convert Markdown image tags (![alt text](image_url)) to HTML image (<img>) tags. Include the alt attribute for accessibility.
-- Code: Convert inline code (`code`) to <code> HTML tags. Convert code blocks (```) to <pre> HTML tags for preserving formatting.
-- Blockquotes: Convert blockquotes (>) to <blockquote> HTML tags.
-"""
-},
-            {"role": "user", "content": f"Convert the following Markdown text to HTML:\n\n{md_content}"}
-        ],
-          max_tokens=8192,
-          temperature=1,
-          n=1,
-    )
-    logger.info("Finished converting markdown to html.")
-    if "choices" in html_response and len(html_response["choices"]) > 0:
-      return html_response["choices"][0]["message"]["content"]
-    else:
-      return None
+    prompt =f"""
+			You are a skilled web developer tasked with converting a Markdown-formatted text to HTML. 
+            You will be given text in markdown format. Follow these steps to perform the conversion:
+			
+			1. Parse User's Markdown Input: You will receive a Markdown-formatted text as input from the user. 
+            Carefully analyze the provided Markdown text, paying attention to different elements such as headings (#), 
+            lists (unordered and ordered), bold and italic text, links, images, and code blocks.
+			2. Generate and Validate HTML: Generate corresponding HTML code for each Markdown element following 
+            the conversion guidelines below. Ensure the generated HTML is well-structured and syntactically correct.
+			3. Preserve Line Breaks: Markdown line breaks (soft breaks) represented by two spaces at the end of a 
+            line should be converted to <br> tags in HTML to preserve the line breaks.
+			4. REMEMBER to generate complete, valid HTML response only.
+			
+			Follow below Conversion Guidelines:
+			- Headers: Convert Markdown headers (#, ##, ###, etc.) to corresponding HTML header tags (<h1>, <h2>, <h3>, etc.).
+			- Lists: Convert unordered lists (*) and ordered lists (1., 2., 3., etc.) to <ul> and <ol> HTML tags, respectively. 
+            List items should be enclosed in <li> tags.
+			- Emphasis: Convert bold (**) and italic (*) text to <strong> and <em> HTML tags, respectively.
+			- Links: Convert Markdown links ([text](url)) to HTML anchor (<a>) tags. Ensure the href attribute contains the correct URL.
+			- Images: Convert Markdown image tags (![alt text](image_url)) to HTML image (<img>) tags. 
+            Include the alt attribute for accessibility.
+			- Code: Convert inline code (`code`) to <code> HTML tags. Convert code blocks (```) to <pre> HTML tags 
+            for preserving formatting.
+			- Blockquotes: Convert blockquotes (>) to <blockquote> HTML tags.
+			Convert the following Markdown text to HTML:  {md_content}
+            """
+    try:
+        # TBD: Add logic for which_provider and which_model
+        response = openai_chatgpt(prompt)
+        return response
+    except Exception as err:
+        raise SystemError(f"Error in converting markdown to HTML: {err}") from err
diff --git a/lib/gpt-researcher b/lib/gpt-researcher
deleted file mode 160000
index 6ada6e23..00000000
--- a/lib/gpt-researcher
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 6ada6e23d0bfe1728203c08f5ec022f374474faf
diff --git a/lib/gpt_online_researcher.py b/lib/gpt_online_researcher.py
new file mode 100644
index 00000000..d7368ff8
--- /dev/null
+++ b/lib/gpt_online_researcher.py
@@ -0,0 +1,69 @@
+################################################################
+#
+# GPT Researcher is an autonomous agent designed for comprehensive online research on a variety of tasks.
+# The agent can produce detailed, factual and unbiased research reports, with customization options for 
+# focusing on relevant resources, outlines, and lessons. Inspired by the recent Plan-and-Solve and RAG papers, 
+# GPT Researcher addresses issues of speed, determinism and reliability, offering a more stable 
+# performance and increased speed through parallelized agent work, as opposed to synchronous operations.
+#
+# The main idea is to run "planner" and "execution" agents, whereas the planner generates questions to research, 
+# and the execution agents seek the most related information based on each generated research question. 
+# Finally, the planner filters and aggregates all related information and creates a research report.
+#
+# The agents leverage both gpt3.5-turbo and gpt-4-turbo (128K context) to complete a research task. 
+# We optimize for costs using each only when necessary. 
+# The average research task takes around 3 minutes to complete, and costs ~$0.1.
+# 
+##############################################################
+
+# import and connect
+from tavily import TavilyClient
+
+def do_research_on(research_query):
+    """
+    Run an advanced Tavily web search for the query (e.g. a blog title) and ask
+    gpt-4 to write a markdown report from the results. Exits on any failure.
+    """
+    import os  # local import: nothing at module level imports os
+    # NOTE(review): convert_openai_messages and ChatOpenAI are never imported in
+    # this module -- presumably they come from langchain; confirm and import.
+    # $ export TAVILY_API_KEY={Your Tavily API Key here}
+    try:
+        client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
+    except Exception as err:
+        raise SystemExit(f"Failed to create TavilyClient: {err}")
+
+    try:
+        # run tavily search
+        research_content = client.search(
+                research_query, search_depth="advanced",
+                include_answer=True, max_results=10)["results"]
+    except Exception as err:
+        raise SystemExit(f"Unable to do tavily search: {err}")
+
+    # setup prompt
+    prompt = [{
+        "role": "system",
+        "content":  f'You are an AI critical thinker research assistant. '\
+                f'Your sole purpose is to write well written, critically acclaimed, '\
+                f'objective and structured reports on given text.'
+        }, {
+        "role": "user",
+        "content": f'Information: """{research_content}"""\n\n' \
+               f'Using the above information, answer the following '\
+               f'query: "{research_query}" in a detailed report -- '\
+               f'Please use MLA format and markdown syntax.'
+        }]
+
+    # run gpt-4
+    try:
+        lc_messages = convert_openai_messages(prompt)
+        research_report = ChatOpenAI(
+                model='gpt-4', openai_api_key=os.getenv("OPENAI_API_KEY")
+                ).invoke(lc_messages).content
+    except Exception as err:
+        raise SystemExit(f"Failed to convert OpenAI message and get response: {err}")
+
+    # print report
+    print(research_report)
+    return research_report
diff --git a/lib/gpt_providers/openai_gpt_provider.py b/lib/gpt_providers/openai_gpt_provider.py
index d70e282c..dd37d59d 100644
--- a/lib/gpt_providers/openai_gpt_provider.py
+++ b/lib/gpt_providers/openai_gpt_provider.py
@@ -8,6 +8,9 @@
 import os
 import sys
 
+import requests
+import re
+import base64
 from tqdm import tqdm, trange
 import time # I wish
 import openai
@@ -28,6 +31,83 @@ logger.add(sys.stdout,
 
 
 
+def analyze_and_extract_details_from_image(image_path):
+    """
+    Analyzes an image using OpenAI's Vision API and extracts Alt Text, Description, Title, and Caption.
+    The image is base64-encoded and posted to the chat completions endpoint; the
+    reply is parsed with regular expressions, so a field the model did not format
+    as expected comes back as None. The API key is read from OPENAI_API_KEY.
+
+    Args:
+        image_path (str): Path to the image file. It is always declared to the
+            API as image/jpeg, whatever the file actually contains.
+
+    Returns:
+        dict: Keys 'alt_text', 'description', 'title', 'caption'. Calls sys.exit on any error.
+    """
+    logger.info("analyze_and_extract_details_from_image: Encoding image to base64")
+    def encode_image(path):
+        """ Encodes an image to a base64 string. """
+        with open(path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode('utf-8')
+
+    base64_image = encode_image(image_path)
+    logger.info("Using GPT-4 Vision to get generated image details and tags.")
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
+    }
+
+    payload = {
+        "model": "gpt-4-vision-preview",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "The given image is used in blog content. Analyze the given image and suggest the following: Alternative text(Alt Text), description, title, caption."
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{base64_image}"
+                        }
+                    }
+                ]
+            }
+        ],
+        "max_tokens": 300
+    }
+
+    try:
+        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload, timeout=120)
+        response.raise_for_status()
+
+        assistant_message = response.json()['choices'][0]['message']['content']
+
+        # Extracting details using regular expressions
+        alt_text_match = re.search(r'Alt Text: "(.*?)"', assistant_message)
+        description_match = re.search(r'Description: (.*?)\n\n', assistant_message)
+        title_match = re.search(r'Title: "(.*?)"', assistant_message)
+        caption_match = re.search(r'Caption: "(.*?)"', assistant_message)
+        image_details = {
+            'alt_text': alt_text_match.group(1) if alt_text_match else None,
+            'description': description_match.group(1) if description_match else None,
+            'title': title_match.group(1) if title_match else None,
+            'caption': caption_match.group(1) if caption_match else None
+        }
+
+        logger.info(f"analyze_and_extract_details_from_image: {image_details}")
+        return image_details
+
+    except requests.RequestException as e:
+        sys.exit(f"Error: Failed to communicate with OpenAI API. Error: {e}")
+    except Exception as e:
+        sys.exit(f"Error occurred: {e}")
+
+
 def openai_chatgpt(prompt, model="gpt-3.5-turbo-16k", temperature=0.2, max_tokens=8192, top_p=0.9, n=1):
     """
     Wrapper function for openai chat Completion
diff --git a/lib/gpt_vision_image_details.py b/lib/gpt_vision_image_details.py
new file mode 100644
index 00000000..4b471163
--- /dev/null
+++ b/lib/gpt_vision_image_details.py
@@ -0,0 +1,91 @@
+"""
+This module provides functionality to analyze images using OpenAI's Vision API.
+It encodes an image to a base64 string and sends a request to the OpenAI API
+to interpret the image, returning its alt text, description, title and caption.
+"""
+
+import requests
+import sys
+import re
+import base64
+
+def analyze_and_extract_details_from_image(image_path, api_key):
+    """
+    Analyzes an image using OpenAI's Vision API and extracts Alt Text, Description, Title, and Caption.
+
+    Args:
+        image_path (str): Path to the image file.
+        api_key (str): Your OpenAI API key.
+
+    Returns:
+        dict: 'alt_text', 'description', 'title', 'caption'; a value is None when its label is missing from the reply.
+
+    Raises:
+        OSError: If the image file cannot be read (this happens before the request is sent).
+        SystemExit: If the request fails, times out, or the response cannot be parsed.
+    """
+    with open(image_path, "rb") as image_file:
+        base64_image = base64.b64encode(image_file.read()).decode('utf-8')
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}"
+    }
+
+    payload = {
+        "model": "gpt-4-vision-preview",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "The given image is used in blog content. Analyze the given image and suggest the following: Alternative text(Alt Text), description, title, caption."
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{base64_image}"  # NOTE(review): labelled JPEG whatever the real file type
+                        }
+                    }
+                ]
+            }
+        ],
+        "max_tokens": 300
+    }
+
+    try:
+        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload, timeout=120)
+        response.raise_for_status()
+
+        assistant_message = response.json()['choices'][0]['message']['content']
+
+        # Pull each labelled field out of the free-text reply; the description may be the last thing in it.
+        alt_text_match = re.search(r'Alt Text: "(.*?)"', assistant_message)
+        description_match = re.search(r'Description: (.*?)(?:\n\n|$)', assistant_message)
+        title_match = re.search(r'Title: "(.*?)"', assistant_message)
+        caption_match = re.search(r'Caption: "(.*?)"', assistant_message)
+
+        return {
+            'alt_text': alt_text_match.group(1) if alt_text_match else None,
+            'description': description_match.group(1) if description_match else None,
+            'title': title_match.group(1) if title_match else None,
+            'caption': caption_match.group(1) if caption_match else None
+        }
+
+    except requests.RequestException as e:
+        sys.exit(f"Error: Failed to communicate with OpenAI API. Error: {e}")
+    except Exception as e:
+        sys.exit(f"Error occurred: {e}")
+
+
+# Example usage
+if __name__ == "__main__":
+    import os  # needed only by this example
+    # The key comes from the environment: credentials must never be committed to the repository.
+    if len(sys.argv) != 2 or "OPENAI_API_KEY" not in os.environ:
+        sys.exit("Usage: OPENAI_API_KEY=<key> python gpt_vision_image_details.py <image_path>")
+    try:
+        print(analyze_and_extract_details_from_image(sys.argv[1], os.environ["OPENAI_API_KEY"]))
+    except Exception as e:
+        sys.exit(f"Error occurred: {e}")
diff --git a/lib/plagiarism_checker/main.py b/lib/plagiarism_checker/main.py
deleted file mode 100644
index a0615b1d..00000000
--- a/lib/plagiarism_checker/main.py
+++ /dev/null
@@ -1,20 +0,0 @@
-## main.py
-from plagiarism_checker import PlagiarismChecker
-
-
-def main():
-    # Create an instance of the PlagiarismChecker class
-    checker = PlagiarismChecker()
-
-    # Get the input string from the user
-    input_string = input("Enter the input string: ")
-
-    # Check plagiarism in the input string
-    percentage = checker.check_plagiarism(input_string)
-
-    # Print the percentage of original content
-    print(f"The percentage of original content is: {percentage}%")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/lib/plagiarism_checker/plagiarism_checker_from_known_sources.py b/lib/plagiarism_checker_from_known_sources.py
similarity index 100%
rename from lib/plagiarism_checker/plagiarism_checker_from_known_sources.py
rename to lib/plagiarism_checker_from_known_sources.py
diff --git a/lib/stabl_diff_img2html.py b/lib/stabl_diff_img2html.py
index dd8d62ac..02d3c218 100644
--- a/lib/stabl_diff_img2html.py
+++ b/lib/stabl_diff_img2html.py
@@ -13,11 +13,8 @@ os.environ['STABILITY_HOST'] = 'grpc.stability.ai:443'
 # Click on the following link once you have created an account to be taken to your API Key.
 # https://platform.stability.ai/account/keys
 
-# Paste your API Key below.
-os.environ['STABILITY_KEY'] = 'sk-KGCeQFf4iQYogzAe6WEISIOij12g4Ztvnkw92dJTJZ7vsL0j'
-
-def generate_stable_diffusion_image(prompt):
 
+def generate_stable_diffusion_image(prompt, image_dir):
     # Set up our connection to the API.
     # Check out the following link for a list of available engines: 
     # https://platform.stability.ai/docs/features/api-parameters#engine
@@ -63,3 +60,8 @@ def generate_stable_diffusion_image(prompt):
                 img.show()
                 img.save(img_name) 
                 # Save our generated images with their seed number as the filename.
+
+if __name__ == "__main__":  # manual smoke test only: importing this module must not call the Stability API
+    prompt = "An image of a digital marketing campaign with various elements such as social media ads, email marketing, data analysis, and customer interaction. The image should depict the integration of generative AI technologies, such as machine learning algorithms and neural networks, into the digital marketing process. It should showcase how these technologies revolutionize the field by enhancing efficiency, personalization, creativity, decision making, and customer experience. The image should also illustrate the potential for better return on investment (ROI) and hyper-personalization through generative AI in digital marketing."
+    image_dir = '/home/ajsingh/pseo_experiments/lib'
+    generate_stable_diffusion_image(prompt, image_dir)
diff --git a/lib/webhosting_integrations/wix_integration_bard.py b/lib/webhosting_integrations/wix_integration_bard.py
deleted file mode 100644
index c2469ac3..00000000
--- a/lib/webhosting_integrations/wix_integration_bard.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import requests
-import json
-import os
-
-class WixAPI:
-    def __init__(self, api_key, site_id):
-        self.api_key = api_key
-        self.site_id = site_id
-        self.headers = {
-            "Authorization": f"Bearer {self.api_key}"
-        }
-
-    def upload_blog(self, blog_title, blog_content, blog_image=None):
-        """Uploads a blog to a Wix website.
-
-        Args:
-            blog_title: The title of the blog.
-            blog_content: The content of the blog.
-            blog_image: The image for the blog (optional).
-
-        Returns:
-            The ID of the uploaded blog.
-        """
-
-        response = requests.post(
-            f"https://www.wix.com/api/v1/sites/{self.site_id}/blogs",
-            headers=self.headers,
-            json={
-                "title": blog_title,
-                "content": blog_content,
-                "image": blog_image
-            }
-        )
-
-        if response.status_code == 201:
-            return json.loads(response.content)["id"]
-        else:
-            raise Exception(f"Failed to upload blog: {response.status_code}")
-
-def upload_blogs(wix_api, local_directory):
-    """Uploads all blogs from a local directory to a Wix website.
-
-    Args:
-        wix_api: A WixAPI object.
-        local_directory: The local directory containing the blogs.
-    """
-
-    for blog_file in os.listdir(local_directory):
-        blog_path = os.path.join(local_directory, blog_file)
-
-        # Read the blog content from the file.
-        with open(blog_path, "r") as f:
-            blog_content = f.read()
-
-        # Get the blog title from the file name.
-        blog_title = blog_file.split(".")[0]
-
-        # Upload the blog to the Wix website.
-        blog_id = wix_api.upload_blog(blog_title, blog_content)
-
-        print(f"Uploaded blog {blog_title} with ID {blog_id}")
-
-if __name__ == "__main__":
-    # Get the Wix API key.
-    wix_api_key = "IST.eyJraWQiOiJQb3pIX2FDMiIsImFsZyI6IlJTMjU2In0.eyJkYXRhIjoie1wiaWRcIjpcIjk3MDFlNTlhLTJlNmEtNDVhMy1hYmU2LWQ0ZWMxMWI4YWFhY1wiLFwiaWRlbnRpdHlcIjp7XCJ0eXBlXCI6XCJhcHBsaWNhdGlvblwiLFwiaWRcIjpcImNjYmI5OWQxLTk1ZmYtNGRmZC1iNGIxLTYwOWRmNWExNmUwN1wifSxcInRlbmFudFwiOntcInR5cGVcIjpcImFjY291bnRcIixcImlkXCI6XCJhNTZiYTM1Zi02NDUzLTQxMDAtYWM1ZC1lM2M4OGU4YTdjN2RcIn19IiwiaWF0IjoxNjk2NjY4MDE1fQ.XhR3cBfxXhjRIeRL28Y7x0lG7o3pN6Cibpe50rN2saJRxFGyVcQGpWt6R_RnyMaBXQrxyKQcLjpTTSxmdnC6Myby1oCFAHuOpmUoGnYz634J_Epfc2BdwnA2SbnvAEktbOoFhIlMf7is2Xt89bE-h7LUPIejGHdCUucv_F1n6gBY6Bl0KxQhA_9k7M92bKr_mvoncDwTPVoeI_CL6fsQZ19tWzSDfe-DvornEIPId-Pp8Gh-lx9LmyhWepQDxpDDXEtlCEEeWvTB8_6ohOC_Jc2gSp8pw7uEawmoAaaqRKsLPBHFjrdgddKJ9jesWWMXxUGWcvJtBtoB3bZypgJSkQ"
-
-    # Get the Wix site ID.
-    wix_site_id = "a56ba35f-6453-4100-ac5d-e3c88e8a7c7d"
-
-    # Create a WixAPI object.
-    wix_api = WixAPI(wix_api_key, wix_site_id)
-
-    # Get the local directory containing the blogs.
-    local_directory = "/home/ajsingh/pseo_experiments/lib/webhosting_integrations"
-
-    # Upload all blogs from the local directory to the Wix website.
-    upload_blogs(wix_api, local_directory)
-
diff --git a/lib/webhosting_integrations/wordpress_blog_uploader.py b/lib/webhosting_integrations/wordpress_blog_uploader.py
deleted file mode 100644
index f1060d4b..00000000
--- a/lib/webhosting_integrations/wordpress_blog_uploader.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import requests
-import json
-
-
-def upload_blog_post(wordpress_site_url, wordpress_username, wordpress_password, blog_post_title, blog_post_content, blog_post_image_url=None):
-    """
-    Uploads a blog post to a WordPress website.
-
-    Args:
-        wordpress_site_url: The URL of the WordPress website.
-        wordpress_username: The username for the WordPress website.
-        wordpress_password: The password for the WordPress website.
-        blog_post_title: The title of the blog post.
-        blog_post_content: The content of the blog post.
-        blog_post_image_url: The URL of the blog post image.
-
-    Returns:
-        None.
-    """
-
-    # Get the WordPress authentication token.
-    wordpress_auth_token = get_wordpress_auth_token(wordpress_site_url, wordpress_username, wordpress_password)
-
-    # Create the request body.
-    request_body = {
-        "title": blog_post_title,
-        "content": blog_post_content
-    }
-
-    # If a blog post image URL is provided, add it to the request body.
-    if blog_post_image_url:
-        request_body["featured_media"] = blog_post_image_url
-
-    # Make the request to the WordPress API.
-    try:
-        response = requests.post(
-            f"{wordpress_site_url}/wp-json/wp/v2/posts",
-            headers={"Authorization": f"Bearer {wordpress_auth_token}"},
-            json=request_body
-        )
-    except Exception as e:
-        raise e
-
-    # Check the response status code.
-    if response.status_code != 201:
-        raise Exception(f"Failed to upload blog post: {response.status_code}")
-
-    # Print a success message.
-    print("Blog post uploaded successfully!")
-
-
-def get_wordpress_auth_token(wordpress_site_url, wordpress_username, wordpress_password):
-    """
-    Gets the WordPress authentication token.
-
-    Args:
-        wordpress_site_url: The URL of the WordPress website.
-        wordpress_username: The username for the WordPress website.
-        wordpress_password: The password for the WordPress website.
-
-    Returns:
-        A string containing the WordPress authentication token.
-    """
-
-    # Create the request body.
-    request_body = {
-        "username": wordpress_username,
-        "password": wordpress_password
-    }
-
-    # Make the request to the WordPress API.
-    try:
-        response = requests.post(
-            f"{wordpress_site_url}/wp-json/jwt-auth/v1/token",
-            json=request_body
-        )
-    except Exception as e:
-        raise e
-
-    # Check the response status code.
-    if response.status_code != 200:
-        raise Exception(f"Failed to get WordPress authentication token: {response.status_code}")
-
-    # Return the WordPress authentication token.
-    return response.json()["token"]
-
-
-# Sample usage:
-
-# Get the WordPress site URL, username, and password.
-wordpress_site_url = "https://example.com"
-wordpress_username = "YOUR_WORDPRESS_USERNAME"
-wordpress_password = "YOUR_WORDPRESS_PASSWORD"
-
-# Upload the blog post.
-try:
-    upload_blog_post(wordpress_site_url, wordpress_username, wordpress_password,
-                     "My first blog post", "This is my first blog post.")
-except Exception as e:
-    print(e)
-
diff --git a/lib/wordpress_blog_uploader.py b/lib/wordpress_blog_uploader.py
new file mode 100644
index 00000000..5dffcda5
--- /dev/null
+++ b/lib/wordpress_blog_uploader.py
@@ -0,0 +1,303 @@
+import os
+import sys
+
+import mimetypes
+import requests
+from requests.auth import HTTPBasicAuth
+import base64
+import json
+from clint.textui import progress
+
+from PIL import Image
+import tempfile
+import os
+
+from loguru import logger
+logger.remove()  # drop every handler already registered on the shared loguru logger
+logger.add(sys.stdout,  # then log to stdout with a compact "LEVEL|file:line:function| message" layout
+        colorize=True,
+        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
+    )
+
+
+def compress_image(image_path, quality=85):
+    """
+    Compress the image by reducing its quality and log size information.
+
+    :param image_path: Path to the original image
+    :param quality: Quality of the output image (1-100), lower means more compression
+    :return: Path to the compressed image (a temporary file that is not deleted automatically)
+    :raises ValueError: If image_path does not exist
+    """
+    if not os.path.exists(image_path):
+        raise ValueError(f"Provided image path does not exist: {image_path}")
+
+    # Get the size of the original image
+    original_size = os.path.getsize(image_path)
+
+    # Open the image
+    with Image.open(image_path) as img:
+        # Define the format based on the original image format
+        img_format = img.format
+
+        # delete=False keeps the file on disk after the handle is closed, so the returned path stays valid.
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.' + img_format.lower()) as temp_file:
+            # Save the image with reduced quality
+            img.save(temp_file, format=img_format, quality=quality, optimize=True)
+
+    # Get the size of the compressed image (the handle is closed, so every byte is on disk)
+    compressed_size = os.path.getsize(temp_file.name)
+
+    # Calculate the percentage reduction
+    reduction = (1 - (compressed_size / original_size)) * 100
+    logger.info("########### Image Compression ###############")
+    logger.info(f"Compressing the image, Original size: {original_size / 1024:.2f} KB")
+    logger.info(f"Compressed size: {compressed_size / 1024:.2f} KB")
+    logger.info(f"Reduction in image size: {reduction:.2f}%")
+    # TBD: https://tinypng.com/developers/reference/python
+    logger.info(f"Note: Consider converting images to JPEG/WebP format.\n\n")
+
+    return temp_file.name
+
+
+def create_wordpress_tag(url, username, app_password, tag_name):
+    """
+    Create a WordPress tag through the REST API.
+
+    :param url: Base URL of the WordPress site (e.g., 'https://example.com')
+    :param username: WordPress username
+    :param app_password: WordPress application password
+    :param tag_name: Name of the tag to be created
+    :return: The new tag's ID on HTTP 201; otherwise the raw response body
+        (a string), so callers have to check the type of the result
+    """
+    # POST the tag name as a JSON body, authenticating with HTTP Basic auth.
+    response = requests.post(
+        f"{url}/wp-json/wp/v2/tags",
+        json={'name': tag_name},
+        auth=HTTPBasicAuth(username, app_password),
+        headers={'Content-Type': 'application/json'},
+    )
+
+    if response.status_code != 201:
+        return response.text
+    # 201 Created: the response JSON carries the ID of the new tag.
+    return response.json().get('id')
+
+
+def create_wordpress_category(url, username, app_password, category_name):
+    """
+    Create a WordPress category through the REST API.
+
+    :param url: Base URL of the WordPress site (e.g., 'https://example.com')
+    :param username: WordPress username
+    :param app_password: WordPress application password
+    :param category_name: Name of the category to be created
+    :return: The new category's ID on HTTP 201; otherwise the raw response body
+        (a string), so callers have to check the type of the result
+    """
+    # POST the category name as a JSON body, authenticating with HTTP Basic auth.
+    response = requests.post(
+        f"{url}/wp-json/wp/v2/categories",
+        json={'name': category_name},
+        auth=HTTPBasicAuth(username, app_password),
+        headers={'Content-Type': 'application/json'},
+    )
+
+    if response.status_code != 201:
+        return response.text
+    # 201 Created: the response JSON carries the ID of the new category.
+    return response.json().get('id')
+
+
+def get_all_wordpress_categories(url, username, password):
+    """
+    Get all categories from WordPress, following the REST API pagination.
+
+    :param url: URL of the WordPress site
+    :param username: WordPress username
+    :param password: WordPress application password
+    :return: Dictionary of category names and their IDs, or an "Error: ..." string on failure
+    """
+    logger.info("Fetching all wordpress categories to create Or use exsiting.")
+    categories, page = {}, 1
+    api_endpoint = f"{url}/wp-json/wp/v2/categories"
+    while True:  # collections are paginated: 10 items per page by default, 100 at most
+        response = requests.get(api_endpoint, params={'per_page': 100, 'page': page}, auth=HTTPBasicAuth(username, password))
+        if response.status_code != 200:
+            return "Error: " + response.text
+        categories.update({category['name']: category['id'] for category in response.json()})
+        if page >= int(response.headers.get('X-WP-TotalPages', 1)):  # header holds the total page count
+            return categories
+        page += 1
+
+
+def get_all_wordpress_tags(url, username, password):
+    """
+    Get all tags from WordPress, following the REST API pagination.
+
+    :param url: URL of the WordPress site
+    :param username: WordPress username
+    :param password: WordPress application password
+    :return: Dictionary of tag names and their IDs, or an "Error: ..." string on failure
+    """
+    logger.info("Fetching all tags from wordpress to create or use existing tag.")
+    tags, page = {}, 1
+    api_endpoint = f"{url}/wp-json/wp/v2/tags"
+    while True:  # collections are paginated: 10 items per page by default, 100 at most
+        response = requests.get(api_endpoint, params={'per_page': 100, 'page': page}, auth=HTTPBasicAuth(username, password))
+        if response.status_code != 200:
+            return "Error: " + response.text
+        tags.update({tag['name']: tag['id'] for tag in response.json()})
+        if page >= int(response.headers.get('X-WP-TotalPages', 1)):  # header holds the total page count
+            return tags
+        page += 1
+
+
+def create_or_get_wordpress_category(url, username, password, category_name):
+    """
+    Create a new category or get existing one from WordPress.
+
+    :param url: URL of the WordPress site
+    :param username: WordPress username
+    :param password: WordPress application password
+    :param category_name: Name of the category
+    :return: ID of the category (or the error text returned by the create call)
+    """
+    existing_categories = get_all_wordpress_categories(url, username, password)
+    # The lookup returns an "Error: ..." string on failure; `in` on a string would be a substring test.
+    if isinstance(existing_categories, dict) and category_name in existing_categories:
+        return existing_categories[category_name]
+    return create_wordpress_category(url, username, password, category_name)
+
+
+def create_or_get_wordpress_tag(url, username, password, tag_name):
+    """
+    Create a new tag or get existing one from WordPress.
+
+    :param url: URL of the WordPress site
+    :param username: WordPress username
+    :param password: WordPress application password
+    :param tag_name: Name of the tag
+    :return: ID of the tag (or the error text returned by the create call)
+    """
+    existing_tags = get_all_wordpress_tags(url, username, password)
+    # The lookup returns an "Error: ..." string on failure; `in` on a string would be a substring test.
+    if isinstance(existing_tags, dict) and tag_name in existing_tags:
+        return existing_tags[tag_name]
+    return create_wordpress_tag(url, username, password, tag_name)
+
+
+def upload_media(url, username, password, media_path, alt_text, description, title, caption):
+    """
+    Upload media to WordPress site with alt text, description, title, and caption.
+
+    :param url: URL of your WordPress site
+    :param username: Your WordPress username
+    :param password: Your WordPress password
+    :param media_path: Path to the media file
+    :param alt_text: Alternative text for the image
+    :param description: Description of the media
+    :param title: Title of the media
+    :param caption: Caption for the media
+    :return: JSON of the updated media item on success, otherwise None
+    """
+    if not os.path.exists(media_path):
+        logger.info(f"File not found: {media_path}")
+        return None
+
+    mime_type, _ = mimetypes.guess_type(media_path)
+    if mime_type is None:
+        logger.info(f"Unable to determine MIME type for the file: {media_path}")
+        return None
+
+    token = base64.b64encode(f"{username}:{password}".encode())
+    header = {
+        'Authorization': 'Basic ' + token.decode('utf-8'),
+        'Content-Disposition': 'attachment; filename={}'.format(os.path.basename(media_path))
+    }
+
+    with open(media_path, 'rb') as media:
+        media_name = os.path.basename(media_path)
+        files = {'file': (media_name, media, mime_type)}
+
+        # Upload the media file
+        response = requests.post(url + '/wp-json/wp/v2/media', headers=header, files=files)
+
+        if response.status_code == 201:
+            logger.info("Media uploaded successfully.")
+            media_id = response.json()['id']
+
+            # Update media with alt text, description, title, and caption
+            media_data = {
+                'alt_text': alt_text,
+                'description': description,
+                'title': title,
+                'caption': caption
+            }
+
+            media_update_response = requests.post(f"{url}/wp-json/wp/v2/media/{media_id}", headers=header, json=media_data)
+
+            if media_update_response.status_code == 200:
+                logger.info("Media updated with alt text, description, title, and caption successfully.")
+                return media_update_response.json()
+            else:
+                logger.error("Failed to update media.")
+                logger.error(f"Response:{media_update_response.content}")
+                return None
+        else:
+            logger.error("Failed to upload media.")
+            logger.error(f"Response:{response.content}")
+            return None
+
+
+
+def upload_blog_post(url, username, password, title, content, media_id, meta_desc, categories=None, tags=None, status='draft'):
+    """
+    Upload a blog post to a WordPress site.
+    https://developer.wordpress.org/rest-api/reference/posts/#create-a-post
+
+    :param url: URL of your WordPress site
+    :param username: Your WordPress username
+    :param password: Your WordPress password
+    :param title: Title of the blog post
+    :param content: Content of the blog post
+    :param media_id: ID of the uploaded media to be set as the featured image
+    :param meta_desc: Meta description; also sent as the post excerpt
+    :param categories: List of category IDs
+    :param tags: List of tag IDs
+    :param status: Status of the post ('draft', 'publish', etc.)
+    :return: Parsed JSON of the created post on HTTP 201, otherwise None
+    """
+    token = base64.b64encode(f"{username}:{password}".encode())
+    header = {'Authorization': 'Basic ' + token.decode('utf-8')}
+
+    # Prepare the data for the post
+    # https://developer.wordpress.org/rest-api/reference/posts/#schema-meta
+    post = {
+        'title': title,
+        'content': content,
+        # One of: publish, future, draft, pending, private
+        'status': status,
+        'excerpt': meta_desc,
+        'featured_media': media_id,
+        'meta': {
+            'description': meta_desc  # This depends on your WordPress setup
+        }
+    }
+    # Send taxonomy terms only when given; non-integer entries are dropped (the create_* helpers return error text on failure).
+    for field, term_ids in (('categories', categories), ('tags', tags)):
+        ids = [t for t in term_ids if isinstance(t, int)] if isinstance(term_ids, (list, tuple)) else []
+        if ids:
+            post[field] = ids
+
+    # Make the request
+    response = requests.post(url + '/wp-json/wp/v2/posts', headers=header, json=post)
+
+    # Check response
+    if response.status_code == 201:
+        logger.info("Blog to wordpress, uploaded successfully.")
+        return json.loads(response.content)
+    logger.error("Blog upload to wordpress Failed.")
+    logger.error(f"Response: {response.content}")  # Print response content for debugging
+    return None