WIP- Try AI-Writer and Web research; working. Working on usability aspects.

2024-03-06 15:39:02 +05:30
parent 16e68c8959
commit 27f6952dd0
29 changed files with 399 additions and 602 deletions
--- a/lib/ai_web_researcher/google_serp_search.py
+++ b/lib/ai_web_researcher/google_serp_search.py
@@ -66,23 +66,23 @@ def google_search(query):
    Returns:
        list: List of search results based on the specified flag.
    """
-    try:
-        perform_serpapi_google_search(query)
-        logger.info(f"FIXME: Google serapi: {query}")
-        #return process_search_results(search_result)
-    except Exception as err:
-        logger.error(f"ERROR: Check Here: https://serpapi.com/. Your requests may be over. {err}")
+    #try:
+    #    perform_serpapi_google_search(query)
+    #    logger.info(f"FIXME: Google serapi: {query}")
+    #    #return process_search_results(search_result)
+    #except Exception as err:
+    #    logger.error(f"ERROR: Check Here: https://serpapi.com/. Your requests may be over. {err}")

    # Retry with serper.dev
    try:
        logger.info("Trying Google search with Serper.dev: https://serper.dev/api-key")
        search_result = perform_serperdev_google_search(query)
        process_search_results(search_result)
+        return(search_result)
    except Exception as err:
        logger.error(f"Failed to do Google search with serper.dev: {err}")

-    return(search_result)
-    
+ 
 #    # Retry with BROWSERLESS API
 #    try:
 #        search_result = perform_browserless_google_search(query)
@@ -118,7 +118,10 @@ def perform_serpapi_google_search(query, location="in"):
    try:
        # Check if API key is provided
        if not os.getenv("SERPAPI_KEY"):
-            raise ValueError("SERPAPI_KEY key is required for SerpApi")
+            #raise ValueError("SERPAPI_KEY key is required for SerpApi")
+            logger.error("SERPAPI_KEY key is required for SerpApi")
+            return
+            

        # Create a GoogleSearch instance
        search = GoogleSearch({
@@ -164,7 +167,7 @@ def perform_serperdev_google_search(query):
        "q": query,
        "gl": "in",
        "hl": "en",
-        "num": 5,
+        "num": 10,
        "autocorrect": True,
        "page": 1,
        "type": "search",
--- a/lib/ai_web_researcher/google_trends_researcher.py
+++ b/lib/ai_web_researcher/google_trends_researcher.py
@@ -23,6 +23,8 @@ Note: Ensure that the required libraries are installed using 'pip install pytren
 """

 import os
+import time # I wish
+import random
 import requests
 import numpy as np
 import sys
@@ -186,6 +188,7 @@ def get_related_topics_and_save_csv(search_keywords):
            data = pytrends.related_topics()
        except Exception as err:
            logger.error(f"Failed to get pytrends realted topics: {err}")
+            return
        # Extract data from the result
        top_topics = list(data.values())[0]['top']
        rising_topics = list(data.values())[0]['rising']
@@ -238,6 +241,8 @@ def get_results(query):
    try:
        query = urllib.parse.quote_plus(query)
        response = get_source(f"https://suggestqueries.google.com/complete/search?output=chrome&hl=en&q={query}")
+        time.sleep(random.uniform(0.1, 0.6))
+
        if response:
            response.raise_for_status()
            results = json.loads(response.text)
@@ -501,6 +506,8 @@ def do_google_trends_analysis(search_term):
            else:
                all_the_keywords.append(suggestions_df['Keywords'].tolist())
            all_the_keywords = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in all_the_keywords])
+            # Generate a random sleep time between 2 and 3 seconds 
+            time.sleep(random.uniform(2, 3))

 #        
 #        # FIXME: Get result from vision GPT. Fetch and visualize Google Trends data
@@ -510,12 +517,16 @@ def do_google_trends_analysis(search_term):
 #        result_df = plot_interest_by_region(search_term)
 #        
        # Display additional information
-        result_df = get_related_topics_and_save_csv(search_term)
-        # Extract 'Top' topic_title
-        top_topic_title = result_df['topic_title'].values.tolist()
-        # Join each sublist into one string separated by comma
-        #top_topic_title = [','.join(filter(None, map(str, sublist))) for sublist in top_topic_title]
-        top_topic_title = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in top_topic_title])
+        try:
+            result_df = get_related_topics_and_save_csv(search_term)
+            # Extract 'Top' topic_title
+            if result_df:
+                top_topic_title = result_df['topic_title'].values.tolist()
+                # Join each sublist into one string separated by comma
+                #top_topic_title = [','.join(filter(None, map(str, sublist))) for sublist in top_topic_title]
+                top_topic_title = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in top_topic_title])
+        except Exception as err:
+            logger.error(f"Failed to get results from google trends related topics: {err}")

        # TBD: Not getting great results OR unable to understand them.
        #all_the_keywords += top_topic_title
--- a/lib/ai_web_researcher/gpt_online_researcher.py
+++ b/lib/ai_web_researcher/gpt_online_researcher.py
@@ -9,7 +9,6 @@ import json
 from pathlib import Path
 import sys
 from typing import List, NamedTuple
-from loguru import logger
 from datetime import datetime

 from ..gpt_providers.gemini_pro_text import gemini_text_response
@@ -17,8 +16,9 @@ from .tavily_ai_search import get_tavilyai_results
 from .metaphor_basic_neural_web_search import metaphor_find_similar, metaphor_search_articles
 from .google_serp_search import google_search
 from .google_trends_researcher import do_google_trends_analysis
-from .web_research_report import write_web_research_report
+#from .web_research_report import write_web_research_report

+from loguru import logger
 # Configure logger
 logger.remove()
 logger.add(sys.stdout,
--- a/lib/ai_web_researcher/tavily_ai_search.py
+++ b/lib/ai_web_researcher/tavily_ai_search.py
@@ -66,7 +66,7 @@ def get_tavilyai_results(keywords, include_urls, search_depth="advanced"):
    # Retrieve API keys
    api_key = os.getenv('TAVILY_API_KEY')
    if not api_key:
-        raise ValueError("API keys for Tavily or OpenAI are not set.")
+        raise ValueError("API keys for Tavily is Not set.")

    # Initialize Tavily client
    try:
--- a/lib/ai_web_researcher/web_research_report.py
+++ b/lib/ai_web_researcher/web_research_report.py
@@ -1,11 +1,10 @@
-from langchain.adapters.openai import convert_openai_messages
-from langchain.chat_models import ChatOpenAI
-
+import os
 from ..gpt_providers.gemini_pro_text import gemini_text_response


-def write_web_research_report(web_research, faq_questions, gpt_provider="gemini"):
+def write_web_research_report(web_research, faq_questions):
    """ """
+    gpt_provider = os.environ["GPT_PROVIDER"]
    if "gemini" in gpt_provider:
            prompt = ["You are an SEO and marketing expert, who writes unique, factual and comprehensive research reports."
                    "I will provide you web research report as json data and a list of related FAQ questions."
--- a/lib/ai_writers/blog_from_google_serp.py
+++ b/lib/ai_writers/blog_from_google_serp.py
@@ -34,14 +34,14 @@ def write_blog_google_serp(search_keyword, search_results):
        Google search Result: "{search_results}"
        """
    logger.info("Generating blog and FAQs from web search result.")
-    if 'google' in gpt_providers:
+    if 'google' in gpt_providers.lower():
        try:
            response = gemini_text_response(prompt)
            return response
        except Exception as err:
            logger.error(f"Failed to get response from gemini: {err}")
            raise err
-    elif 'openai' in gpt_providers:
+    elif 'openai' in gpt_providers.lower():
        try:
            logger.info("Calling OpenAI LLM.")
            response = openai_chatgpt(prompt)
--- a/lib/ai_writers/combine_blog_and_keywords.py
+++ b/lib/ai_writers/combine_blog_and_keywords.py
@@ -30,7 +30,7 @@ def blog_with_keywords(blog, keywords):
        list of keywords: '{keywords}'
        """

-    if 'google' in gpt_providers:
+    if 'google' in gpt_providers.lower():
        prompt = f"""You are an expert copywriter specializing in content optimization for SEO. 
        I will provide you with my 'blog content' and 'list of keywords' on the same topic.
        Your task is to write an original blog, using the given keywords and blog content.
@@ -39,7 +39,6 @@ def blog_with_keywords(blog, keywords):
        Always, include figures, data, results from given content.
        It is important that your blog is original and unique. It should be highly readable and SEO optimized.

-
        Blog content: '{blog}'
        list of keywords: '{keywords}'
        """
@@ -49,7 +48,7 @@ def blog_with_keywords(blog, keywords):
        except Exception as err:
            logger.error(f"Failed to get response from gemini: {err}")
            raise err
-    elif 'openai' in gpt_providers:
+    elif 'openai' in gpt_providers.lower():
        try:
            logger.info("Calling OpenAI LLM.")
            response = openai_chatgpt(prompt)
--- a/lib/ai_writers/combine_research_and_blog.py
+++ b/lib/ai_writers/combine_research_and_blog.py
@@ -20,10 +20,10 @@ def blog_with_research(report, blog):
    """Combine the given online research and gpt blog content"""
    gpt_providers = os.environ["GPT_PROVIDER"]
    prompt = f"""
-        You are an expert copywriter specializing in content optimization for SEO.
+        You are an expert copywriter specializing in SEO content optimization for blogs.
        I will provide you with a 'research report' and a 'blog content' on the same topic.
-        Your task is to transform and combine the given research and blog content into a well-structured markdown, unique
-        and engaging blog article.
+        Your task is to transform and combine the given 'research report' and 'blog content' into a well-structured, unique
+        and original blog article.

        Your objectives include:
        1. Master the report and blog content: Understand main ideas, key points, and the core message.
@@ -47,11 +47,11 @@ def blog_with_research(report, blog):
        that will rank well in search engine results and engage readers effectively.

        Create a blog post, in markdown, from the given research report and blog content below.
-        Research report: {report}
-        Blog content: {blog}
+        Research report: '{report}'
+        Blog content: '{blog}'
        """

-    if 'google' in gpt_providers:
+    if 'google' in gpt_providers.lower():
        prompt = f"""You are an expert copywriter specializing in content optimization for SEO. 
        I will provide you with my 'research report' and 'blog content' on the same topic.
        Your task is to transform and combine the given research and blog content into a blog article.
@@ -70,7 +70,7 @@ def blog_with_research(report, blog):
        except Exception as err:
            logger.error(f"Failed to get response from gemini: {err}")
            raise err
-    elif 'openai' in gpt_providers:
+    elif 'openai' in gpt_providers.lower():
        try:
            logger.info("Calling OpenAI LLM.")
            response = openai_chatgpt(prompt)
@@ -78,3 +78,6 @@ def blog_with_research(report, blog):
        except Exception as err:
            logger.error(f"failed to get response from Openai: {err}")
            raise err
+    else:
+        logger.error(f"Unrecognised/Un-Supoorted GPT_PROVIDER: {gpt_providers}\n")
+        return
--- a/lib/ai_writers/gpt_blog_sections.py
+++ b/lib/ai_writers/gpt_blog_sections.py
@@ -1,4 +1,5 @@
 import sys
+import os
 import json

 from ..gpt_providers.openai_chat_completion import openai_chatgpt
@@ -13,9 +14,9 @@ logger.add(sys.stdout,


 # FIXME: Provide num_blogs, num_faqs as inputs.
-def get_blog_sections_from_websearch(search_keyword, search_results, gpt_providers="gemini"):
+def get_blog_sections_from_websearch(search_keyword, search_results):
    """Combine the given online research and gpt blog content"""
-
+    gpt_providers = os.environ["GPT_PROVIDER"]
    prompt = f"""
        As a SEO expert and content writer, I will provide you with a search keyword and its google search result.
        Your task is to write a blog title and 5 blog sub titles, from the given google search result.
--- a/lib/ai_writers/keywords_to_blog.py
+++ b/lib/ai_writers/keywords_to_blog.py
@@ -1,5 +1,6 @@
 import sys
 import os
+from textwrap import dedent
 from pathlib import Path
 from datetime import datetime

@@ -32,41 +33,42 @@ def write_blog_from_keywords(search_keywords, url=None):
    # TBD: Keeping the results directory as fixed, for now.
    os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "workspace", "web_research_reports",
            search_keywords.replace(" ", "_") + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
-    logger.info(f"Researching and Writing Blog on keywords: {search_keywords}")
    # Use to store the blog in a string, to save in a *.md file.
    blog_markdown_str = ""
    example_blog_titles = []
+    
+    logger.info(f"Researching and Writing Blog on keywords: {search_keywords}")
    # Call on the got-researcher, tavily apis for this. Do google search for organic competition.
    google_search_result, g_titles = do_google_serp_search(search_keywords)
    example_blog_titles.append(g_titles)
    blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result)
    # logger.info/check the final blog content.
-    logger.info(f"Final blog content: {blog_markdown_str}")
+    logger.info(f"######### Blog content Google SERP research: ###########\n\n{blog_markdown_str}\n\n")

    # Do Tavily AI research to augument the above blog.
    tavily_search_result, t_titles = do_tavily_ai_search(search_keywords)
    example_blog_titles.append(t_titles)
-    blog_markdown_str = blog_with_research(blog_markdown_str, tavily_search_result)
-    logger.info(f"Final blog content: {blog_markdown_str}")
+    if tavily_search_result:
+        blog_markdown_str = blog_with_research(blog_markdown_str, tavily_search_result)
+        logger.info(f"######### Blog content after Tavily AI research: ######### \n\n{blog_markdown_str}\n\n")

    try:
        # Do Metaphor/Exa AI search.
        metaphor_search_result, m_titles = do_metaphor_ai_research(search_keywords)
        example_blog_titles.append(m_titles)
        blog_markdown_str = blog_with_research(blog_markdown_str, metaphor_search_result)
-        logger.info(f"Final blog content: {blog_markdown_str}")
+        logger.info(f"######## Blog content after EXA AI research: ########## \n\n{blog_markdown_str}\n\n")
    except Exception as err:
        logger.error(f"Failed to do Metaphor AI search: {err}")

    # Do Google trends analysis and combine with latest blog.
    try:
        pytrends_search_result = do_google_pytrends_analysis(search_keywords)
+        logger.info(f"Google Trends keywords to use in the blog: {pytrends_search_result}\n")
        blog_markdown_str = blog_with_keywords(blog_markdown_str, pytrends_search_result)
    except Exception as err:
        logger.error(f"Failed to do Google Trends Analysis:{err}")
-        
-    blog_markdown_str = blog_proof_editor(blog_markdown_str, search_keywords)
-    logger.info(f"Final blog content: {blog_markdown_str}")
+    logger.info(f"########### Blog Content After Google Trends Analysis:######### \n {blog_markdown_str}\n\n")

    # Combine YOU.com RAG search with the latest blog content.
    #you_rag_result = get_rag_results(search_keywords)
@@ -74,6 +76,8 @@ def write_blog_from_keywords(search_keywords, url=None):
    #blog_markdown_str = blog_with_research(blog_markdown_str, you_search_result)
    #logger.info(f"Final blog content: {blog_markdown_str}")

+    blog_markdown_str = blog_proof_editor(blog_markdown_str, search_keywords)
+
    blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str, 
            search_keywords, example_blog_titles)

@@ -92,4 +96,12 @@ def write_blog_from_keywords(search_keywords, url=None):
    # TBD: Save the blog content as a .md file. Markdown or HTML ?
    save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath)

+    blog_frontmatter = dedent(f"""\n\n\n\
+                ---
+                title: {blog_title}
+                categories: [{blog_categories}]
+                tags: [{blog_tags}]
+                Meta description: {blog_meta_desc.replace(":", "-")}
+                ---\n\n""")
+    logger.info(f"{blog_frontmatter}{blog_markdown_str}")
    logger.info(f"\n\n ################ Finished writing Blog for : {search_keywords} #################### \n")
--- a/lib/blog_metadata/get_blog_category.py
+++ b/lib/blog_metadata/get_blog_category.py
@@ -27,13 +27,13 @@ def get_blog_categories(blog_article):
            The blog content is: '{blog_article}'"
    """
    logger.info("Generating blog categories for the given blog.")
-    if 'google' in gpt_providers:
+    if 'google' in gpt_providers.lower():
        try:
            response = gemini_text_response(prompt)
            return response
        except Exception as err:
            logger.error(f"Failed to get response from gemini: {err}")
-    elif 'openai' in gpt_providers:
+    elif 'openai' in gpt_providers.lower():
        try:
            response = openai_chatgpt(prompt)
            return response
--- a/lib/blog_metadata/get_blog_meta_desc.py
+++ b/lib/blog_metadata/get_blog_meta_desc.py
@@ -27,13 +27,13 @@ def generate_blog_description(blog_content):
        Respond with only one of your best effort and do not include your explanations. 
        Blog Content: '{blog_content}'"""

-    if 'google' in gpt_providers:
+    if 'google' in gpt_providers.lower():
        try:
            response = gemini_text_response(prompt)
            return response
        except Exception as err:
            logger.error("Failed to get response from gemini.")
-    elif 'openai' in gpt_providers:
+    elif 'openai' in gpt_providers.lower():
        try:
            response = openai_chatgpt(prompt)
            return response
--- a/lib/blog_metadata/get_blog_title.py
+++ b/lib/blog_metadata/get_blog_title.py
@@ -42,13 +42,22 @@ def generate_blog_title(blog_article, keywords=None, example_titles=None, num_ti
            Blog Keywords: '{keywords}'
            Example Titles: '{example_titles}'
        """
-    if 'google' in gpt_providers:
+    elif not example_titles:
+        prompt = prompt = f"""As a SEO expert, I will provide you with my blog article.
+            Your task is to write {num_titles} blog title.
+            Follow SEO best practises to suggest the blog title.
+            Please keep the titles concise, not exceeding 60 words.
+            Respond with only {num_titles} title and no explanations.
+            Negative Keywords: Unvieling, unleash, power of. Dont use such words in your title.
+            Blog Article: '{keywords}'
+        """
+    if 'google' in gpt_providers.lower():
        try:
            response = gemini_text_response(prompt)
            return response
        except Exception as err:
            logger.error(f"Failed to get response from gemini: {err}") 
-    elif 'openai' in gpt_providers:
+    elif 'openai' in gpt_providers.lower():
        try:
            logger.info("Calling OpenAI LLM.")
            response = openai_chatgpt(prompt)
--- a/lib/blog_metadata/get_tags.py
+++ b/lib/blog_metadata/get_tags.py
@@ -25,13 +25,13 @@ def get_blog_tags(blog_article):
         for the given blog content. Only reply with comma separated values. 
         Blog content:  {blog_article}."""
    logger.info("Generating Blog tags for the given blog post.")
-    if 'google' in gpt_providers:
+    if 'google' in gpt_providers.lower():
        try:
            response = gemini_text_response(prompt)
            return response
        except Exception as err:
            logger.error("Failed to get response from gemini.")
-    elif 'openai' in gpt_providers:
+    elif 'openai' in gpt_providers.lower():
        try:
            response = openai_chatgpt(prompt)
            return response
--- a/lib/blog_postprocessing/blog_proof_reader.py
+++ b/lib/blog_postprocessing/blog_proof_reader.py
@@ -26,19 +26,19 @@ def blog_proof_editor(blog_content, blog_keywords):
        4). Tone and Brand Alignment: Adjust the tone, voice, personality of given content to make it unique.
        5). Optimize Content Structure: Reorganize the content for a more impactful presentation, 
        including better paragraphing and transitions.
-        6). Simplify given content: Simplify concepts and replace overly complex jargons and words.
+        6). Simplify content: Simplify concepts and replace overly complex words. Use simple english words.
        7). Refine Overall Structure: Make structural changes to improve the overall impact of the content.

        \n\nMain keywords: '{blog_keywords}'
        My Blog: '{blog_content}'. """

-    if 'openai' in gpt_provider:
+    if 'openai' in gpt_provider.lower():
        try:
            response = openai_chatgpt(prompt)
            return response
        except Exception as err:
            SystemError(f"Openai Error Blog Proof Reading: {err}")
-    elif 'google' in gpt_provider:
+    elif 'google' in gpt_provider.lower():
        try:
            response = gemini_text_response(prompt)
            return response
--- a/lib/blog_postprocessing/convert_content_to_markdown.py
+++ b/lib/blog_postprocessing/convert_content_to_markdown.py
@@ -57,13 +57,13 @@ def convert_tomarkdown_format(blog_content, gpt_provider="openai"):
    
    Blog Post: '{blog_content}'"""
    
-    if 'openai' in gpt_provider:
+    if 'openai' in gpt_provider.lower():
        try:
            response = openai_chatgpt(prompt)
            return response
        except Exception as err:
            SystemError(f"Openai Error in converting to Markdown format.")
-    elif 'gemini' in gpt_provider:
+    elif 'gemini' in gpt_provider.lower():

        prompt = f""" Convert the given blog post into well structured MARKDOWN content. 
        Do not alter the given blog post.
--- a/lib/gpt_providers/openai_gpt_provider.py
+++ b/lib/gpt_providers/openai_gpt_provider.py
@@ -17,14 +17,8 @@ import openai
 from openai import OpenAI
 from pytube import YouTube
 import tempfile
-from html2image import Html2Image
 import datetime
 from PIL import Image
-import moviepy.editor as mp
-import requests
-from moviepy.editor import AudioFileClip
-from concurrent.futures import ThreadPoolExecutor
-

 from loguru import logger
 logger.remove()