Agentic content creation, web researched

2024-04-26 16:07:17 +05:30
parent 45d324a2a9
commit 20c201f4f9
8 changed files with 198 additions and 231 deletions
--- a/README.md
+++ b/README.md
@@ -90,12 +90,22 @@ Congratulations: Once you've cloned the repository, you can proceed with the nex
 ---


-### Option 3: Web URL 🌐 *(For easy access)*
+## Packages, Tools, and APIs Used

-Coming Soon....
+### Standing on the shoulders of Giants - Credits:
+- **APIs**:
+  - [Exa API](https://exa.ai/): Provides semantic search capabilities for finding similar topics and technologies.
+  - [Tavily API](https://tavily.com/): Offers AI-powered web search functionality for conducting in-depth keyword research.
+  - [SerperDev API](https://serper.dev/): Enables access to search engine results and competitor analysis data.
+  - [YOU.com](https://you.com/): You.com enhances web search, writing, coding, digital art creation, and solving complex problems.
+  - [Stability AI](https://stability.ai/): Activating humanity's potential through generative AI. 
+    Open models in every modality, for everyone, everywhere.
+  - [OpenAI API](https://openai.com/): Powers the Large Language Models (LLMs) for generating blog content and conducting research.
+  - [Gemini API](https://gemini.google.com/app): Google-powered LLM for natural language processing tasks.
+  - [Ollama](https://ollama.com/): Local, privacy-focused LLM provider for research and content generation.
+  - [CrewAI](https://www.crewai.com/): Collaborative AI agents framework.
 ---

-
 ## Features

 - **Online Research Integration**: Enhances blog content by integrating insights and information gathered from online research, ensuring the content is informative and up-to-date. This gives context for generating content. Tavily AI, Google search, serp and Vision AI is used to scrape web data for context augumentation. TBD: Include CrewAI for web research agents.
--- a/alwrity.py
+++ b/alwrity.py
@@ -83,7 +83,8 @@ def start_interactive_mode():
        elif mode == 'AI Image to Text Writer':
            image_to_text_writer()
        elif mode == 'Do keyword Research':
-            do_web_research()
+            if check_search_apis():
+                do_web_research()
        elif mode == 'Create Blog Images':
            image_generator()
        elif mode == 'Competitor Analysis':
--- a/lib/ai_web_researcher/ai_news_researcher.py
+++ b/lib/ai_web_researcher/ai_news_researcher.py
@@ -1,172 +0,0 @@
-################################################################
-#
-# 
-# 
-##############################################################
-
-import os
-import json
-from pathlib import Path
-import sys
-from typing import List, NamedTuple
-from loguru import logger
-from datetime import datetime
-
-from ..gpt_providers.gemini_pro_text import gemini_text_response
-from .tavily_ai_search import get_tavilyai_results
-from .metaphor_basic_neural_web_search import metaphor_news_summarizer
-from .google_serp_search import google_news
-from .google_trends_researcher import do_google_trends_analysis
-from .gpt_blog_sections import get_blog_sections_from_websearch
-from .web_research_report import write_web_research_report
-
-
-# Configure logger
-logger.remove()
-logger.add(sys.stdout,
-           colorize=True,
-           format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
-           )
-
-
-def web_news_researcher(search_keywords, time_range=None, include_domains=list(), similar_url=None):
-    """ """
-    print(f"Web Research:Time Range - {time_range},Search Keywords - {search_keywords},Include URLs - {include_domains}")
-    if not include_domains:
-        include_domains = list()
-    # TBD: Keeping the results directory as fixed, for now.
-    os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "workspace", "web_research_reports", 
-        search_keywords.replace(" ", "_") + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
-    
-    # Collect all blog titles featuring in search results. This *may help in generating blog titles
-    # closest to competing ones. All search blog titles, given keyword and keywords from analysis, give
-    # llm a good context for the task of generating blog titles.
-    blog_titles = []
-    # Get a list of FAQs from search results.
-    blog_faqs = None
-    google_result = None
-    tavily_result = None
-    report = None
-    try:
-        logger.info(f"Doing Google search for: {search_keywords}\n")
-        google_result = google_search(search_keywords)
-        blog_titles.append(extract_info(google_result, "titles"))
-    except Exception as err:
-        logger.error(f"Failed to do Google Serpapi research: {err}")
-        # Not failing, as tavily would do same and then GPT-V to search.
-
-    try:
-        # FIXME: Include the follow-up questions as blog FAQs.
-        logger.info(f"Doing Tavily AI search for: {search_keywords}")
-        tavily_result = get_tavilyai_results(search_keywords, include_domains)
-        blog_titles.append(tavily_extract_information(tavily_result, "titles"))
-    except Exception as err:
-        logger.error(f"Failed to do Tavily AI Search: {err}")
-
-    try:
-        logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}")
-        response_articles = metaphor_search_articles(
-                search_keywords, 
-                include_domains=include_domains, 
-                time_range=time_range,
-                similar_url=similar_url)
-        blog_titles.append(metaphor_extract_titles_or_text(response_articles, return_titles=True))
-    except Exception as err:
-        logger.error(f"Failed to do Metaphor search: {err}")
-    print(blog_titles)
-
-    try:
-        logger.info(f"Do Google Trends analysis for given keywords: {search_keywords}")
-        important_keywords = do_google_trends_analysis(search_keywords)
-    except Exception as err:
-        logger.error(f"Failed to do google trends analysis: {err}")
-    print(important_keywords)
-    # Now that we have search results from given keywords. Generate blog title and subtopics suggestions.
-    # 1. Return a list of related keywords along with search volumes.
-    # 2. New blog titles to write on(niche, top) and blog sections.
-    # 3. Competitors list, similar urls if given.
-    print(f"\n\nReview the analysis in this file at: {os.environ.get('SEARCH_SAVE_FILE')}\n")
-
-
-def metaphor_extract_titles_or_text(json_data, return_titles=True):
-    """
-    Extract either titles or text from the given JSON structure.
-
-    Args:
-        json_data (list): List of Result objects in JSON format.
-        return_titles (bool): If True, return titles. If False, return text.
-
-    Returns:
-        list: List of titles or text.
-    """
-    if return_titles:
-        return [(result.title) for result in json_data]
-    else:
-        return [result.text for result in json_data]
-
-
-def extract_info(json_data, info_type):
-    """
-    Extract information (titles, peopleAlsoAsk, or relatedSearches) from the given JSON.
-
-    Args:
-        json_data (dict): The JSON data.
-        info_type (str): The type of information to extract (titles, peopleAlsoAsk, relatedSearches).
-
-    Returns:
-        list or None: A list containing the requested information, or None if the type is invalid.
-    """
-    if info_type == "titles":
-        return [result.get("title") for result in json_data.get("organic", [])]
-    elif info_type == "peopleAlsoAsk":
-        return [item.get("question") for item in json_data.get("peopleAlsoAsk", [])]
-    elif info_type == "relatedSearches":
-        return [item.get("query") for item in json_data.get("relatedSearches", [])]
-    else:
-        print("Invalid info_type. Please use 'titles', 'peopleAlsoAsk', or 'relatedSearches'.")
-        return None
-
-
-def tavily_extract_information(json_data, keyword):
-    """
-    Extract information from the given JSON based on the specified keyword.
-
-    Args:
-        json_data (dict): The JSON data.
-        keyword (str): The keyword (title, content, answer, follow-query).
-
-    Returns:
-        list or str: The extracted information based on the keyword.
-    """
-    if keyword == 'title':
-        return [result['title'] for result in json_data['results']]
-    elif keyword == 'content':
-        return [result['content'] for result in json_data['results']]
-    elif keyword == 'answer':
-        return json_data['answer']
-    elif keyword == 'follow-query':
-        return json_data['follow_up_questions']
-    else:
-        return f"Invalid keyword: {keyword}"
-
-
-def compete_organic_results(query, report, organic_results):
-    """ Given a blog content and google search organinc results, create a new blog to compete against them."""
-    prompt = f""" As an SEO expert and copywriter, I will provide you with my blog content on topic '{query}', and
-        Top google search results. 
-        Your task is to rewrite the given blog to make it compete against top position results. 
-        Make sure, the new blog has high probability of ranking highest against given organic search result competitors.
-        Modify the given blog content following best SEO practises.
-        Make sure the blog is original, unique and highly readable.
-        Remember, Maintain and adopt the formatting, structure, style and tone of the provided blog content.
-        Include relevant emojis in your final blog for visual appeal. Use it sparingly.
-        Your response should be well-structured, objective, and critically acclaimed blog article based on provided texts. 
-
-        Remember, your goal is to create a detailed blog article that will compete against given organic result competitors.
-        Do not provide explanations, suggestions for your response, reply only with your final response.
-        Take your time in crafting your content, do not rush to give the response.
-        Blog Content: '{report}'\n
-        Organic Search result: '{organic_results}'
-        """
-    report = gemini_text_response(prompt)
-    return report
--- a/lib/ai_writers/ai_agents_crew_writer.py
+++ b/lib/ai_writers/ai_agents_crew_writer.py
@@ -0,0 +1,151 @@
+import os
+from crewai import Agent, Task, Crew
+from crewai_tools import SerperDevTool
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+def setup_environment():  # Export the OpenAI model name through the OPENAI_MODEL_NAME environment variable.
+    os.environ["OPENAI_MODEL_NAME"] = 'gpt-3.5-turbo'  # Adjust based on available model. NOTE(review): create_agents passes an explicit Gemini llm to every agent - confirm this is still needed.
+
+def create_agents(search_keywords):
+    """Build the researcher, outliner, writer and reviewer agents (all on Gemini Pro) and return them in that order."""
+    search_tool = SerperDevTool()
+    # Read the Gemini API key from the GEMINI_API_KEY environment variable (None if unset)
+    google_api_key = os.getenv("GEMINI_API_KEY")
+
+    # Set gemini pro as the llm shared by all four agents
+    llm = ChatGoogleGenerativeAI(
+        model="gemini-pro", verbose=True, temperature=0.9, google_api_key=google_api_key
+    )
+
+    content_researcher = Agent(
+        role = 'Senior Research Analyst',
+        goal = f'Uncover content writing ideas for "{search_keywords}" keywords.',
+        backstory = f"""You work at a leading digital marketing firm.
+        Your expertise lies in identifying emerging trends, topic for content creation.
+        You are expert in researching latest information about various topics and {search_keywords}.
+        Your research and content suggestions are foundation for content writers.
+        Your detailed content research is pivotal to company's content strategy.""",
+        tools = [search_tool],  # Only the researcher is given the web search tool
+        memory = True,  # Enable memory
+        verbose = True,
+        max_rpm = None,  # No limit on requests per minute
+        max_iter = 15,  # Default value for maximum iterations
+        allow_delegation = False,
+        llm = llm
+    )
+
+    content_outliner = Agent(
+        role = 'Senior Content Strategist',
+        goal = f'Create a content outline for "{search_keywords}" keywords, from your insights & provided context.',
+        backstory = """You are an expert digital content writer and marketing expert.
+            The content researcher had identified ideas to write content on. 
+            Use this knowledge to write your content outline.
+            Take your time going over the research. Your content outline will be expanded upon after review.""",
+        memory = True,  # Enable memory
+        verbose = True,
+        max_rpm = 10,  # Limit to 10 requests per minute
+        max_iter = 5,  # Cap at 5 iterations (the researcher is allowed 15)
+        allow_delegation = False,
+        llm = llm
+    )
+
+    content_writer = Agent(
+        role = 'Content Strategist',
+        goal = f"""Craft compelling & SEO optimized content on {search_keywords}. 
+        Rank high on Google for popular long-tail keywords related to the short-tail keyword {search_keywords}""",
+        backstory = f"""You are a renowned Content Strategist, known for your insightful and engaging articles.
+        You transform complex concepts into compelling narratives. 
+        Limit them to 20 words or so, using language familiar to the majority. 
+        Example: Instead of "Utilize this methodology," say "Use this method."
+        Employ a clear and concise writing style.
+        Engage your audience with a compelling, fun, and informative tone,
+        that effectively conveys the technical aspects of the topic in simple terms.
+        """,
+        memory = True,  # Enable memory
+        verbose = True,
+        max_rpm = 10,  # Limit to 10 requests per minute
+        max_iter = 5,  # Cap at 5 iterations (the researcher is allowed 15)
+        allow_delegation = False,
+        llm = llm
+    )
+
+    content_reviewer = Agent(
+	    role="Expert Writing Critic & content Editor.",
+        goal="Review the draft content and identfy potential issues.",
+        backstory="""You are expert reviewer with 10 years of exprience in reviewing digital content.
+        The make sure that article are interesting and correct information provided.
+        Simplicity will resonate with your readers.
+        Pay attention to grammar and punctuation.
+        Avoid AI sounding words and pass AI detection tools.
+        Engage with active voice. It’s as if you’re in conversation with the reader.
+        Example: Use "You will see benefits" instead of "One will see benefits."
+        Use headings, bullets, and formatting to break the monotony of the text. These elements add rhythm and can make a document more inviting.
+        A concise conclusion that resonates with the beginning can bring your piece full circle, satisfying your readers.
+        """,
+        memory=True,  # Enable memory
+        verbose=True,
+        max_rpm=10,  # Limit to 10 requests per minute
+        max_iter=5,  # Cap at 5 iterations (the researcher is allowed 15)
+        allow_delegation=False,
+        llm=llm
+    )
+
+    # Order matters: create_tasks indexes this list positionally (0=researcher ... 3=reviewer)
+    return [content_researcher, content_outliner, content_writer, content_reviewer]
+
+def create_tasks(agents, search_keywords):
+    """Build the research, outline, writing and proofreading tasks, one per agent in `agents` order, and return them as a list."""
+    research_task = Task(
+        description=f"""Conduct a comprehensive topic analysis on the following: "{search_keywords}".
+        Identify keyword trends, SEO opportunities, and potential content ideas to write upon.
+        """,
+        expected_output="Provide Full analysis report in bullet points",
+        agent=agents[0]  # Assign to the researcher agent
+    )
+
+    outline_task = Task(
+        description="""Use the insights to produce a detailed content outline to expand upon later.""",
+        expected_output=f"A detailed and insightful content outline on {search_keywords}.",  # f-string: the crew is kicked off without inputs, so nothing else fills the placeholder
+        #human_input=True,
+        agent=agents[1]  # Assign to the outliner agent
+    )
+
+    writer_task = Task(
+        description=f"""Using the insights provided, develop an engaging content that highlights {search_keywords}.
+        Your post should be informative yet accessible, catering to a tech-savvy audience.
+        Avoid complex words so it doesn't sound like AI.""",
+        expected_output="A 2000 words content convering most sections of the provided outline.",
+        agent=agents[2]  # Assign to the writer agent
+    )
+
+    proofread_task = Task(
+        description=f"""Sharpen the focus of the draft content by identifying overly wordy sections and crafting concise alternatives.
+        Words with many syllables are barriers to simplicity. 
+        Choose simpler words, avoid sounding like AI.
+        Pay special attention to readiblity, formatting & styling of the content.
+        Make sure the draft content SEO optimised for keywords: {search_keywords}.
+        Make sure the final content is 2000 words long.
+        """,
+        expected_output="Final content with your review comments edited in the content draft.",
+        agent=agents[3]  # Assign to the reviewer agent
+    )
+    # Task order defines the pipeline: research -> outline -> write -> proofread
+    return [research_task, outline_task, writer_task, proofread_task]
+
+def execute_tasks(agents, tasks, lang):
+    """Assemble a Crew from `agents` and `tasks`, run it with kickoff(), and return whatever kickoff() returns."""
+    crew = Crew(
+        agents=agents,
+        tasks=tasks,
+        verbose=2,  # You can set it to 1 or 2 for different logging levels
+        # Options currently left disabled: process=Process.sequential, memory=True
+        language=lang
+    )
+    result = crew.kickoff()
+    return result
+
+def ai_agents_writers(search_keywords, lang="en"):  # Entry point: build agents and tasks for the keywords, run the crew, print the result.
+    setup_environment()
+    agents = create_agents(search_keywords)
+    tasks = create_tasks(agents, search_keywords)
+    result = execute_tasks(agents, tasks, lang)
+    print("######################")  # Visual separator before the final output
+    print(result)  # The result is only printed; nothing is returned to the caller
--- a/lib/check_blog_seo/README.md
+++ b/lib/check_blog_seo/README.md
@@ -1,33 +0,0 @@
-## Implementation approach
-
-To implement the SEO module, we will use the following open-source tools and frameworks:
-
-1. Natural Language Toolkit (NLTK): NLTK is a popular library for natural language processing in Python. We can leverage NLTK to perform various SEO checks on the given text, such as keyword density, readability analysis, and sentiment analysis.
-
-2. Beautiful Soup: Beautiful Soup is a Python library for web scraping. We can use Beautiful Soup to extract relevant information from the given text, such as meta tags, headings, and image alt attributes.
-
-3. PyEnchant: PyEnchant is a spell checking library for Python. We can utilize PyEnchant to check the spelling and grammar of the given text and provide suggestions for improvement.
-
-4. TextBlob: TextBlob is a library for processing textual data. We can use TextBlob to perform part-of-speech tagging, noun phrase extraction, and other linguistic analyses on the given text.
-
-5. Flask: Use Flask for local testing and development purposes. Flask provides a lightweight web framework that allows us to quickly build and test our SEO module.
-
-Overall, by leveraging these open-source tools and frameworks, we can develop a comprehensive and efficient SEO module that meets the requirements and provides valuable insights and suggestions for improving the SEO of the given text.
-
-## Required Python third-party packages
-
- nltk==3.6.2
- beautifulsoup4==4.9.3
- pyenchant==3.2.1
- textblob==0.15.3
- flask==1.1.2
-
-## Modules
-
-The 'text_processor.py' file contains the TextProcessor class, which is responsible for extracting meta tags, headings, and image alt attributes from the given text.
-        
-The 'spell_checker.py' file contains the SpellChecker class, which is responsible for checking the spelling and grammar of the given text.
-        
-The 'seo_checker.py' file contains the SEOChecker class, which is responsible for coordinating the SEO checks by utilizing the TextProcessor and SpellChecker classes.
-
-
--- a/lib/check_blog_seo/TBD
+++ b/lib/check_blog_seo/TBD
@@ -0,0 +1 @@
+https://pypi.org/project/textstat/
--- a/lib/utils/alwrity_utils.py
+++ b/lib/utils/alwrity_utils.py
@@ -17,6 +17,7 @@ from lib.ai_writers.keywords_to_blog import write_blog_from_keywords
 from lib.ai_writers.speech_to_blog.main_audio_to_blog import generate_audio_blog
 from lib.ai_writers.long_form_ai_writer import long_form_generator
 from lib.ai_writers.ai_news_article_writer import ai_news_generation
+from lib.ai_writers.ai_agents_crew_writer import ai_agents_writers
 from lib.gpt_providers.text_generation.ai_story_writer import ai_story_generator
 from lib.gpt_providers.text_generation.ai_essay_writer import ai_essay_generator
 from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
@@ -49,15 +50,15 @@ def blog_from_keyword():
    """ Input blog keywords, research and write a factual blog."""
    while True:
            print("________________________________________________________________")
-            blog_keywords = input_dialog(
+            content_keywords = input_dialog(
                    title='Enter Keywords/Blog Title',
                    text='Shit in, Shit Out; Better keywords, better research, hence better content.\n👋 Enter keywords/Blog Title for blog generation:',
                ).run()

            # If the user cancels, exit the loop
-            if blog_keywords is None:
+            if content_keywords is None:
                break
-            if blog_keywords and len(blog_keywords.split()) >= 2:
+            if content_keywords and len(content_keywords.split()) >= 2:
                break
            else:
                message_dialog(
@@ -68,22 +69,29 @@ def blog_from_keyword():
        title="Select content type:",
        values=[
            ("normal", "Normal-length content"),
-            ("long", "Long-form content")
+            ("long", "Long-form content"),
+            ("Experimental", "Experimental - AI Agents team")
        ],
        default="normal"
    ).run()

    if choice == "normal":
        try:
-            write_blog_from_keywords(blog_keywords)
+            write_blog_from_keywords(content_keywords)
        except Exception as err:
-            print(f"Failed to write blog on {blog_keywords}, Error: {err}\n")
+            print(f"🚫 Failed to write blog on {content_keywords}, Error: {err}\n")
            exit(1)
    elif choice == "long":
        try:
-            long_form_generator(blog_keywords)
+            long_form_generator(content_keywords)
        except Exception as err:
-            print(f"Failed to write blog on {blog_keywords}, Error: {err}\n")
+            print(f"🚫 Failed to write blog on {content_keywords}, Error: {err}\n")
+            exit(1)
+    elif choice == "Experimental":
+        try:
+            ai_agents_writers(content_keywords)
+        except Exception as err:
+            print(f"🚫 Failed to Write content with AI agents: {err}\n")
            exit(1)


@@ -139,20 +147,19 @@ def ai_news_writer():

 def do_web_research():
    """ Input keywords and do web research and present a report."""
-    if check_search_apis():
-        while True:
-            print("________________________________________________________________")
-            search_keywords = input_dialog(
-                    title='Enter Search Keywords below: More Options in main_config.',
-                    text='👋 Enter keywords for web research (Or keywords from your blog):',
-                ).run()
-            if search_keywords and len(search_keywords.split()) >= 2:
-                break
-            else:
-                message_dialog(
-                    title='Warning',
-                    text='🚫 Search keywords should be at least three words long. Please try again.'
-                ).run()
+    while True:
+        print("________________________________________________________________")
+        search_keywords = input_dialog(
+                title='Enter Search Keywords below: More Options in main_config.',
+                text='👋 Enter keywords for web research (Or keywords from your blog):',
+            ).run()
+        if search_keywords and len(search_keywords.split()) >= 2:  # Accept two or more words
+            break
+        else:
+            message_dialog(
+                title='Warning',
+                text='🚫 Search keywords should be at least two words long. Please try again.'
+            ).run()

    try:
        print(f"🚀🎬🚀 [bold green]Starting web research on given keywords: {search_keywords}..")
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,7 @@ rich
 python-dotenv
 loguru
 openai
+crewai[tools]
 google.generativeai
 mistralai
 tenacity
@@ -12,6 +13,7 @@ tabulate
 metaphor_python
 exa_py
 GoogleNews
+langchain-google-genai
 clint
 scikit-learn
 matplotlib