From 20c201f4f9d6377520bc9e07b6a39dc3e0af53fb Mon Sep 17 00:00:00 2001 From: ajaysi Date: Fri, 26 Apr 2024 16:07:17 +0530 Subject: [PATCH] Agentic content creation, web researched --- README.md | 16 +- alwrity.py | 3 +- lib/ai_web_researcher/ai_news_researcher.py | 172 -------------------- lib/ai_writers/ai_agents_crew_writer.py | 151 +++++++++++++++++ lib/check_blog_seo/README.md | 33 ---- lib/check_blog_seo/TBD | 1 + lib/utils/alwrity_utils.py | 51 +++--- requirements.txt | 2 + 8 files changed, 198 insertions(+), 231 deletions(-) delete mode 100644 lib/ai_web_researcher/ai_news_researcher.py create mode 100644 lib/ai_writers/ai_agents_crew_writer.py delete mode 100644 lib/check_blog_seo/README.md create mode 100644 lib/check_blog_seo/TBD diff --git a/README.md b/README.md index 3821d80b..4a7ac811 100644 --- a/README.md +++ b/README.md @@ -90,12 +90,22 @@ Congratulations: Once you've cloned the repository, you can proceed with the nex --- -### Option 3: Web URL 🌐 *(For easy access)* +## Packages, Tools, and APIs Used -Coming Soon.... +### Standing on the shoulders of Giants - Credits: +- **APIs**: + - [Exa API](https://exa.ai/): Provides semantic search capabilities for finding similar topics and technologies. + - [Tavily API](https://tavily.com/): Offers AI-powered web search functionality for conducting in-depth keyword research. + - [SerperDev API](https://serper.dev/): Enables access to search engine results and competitor analysis data. + - [YOU.com](https://you.com/): You.com enhances web search, writing, coding, digital art creation, and solving complex problems. + - [Stability AI](https://stability.ai/): Activating humanity's potential through generative AI. + Open models in every modality, for everyone, everywhere. + - [OpenAI API](https://openai.com/): Powers the Large Language Models (LLMs) for generating blog content and conducting research. + - [Gemini API](https://gemini.google.com/app): Google powered LLM for natural language processing tasks. 
+ - [Ollama](https://ollama.com/) : Local, Privacy focused, LLM provider for research and content generation capabilities. + - [CrewAI](https://www.crewai.com/): Collaborative AI agents framework. --- - ## Features - **Online Research Integration**: Enhances blog content by integrating insights and information gathered from online research, ensuring the content is informative and up-to-date. This gives context for generating content. Tavily AI, Google search, serp and Vision AI is used to scrape web data for context augumentation. TBD: Include CrewAI for web research agents. diff --git a/alwrity.py b/alwrity.py index 12867455..8813e7d5 100644 --- a/alwrity.py +++ b/alwrity.py @@ -83,7 +83,8 @@ def start_interactive_mode(): elif mode == 'AI Image to Text Writer': image_to_text_writer() elif mode == 'Do keyword Research': - do_web_research() + if check_search_apis(): + do_web_research() elif mode == 'Create Blog Images': image_generator() elif mode == 'Competitor Analysis': diff --git a/lib/ai_web_researcher/ai_news_researcher.py b/lib/ai_web_researcher/ai_news_researcher.py deleted file mode 100644 index f1dedc33..00000000 --- a/lib/ai_web_researcher/ai_news_researcher.py +++ /dev/null @@ -1,172 +0,0 @@ -################################################################ -# -# -# -############################################################## - -import os -import json -from pathlib import Path -import sys -from typing import List, NamedTuple -from loguru import logger -from datetime import datetime - -from ..gpt_providers.gemini_pro_text import gemini_text_response -from .tavily_ai_search import get_tavilyai_results -from .metaphor_basic_neural_web_search import metaphor_news_summarizer -from .google_serp_search import google_news -from .google_trends_researcher import do_google_trends_analysis -from .gpt_blog_sections import get_blog_sections_from_websearch -from .web_research_report import write_web_research_report - - -# Configure logger -logger.remove() 
-logger.add(sys.stdout, - colorize=True, - format="{level}|{file}:{line}:{function}| {message}" - ) - - -def web_news_researcher(search_keywords, time_range=None, include_domains=list(), similar_url=None): - """ """ - print(f"Web Research:Time Range - {time_range},Search Keywords - {search_keywords},Include URLs - {include_domains}") - if not include_domains: - include_domains = list() - # TBD: Keeping the results directory as fixed, for now. - os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "workspace", "web_research_reports", - search_keywords.replace(" ", "_") + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) - - # Collect all blog titles featuring in search results. This *may help in generating blog titles - # closest to competing ones. All search blog titles, given keyword and keywords from analysis, give - # llm a good context for the task of generating blog titles. - blog_titles = [] - # Get a list of FAQs from search results. - blog_faqs = None - google_result = None - tavily_result = None - report = None - try: - logger.info(f"Doing Google search for: {search_keywords}\n") - google_result = google_search(search_keywords) - blog_titles.append(extract_info(google_result, "titles")) - except Exception as err: - logger.error(f"Failed to do Google Serpapi research: {err}") - # Not failing, as tavily would do same and then GPT-V to search. - - try: - # FIXME: Include the follow-up questions as blog FAQs. 
- logger.info(f"Doing Tavily AI search for: {search_keywords}") - tavily_result = get_tavilyai_results(search_keywords, include_domains) - blog_titles.append(tavily_extract_information(tavily_result, "titles")) - except Exception as err: - logger.error(f"Failed to do Tavily AI Search: {err}") - - try: - logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}") - response_articles = metaphor_search_articles( - search_keywords, - include_domains=include_domains, - time_range=time_range, - similar_url=similar_url) - blog_titles.append(metaphor_extract_titles_or_text(response_articles, return_titles=True)) - except Exception as err: - logger.error(f"Failed to do Metaphor search: {err}") - print(blog_titles) - - try: - logger.info(f"Do Google Trends analysis for given keywords: {search_keywords}") - important_keywords = do_google_trends_analysis(search_keywords) - except Exception as err: - logger.error(f"Failed to do google trends analysis: {err}") - print(important_keywords) - # Now that we have search results from given keywords. Generate blog title and subtopics suggestions. - # 1. Return a list of related keywords along with search volumes. - # 2. New blog titles to write on(niche, top) and blog sections. - # 3. Competitors list, similar urls if given. - print(f"\n\nReview the analysis in this file at: {os.environ.get('SEARCH_SAVE_FILE')}\n") - - -def metaphor_extract_titles_or_text(json_data, return_titles=True): - """ - Extract either titles or text from the given JSON structure. - - Args: - json_data (list): List of Result objects in JSON format. - return_titles (bool): If True, return titles. If False, return text. - - Returns: - list: List of titles or text. - """ - if return_titles: - return [(result.title) for result in json_data] - else: - return [result.text for result in json_data] - - -def extract_info(json_data, info_type): - """ - Extract information (titles, peopleAlsoAsk, or relatedSearches) from the given JSON. 
- - Args: - json_data (dict): The JSON data. - info_type (str): The type of information to extract (titles, peopleAlsoAsk, relatedSearches). - - Returns: - list or None: A list containing the requested information, or None if the type is invalid. - """ - if info_type == "titles": - return [result.get("title") for result in json_data.get("organic", [])] - elif info_type == "peopleAlsoAsk": - return [item.get("question") for item in json_data.get("peopleAlsoAsk", [])] - elif info_type == "relatedSearches": - return [item.get("query") for item in json_data.get("relatedSearches", [])] - else: - print("Invalid info_type. Please use 'titles', 'peopleAlsoAsk', or 'relatedSearches'.") - return None - - -def tavily_extract_information(json_data, keyword): - """ - Extract information from the given JSON based on the specified keyword. - - Args: - json_data (dict): The JSON data. - keyword (str): The keyword (title, content, answer, follow-query). - - Returns: - list or str: The extracted information based on the keyword. - """ - if keyword == 'title': - return [result['title'] for result in json_data['results']] - elif keyword == 'content': - return [result['content'] for result in json_data['results']] - elif keyword == 'answer': - return json_data['answer'] - elif keyword == 'follow-query': - return json_data['follow_up_questions'] - else: - return f"Invalid keyword: {keyword}" - - -def compete_organic_results(query, report, organic_results): - """ Given a blog content and google search organinc results, create a new blog to compete against them.""" - prompt = f""" As an SEO expert and copywriter, I will provide you with my blog content on topic '{query}', and - Top google search results. - Your task is to rewrite the given blog to make it compete against top position results. - Make sure, the new blog has high probability of ranking highest against given organic search result competitors. - Modify the given blog content following best SEO practises. 
- Make sure the blog is original, unique and highly readable. - Remember, Maintain and adopt the formatting, structure, style and tone of the provided blog content. - Include relevant emojis in your final blog for visual appeal. Use it sparingly. - Your response should be well-structured, objective, and critically acclaimed blog article based on provided texts. - - Remember, your goal is to create a detailed blog article that will compete against given organic result competitors. - Do not provide explanations, suggestions for your response, reply only with your final response. - Take your time in crafting your content, do not rush to give the response. - Blog Content: '{report}'\n - Organic Search result: '{organic_results}' - """ - report = gemini_text_response(prompt) - return report diff --git a/lib/ai_writers/ai_agents_crew_writer.py b/lib/ai_writers/ai_agents_crew_writer.py new file mode 100644 index 00000000..bfdf7a5b --- /dev/null +++ b/lib/ai_writers/ai_agents_crew_writer.py @@ -0,0 +1,151 @@ +import os +from crewai import Agent, Task, Crew +from crewai_tools import SerperDevTool +from langchain_google_genai import ChatGoogleGenerativeAI + +def setup_environment(): + os.environ["OPENAI_MODEL_NAME"] = 'gpt-3.5-turbo' # Adjust based on available model + +def create_agents(search_keywords): + search_tool = SerperDevTool() + + # Load the google gemini api key + google_api_key = os.getenv("GEMINI_API_KEY") + + # Set gemini pro as llm + llm = ChatGoogleGenerativeAI( + model="gemini-pro", verbose=True, temperature=0.9, google_api_key=google_api_key + ) + + content_researcher = Agent( + role = 'Senior Research Analyst', + goal = f'Uncover content writing ideas for "{search_keywords}" keywords.', + backstory = f"""You work at a leading digital marketing firm. + Your expertise lies in identifying emerging trends, topic for content creation. + You are expert in researching latest information about various topics and {search_keywords}. 
+ Your research and content suggestions are foundation for content writers. + Your detailed content research is pivotal to company's content strategy.""", + tools = [search_tool], + memory = True, # Enable memory + verbose = True, + max_rpm = None, # No limit on requests per minute + max_iter = 15, # Default value for maximum iterations + allow_delegation = False, + llm = llm + ) + + content_outliner = Agent( + role = 'Senior Content Strategist', + goal = f'Create a content outline for "{search_keywords}" keywords, from your insights & provided context.', + backstory = """You are an expert digital content writer and marketing expert. + The content researcher had identified ideas to write content on. + Use this knowledge to write your content outline. + Take your time going over the research. Your content outline will be expanded upon after review.""", + memory = True, # Enable memory + verbose = True, + max_rpm = 10, # No limit on requests per minute + max_iter = 5, # Default value for maximum iterations + allow_delegation = False, + llm = llm + ) + + content_writer = Agent( + role = 'Content Strategist', + goal = f"""Craft compelling & SEO optimized content on {search_keywords}. + Rank high on Google for popular long-tail keywords related to the short-tail keyword {search_keywords}""", + backstory = f"""You are a renowned Content Strategist, known for your insightful and engaging articles. + You transform complex concepts into compelling narratives. + Limit them to 20 words or so, using language familiar to the majority. + Example: Instead of "Utilize this methodology," say "Use this method." + Employ a clear and concise writing style. + Engage your audience with a compelling, fun, and informative tone, + that effectively conveys the technical aspects of the topic in simple terms. 
+ """, + memory = True, # Enable memory + verbose = True, + max_rpm = 10, # No limit on requests per minute + max_iter = 5, # Default value for maximum iterations + allow_delegation = False, + llm = llm + ) + + content_reviewer = Agent( + role="Expert Writing Critic & content Editor.", + goal="Review the draft content and identfy potential issues.", + backstory="""You are expert reviewer with 10 years of exprience in reviewing digital content. + The make sure that article are interesting and correct information provided. + Simplicity will resonate with your readers. + Pay attention to grammar and punctuation. + Avoid AI sounding words and pass AI detection tools. + Engage with active voice. It’s as if you’re in conversation with the reader. + Example: Use "You will see benefits" instead of "One will see benefits." + Use headings, bullets, and formatting to break the monotony of the text. These elements add rhythm and can make a document more inviting. + A concise conclusion that resonates with the beginning can bring your piece full circle, satisfying your readers. + """, + memory=True, # Enable memory + verbose=True, + max_rpm=10, # No limit on requests per minute + max_iter=5, # Default value for maximum iterations + allow_delegation=False, + llm=llm + ) + + return [content_researcher, content_outliner, content_writer, content_reviewer] + +def create_tasks(agents, search_keywords): + research_task = Task( + description=f"""Conduct a comprehensive topic analysis on the following: "{search_keywords}". + Identify keyword trends, SEO opportunities, and potential content ideas to write upon. 
+ """, + expected_output="Provide Full analysis report in bullet points", + agent=agents[0] # Assign to the researcher agent + ) + + outline_task = Task( + description="""Use the insights to produce a detailed content outline to expand upon later.""", + expected_output=f"A detailed and insightful content outline on {search_keywords}.", + #human_input=True, + agent=agents[1] # Assign to the outliner agent + ) + + writer_task = Task( + description=f"""Using the insights provided, develop an engaging content that highlights {search_keywords}. + Your post should be informative yet accessible, catering to a tech-savvy audience. + Avoid complex words so it doesn't sound like AI.""", + expected_output="A 2000 words content convering most sections of the provided outline.", + agent=agents[2] # Assign to the writer agent + ) + + proofread_task = Task( + description=f"""Sharpen the focus of the draft content by identifying overly wordy sections and crafting concise alternatives. + Words with many syllables are barriers to simplicity. + Choose simpler words, avoid sounding like AI. + Pay special attention to readiblity, formatting & styling of the content. + Make sure the draft content SEO optimised for keywords: {search_keywords}. + Make sure the final content is 2000 words long. 
+ """, + expected_output="Final content with your review comments edited in the content draft.", + agent=agents[3] # Assign to the reviewer agent + ) + + return [research_task, outline_task, writer_task, proofread_task] + +def execute_tasks(agents, tasks, lang): + crew = Crew( + agents=agents, + tasks=tasks, + verbose=2, # You can set it to 1 or 2 for different logging levels + #process=Process.sequential, + #memory=True, + language=lang + ) + result = crew.kickoff() + return result + +def ai_agents_writers(search_keywords, lang="en"): + setup_environment() + agents = create_agents(search_keywords) + tasks = create_tasks(agents, search_keywords) + result = execute_tasks(agents, tasks, lang) + print("######################") + print(result) diff --git a/lib/check_blog_seo/README.md b/lib/check_blog_seo/README.md deleted file mode 100644 index bd471a99..00000000 --- a/lib/check_blog_seo/README.md +++ /dev/null @@ -1,33 +0,0 @@ -## Implementation approach - -To implement the SEO module, we will use the following open-source tools and frameworks: - -1. Natural Language Toolkit (NLTK): NLTK is a popular library for natural language processing in Python. We can leverage NLTK to perform various SEO checks on the given text, such as keyword density, readability analysis, and sentiment analysis. - -2. Beautiful Soup: Beautiful Soup is a Python library for web scraping. We can use Beautiful Soup to extract relevant information from the given text, such as meta tags, headings, and image alt attributes. - -3. PyEnchant: PyEnchant is a spell checking library for Python. We can utilize PyEnchant to check the spelling and grammar of the given text and provide suggestions for improvement. - -4. TextBlob: TextBlob is a library for processing textual data. We can use TextBlob to perform part-of-speech tagging, noun phrase extraction, and other linguistic analyses on the given text. - -5. Flask: Use Flask for local testing and development purposes. 
Flask provides a lightweight web framework that allows us to quickly build and test our SEO module. - -Overall, by leveraging these open-source tools and frameworks, we can develop a comprehensive and efficient SEO module that meets the requirements and provides valuable insights and suggestions for improving the SEO of the given text. - -## Required Python third-party packages - -- nltk==3.6.2 -- beautifulsoup4==4.9.3 -- pyenchant==3.2.1 -- textblob==0.15.3 -- flask==1.1.2 - -## Modules - -The 'text_processor.py' file contains the TextProcessor class, which is responsible for extracting meta tags, headings, and image alt attributes from the given text. - -The 'spell_checker.py' file contains the SpellChecker class, which is responsible for checking the spelling and grammar of the given text. - -The 'seo_checker.py' file contains the SEOChecker class, which is responsible for coordinating the SEO checks by utilizing the TextProcessor and SpellChecker classes. - - diff --git a/lib/check_blog_seo/TBD b/lib/check_blog_seo/TBD new file mode 100644 index 00000000..a8515fbe --- /dev/null +++ b/lib/check_blog_seo/TBD @@ -0,0 +1 @@ +https://pypi.org/project/textstat/ diff --git a/lib/utils/alwrity_utils.py b/lib/utils/alwrity_utils.py index 0dc21c56..c52dcc75 100644 --- a/lib/utils/alwrity_utils.py +++ b/lib/utils/alwrity_utils.py @@ -17,6 +17,7 @@ from lib.ai_writers.keywords_to_blog import write_blog_from_keywords from lib.ai_writers.speech_to_blog.main_audio_to_blog import generate_audio_blog from lib.ai_writers.long_form_ai_writer import long_form_generator from lib.ai_writers.ai_news_article_writer import ai_news_generation +from lib.ai_writers.ai_agents_crew_writer import ai_agents_writers from lib.gpt_providers.text_generation.ai_story_writer import ai_story_generator from lib.gpt_providers.text_generation.ai_essay_writer import ai_essay_generator from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image @@ -49,15 +50,15 
@@ def blog_from_keyword(): """ Input blog keywords, research and write a factual blog.""" while True: print("________________________________________________________________") - blog_keywords = input_dialog( + content_keywords = input_dialog( title='Enter Keywords/Blog Title', text='Shit in, Shit Out; Better keywords, better research, hence better content.\n👋 Enter keywords/Blog Title for blog generation:', ).run() # If the user cancels, exit the loop - if blog_keywords is None: + if content_keywords is None: break - if blog_keywords and len(blog_keywords.split()) >= 2: + if content_keywords and len(content_keywords.split()) >= 2: break else: message_dialog( @@ -68,22 +69,29 @@ def blog_from_keyword(): title="Select content type:", values=[ ("normal", "Normal-length content"), - ("long", "Long-form content") + ("long", "Long-form content"), + ("Experimental", "Experimental - AI Agents team") ], default="normal" ).run() if choice == "normal": try: - write_blog_from_keywords(blog_keywords) + write_blog_from_keywords(content_keywords) except Exception as err: - print(f"Failed to write blog on {blog_keywords}, Error: {err}\n") + print(f"🚫 Failed to write blog on {content_keywords}, Error: {err}\n") exit(1) elif choice == "long": try: - long_form_generator(blog_keywords) + long_form_generator(content_keywords) except Exception as err: - print(f"Failed to write blog on {blog_keywords}, Error: {err}\n") + print(f"🚫 Failed to write blog on {content_keywords}, Error: {err}\n") + exit(1) + elif choice == "Experimental": + try: + ai_agents_writers(content_keywords) + except Exception as err: + print(f"🚫 Failed to Write content with AI agents: {err}\n") exit(1) @@ -139,20 +147,19 @@ def ai_news_writer(): def do_web_research(): """ Input keywords and do web research and present a report.""" - if check_search_apis(): - while True: - print("________________________________________________________________") - search_keywords = input_dialog( - title='Enter Search Keywords below: More 
Options in main_config.', - text='👋 Enter keywords for web research (Or keywords from your blog):', - ).run() - if search_keywords and len(search_keywords.split()) >= 2: - break - else: - message_dialog( - title='Warning', - text='🚫 Search keywords should be at least three words long. Please try again.' - ).run() + while True: + print("________________________________________________________________") + search_keywords = input_dialog( + title='Enter Search Keywords below: More Options in main_config.', + text='👋 Enter keywords for web research (Or keywords from your blog):', + ).run() + if search_keywords and len(search_keywords.split()) >= 2: + break + else: + message_dialog( + title='Warning', + text='🚫 Search keywords should be at least two words long. Please try again.' + ).run() try: print(f"🚀🎬🚀 [bold green]Starting web research on given keywords: {search_keywords}..") diff --git a/requirements.txt b/requirements.txt index d1980063..414a2dfb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ rich python-dotenv loguru openai +crewai[tools] google.generativeai mistralai tenacity @@ -12,6 +13,7 @@ tabulate metaphor_python exa_py GoogleNews +langchain-google-genai clint scikit-learn matplotlib