Agentic content creation, web researched

This commit is contained in:
ajaysi
2024-04-26 16:07:17 +05:30
parent 45d324a2a9
commit 20c201f4f9
8 changed files with 198 additions and 231 deletions

View File

@@ -90,12 +90,22 @@ Congratulations: Once you've cloned the repository, you can proceed with the nex
---
### Option 3: Web URL 🌐 *(For easy access)*
## Packages, Tools, and APIs Used
Coming Soon....
### Standing on the shoulders of Giants - Credits:
- **APIs**:
- [Exa API](https://exa.ai/): Provides semantic search capabilities for finding similar topics and technologies.
- [Tavily API](https://tavily.com/): Offers AI-powered web search functionality for conducting in-depth keyword research.
- [SerperDev API](https://serper.dev/): Enables access to search engine results and competitor analysis data.
- [YOU.com](https://you.com/): You.com enhances web search, writing, coding, digital art creation, and solving complex problems.
- [Stability AI](https://stability.ai/): Activating humanity's potential through generative AI.
Open models in every modality, for everyone, everywhere.
- [OpenAI API](https://openai.com/): Powers the Large Language Models (LLMs) for generating blog content and conducting research.
- [Gemini API](https://gemini.google.com/app): Google powered LLM for natural language processing tasks.
- [Ollama](https://ollama.com/): Local, privacy-focused LLM provider for research and content generation capabilities.
- [CrewAI](https://www.crewai.com/): Collaborative AI agents framework.
---
## Features
- **Online Research Integration**: Enhances blog content by integrating insights and information gathered from online research, ensuring the content is informative and up-to-date. This gives context for generating content. Tavily AI, Google search, SERP and Vision AI are used to scrape web data for context augmentation. TBD: Include CrewAI for web research agents.

View File

@@ -83,7 +83,8 @@ def start_interactive_mode():
elif mode == 'AI Image to Text Writer':
image_to_text_writer()
elif mode == 'Do keyword Research':
do_web_research()
if check_search_apis():
do_web_research()
elif mode == 'Create Blog Images':
image_generator()
elif mode == 'Competitor Analysis':

View File

@@ -1,172 +0,0 @@
################################################################
#
#
#
##############################################################
import os
import json
from pathlib import Path
import sys
from typing import List, NamedTuple
from loguru import logger
from datetime import datetime
from ..gpt_providers.gemini_pro_text import gemini_text_response
from .tavily_ai_search import get_tavilyai_results
from .metaphor_basic_neural_web_search import metaphor_news_summarizer
from .google_serp_search import google_news
from .google_trends_researcher import do_google_trends_analysis
from .gpt_blog_sections import get_blog_sections_from_websearch
from .web_research_report import write_web_research_report
# Configure logger: call loguru's remove() first (with no arguments it is
# documented to drop the handlers registered so far), then attach a single
# colorized stdout sink that prints "LEVEL|file:line:function| message".
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
def web_news_researcher(search_keywords, time_range=None, include_domains=None, similar_url=None):
    """Run several web searches for the given keywords and print the findings.

    Google, Tavily AI and Metaphor searches are attempted in turn, followed by
    a Google Trends analysis. Every step is best-effort: a failure is logged
    and the remaining steps still run.

    Args:
        search_keywords (str): Keywords to research; also used to build the
            report file name.
        time_range: Forwarded unchanged to the Metaphor search.
        include_domains (list, optional): Domains forwarded to the Tavily and
            Metaphor searches. Any falsy value is treated as an empty list.
        similar_url: Forwarded unchanged to the Metaphor search.

    Returns:
        None. Results are printed and the report location is published via
        the ``SEARCH_SAVE_FILE`` environment variable.
    """
    # Normalise first: the default is now None instead of a shared mutable list.
    if not include_domains:
        include_domains = []
    print(f"Web Research:Time Range - {time_range},Search Keywords - {search_keywords},Include URLs - {include_domains}")

    # TBD: Keeping the results directory as fixed, for now.
    os.environ["SEARCH_SAVE_FILE"] = os.path.join(
        os.getcwd(), "workspace", "web_research_reports",
        search_keywords.replace(" ", "_") + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))

    # Collect all blog titles featuring in search results. This *may help in generating blog titles
    # closest to competing ones. All search blog titles, given keyword and keywords from analysis, give
    # llm a good context for the task of generating blog titles.
    blog_titles = []

    try:
        logger.info(f"Doing Google search for: {search_keywords}\n")
        # NOTE(review): `google_search` is not among the imports visible at the
        # top of this file (`google_news` is). If it is undefined, the NameError
        # is swallowed by the handler below and this step never runs - confirm.
        google_result = google_search(search_keywords)
        blog_titles.append(extract_info(google_result, "titles"))
    except Exception as err:
        logger.error(f"Failed to do Google Serpapi research: {err}")
        # Not failing, as tavily would do same and then GPT-V to search.

    try:
        # FIXME: Include the follow-up questions as blog FAQs.
        logger.info(f"Doing Tavily AI search for: {search_keywords}")
        tavily_result = get_tavilyai_results(search_keywords, include_domains)
        # The Tavily helper's keyword is 'title'; the former "titles" made it
        # return an "Invalid keyword" string that was stored as a title.
        blog_titles.append(tavily_extract_information(tavily_result, "title"))
    except Exception as err:
        logger.error(f"Failed to do Tavily AI Search: {err}")

    try:
        logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}")
        # NOTE(review): `metaphor_search_articles` is not among the visible
        # imports either (`metaphor_news_summarizer` is) - confirm.
        response_articles = metaphor_search_articles(
            search_keywords,
            include_domains=include_domains,
            time_range=time_range,
            similar_url=similar_url)
        blog_titles.append(metaphor_extract_titles_or_text(response_articles, return_titles=True))
    except Exception as err:
        logger.error(f"Failed to do Metaphor search: {err}")
    print(blog_titles)

    # Pre-bind so the print below cannot raise NameError when the analysis fails.
    important_keywords = None
    try:
        logger.info(f"Do Google Trends analysis for given keywords: {search_keywords}")
        important_keywords = do_google_trends_analysis(search_keywords)
    except Exception as err:
        logger.error(f"Failed to do google trends analysis: {err}")
    print(important_keywords)

    # Now that we have search results from given keywords. Generate blog title and subtopics suggestions.
    # 1. Return a list of related keywords along with search volumes.
    # 2. New blog titles to write on(niche, top) and blog sections.
    # 3. Competitors list, similar urls if given.
    print(f"\n\nReview the analysis in this file at: {os.environ.get('SEARCH_SAVE_FILE')}\n")
def metaphor_extract_titles_or_text(json_data, return_titles=True):
    """
    Pull one attribute out of every search result.

    Args:
        json_data (list): Iterable of result objects exposing ``title`` and
            ``text`` attributes.
        return_titles (bool): Truthy selects ``title``; falsy selects ``text``.

    Returns:
        list: The selected attribute of each result, in input order.
    """
    wanted = "title" if return_titles else "text"
    return [getattr(entry, wanted) for entry in json_data]
def extract_info(json_data, info_type):
    """
    Pick one kind of information out of a search-result dictionary.

    Args:
        json_data (dict): The search response.
        info_type (str): One of 'titles', 'peopleAlsoAsk' or 'relatedSearches'.

    Returns:
        list or None: The requested values (``None`` entries where an item
        lacks the field, empty when the section is missing), or ``None`` after
        printing a hint when ``info_type`` is not recognised.
    """
    # (info_type, section of the response, field read from each item)
    selectors = (
        ("titles", "organic", "title"),
        ("peopleAlsoAsk", "peopleAlsoAsk", "question"),
        ("relatedSearches", "relatedSearches", "query"),
    )
    for kind, section, field in selectors:
        if info_type == kind:
            return [entry.get(field) for entry in json_data.get(section, [])]

    print("Invalid info_type. Please use 'titles', 'peopleAlsoAsk', or 'relatedSearches'.")
    return None
def tavily_extract_information(json_data, keyword):
    """
    Extract information from the given JSON based on the specified keyword.

    Args:
        json_data (dict): The search response. 'results' is read for titles
            and content, 'answer' and 'follow_up_questions' for the others.
        keyword (str): What to extract: 'title' (the plural 'titles' is
            accepted as an alias, matching ``extract_info``), 'content',
            'answer' or 'follow-query'.

    Returns:
        list or str: The extracted information. For an unknown keyword the
        string ``"Invalid keyword: <keyword>"`` is returned rather than an
        exception being raised, so callers should check the result type.

    Raises:
        KeyError: If the response lacks the key needed for a valid keyword.
    """
    # 'titles' used to fall through to the invalid-keyword branch, which made
    # callers store the error string as if it were a title.
    if keyword in ('title', 'titles'):
        return [result['title'] for result in json_data['results']]
    elif keyword == 'content':
        return [result['content'] for result in json_data['results']]
    elif keyword == 'answer':
        return json_data['answer']
    elif keyword == 'follow-query':
        return json_data['follow_up_questions']
    else:
        return f"Invalid keyword: {keyword}"
def compete_organic_results(query, report, organic_results):
    """Ask Gemini to rewrite a blog so it competes with the given organic search results.

    Args:
        query: Topic of the blog, quoted in the prompt.
        report: Current blog content to be rewritten.
        organic_results: Search results the rewrite should compete against.

    Returns:
        Whatever ``gemini_text_response`` returns for the assembled prompt.
    """
    seo_prompt = f""" As an SEO expert and copywriter, I will provide you with my blog content on topic '{query}', and
Top google search results.
Your task is to rewrite the given blog to make it compete against top position results.
Make sure, the new blog has high probability of ranking highest against given organic search result competitors.
Modify the given blog content following best SEO practises.
Make sure the blog is original, unique and highly readable.
Remember, Maintain and adopt the formatting, structure, style and tone of the provided blog content.
Include relevant emojis in your final blog for visual appeal. Use it sparingly.
Your response should be well-structured, objective, and critically acclaimed blog article based on provided texts.
Remember, your goal is to create a detailed blog article that will compete against given organic result competitors.
Do not provide explanations, suggestions for your response, reply only with your final response.
Take your time in crafting your content, do not rush to give the response.
Blog Content: '{report}'\n
Organic Search result: '{organic_results}'
"""
    return gemini_text_response(seo_prompt)

View File

@@ -0,0 +1,151 @@
import os
from crewai import Agent, Task, Crew
from crewai_tools import SerperDevTool
from langchain_google_genai import ChatGoogleGenerativeAI
def setup_environment():
    """Set the OPENAI_MODEL_NAME environment variable, overwriting any existing value."""
    # Adjust based on available model
    os.environ.update(OPENAI_MODEL_NAME='gpt-3.5-turbo')
def create_agents(search_keywords):
    """Build the four agents of the content crew.

    Args:
        search_keywords: Keywords interpolated into the researcher, outliner
            and writer prompts.

    Returns:
        list: ``[researcher, outliner, writer, reviewer]`` in that order.
        All four share one Gemini chat model; only the researcher is given
        the Serper search tool.
    """
    search_tool = SerperDevTool()

    # Gemini Pro, keyed by GEMINI_API_KEY, is the LLM behind every agent.
    google_api_key = os.getenv("GEMINI_API_KEY")
    llm = ChatGoogleGenerativeAI(
        model="gemini-pro", verbose=True, temperature=0.9, google_api_key=google_api_key
    )

    # Settings that are identical for all four agents.
    shared = dict(memory=True, verbose=True, allow_delegation=False, llm=llm)

    researcher_backstory = f"""You work at a leading digital marketing firm.
Your expertise lies in identifying emerging trends, topic for content creation.
You are expert in researching latest information about various topics and {search_keywords}.
Your research and content suggestions are foundation for content writers.
Your detailed content research is pivotal to company's content strategy."""
    content_researcher = Agent(
        role='Senior Research Analyst',
        goal=f'Uncover content writing ideas for "{search_keywords}" keywords.',
        backstory=researcher_backstory,
        tools=[search_tool],
        max_rpm=None,  # no requests-per-minute cap
        max_iter=15,
        **shared
    )

    outliner_backstory = """You are an expert digital content writer and marketing expert.
The content researcher had identified ideas to write content on.
Use this knowledge to write your content outline.
Take your time going over the research. Your content outline will be expanded upon after review."""
    content_outliner = Agent(
        role='Senior Content Strategist',
        goal=f'Create a content outline for "{search_keywords}" keywords, from your insights & provided context.',
        backstory=outliner_backstory,
        max_rpm=10,  # at most 10 requests per minute
        max_iter=5,
        **shared
    )

    writer_goal = f"""Craft compelling & SEO optimized content on {search_keywords}.
Rank high on Google for popular long-tail keywords related to the short-tail keyword {search_keywords}"""
    writer_backstory = """You are a renowned Content Strategist, known for your insightful and engaging articles.
You transform complex concepts into compelling narratives.
Limit them to 20 words or so, using language familiar to the majority.
Example: Instead of "Utilize this methodology," say "Use this method."
Employ a clear and concise writing style.
Engage your audience with a compelling, fun, and informative tone,
that effectively conveys the technical aspects of the topic in simple terms.
"""
    content_writer = Agent(
        role='Content Strategist',
        goal=writer_goal,
        backstory=writer_backstory,
        max_rpm=10,  # at most 10 requests per minute
        max_iter=5,
        **shared
    )

    reviewer_backstory = """You are expert reviewer with 10 years of exprience in reviewing digital content.
The make sure that article are interesting and correct information provided.
Simplicity will resonate with your readers.
Pay attention to grammar and punctuation.
Avoid AI sounding words and pass AI detection tools.
Engage with active voice. Its as if youre in conversation with the reader.
Example: Use "You will see benefits" instead of "One will see benefits."
Use headings, bullets, and formatting to break the monotony of the text. These elements add rhythm and can make a document more inviting.
A concise conclusion that resonates with the beginning can bring your piece full circle, satisfying your readers.
"""
    content_reviewer = Agent(
        role="Expert Writing Critic & content Editor.",
        goal="Review the draft content and identfy potential issues.",
        backstory=reviewer_backstory,
        max_rpm=10,  # at most 10 requests per minute
        max_iter=5,
        **shared
    )

    return [content_researcher, content_outliner, content_writer, content_reviewer]
def create_tasks(agents, search_keywords):
    """Create the research, outline, writing and proofreading tasks.

    Args:
        agents (list): Agents in the order returned by ``create_agents``:
            researcher, outliner, writer, reviewer.
        search_keywords: Keywords interpolated into the task prompts.

    Returns:
        list: ``[research_task, outline_task, writer_task, proofread_task]``.
    """
    research_task = Task(
        description=f"""Conduct a comprehensive topic analysis on the following: "{search_keywords}".
Identify keyword trends, SEO opportunities, and potential content ideas to write upon.
""",
        expected_output="Provide Full analysis report in bullet points",
        agent=agents[0]  # Assign to the researcher agent
    )
    outline_task = Task(
        description="""Use the insights to produce a detailed content outline to expand upon later.""",
        # f-string: the crew is kicked off without inputs, so a plain string
        # would hand the literal "{search_keywords}" placeholder to the LLM.
        expected_output=f"A detailed and insightful content outline on {search_keywords}.",
        #human_input=True,
        agent=agents[1]  # Assign to the outliner agent
    )
    writer_task = Task(
        # f-string for the same reason as the outline task above.
        description=f"""Using the insights provided, develop an engaging content that highlights {search_keywords}.
Your post should be informative yet accessible, catering to a tech-savvy audience.
Avoid complex words so it doesn't sound like AI.""",
        expected_output="A 2000 words content convering most sections of the provided outline.",
        agent=agents[2]  # Assign to the writer agent
    )
    proofread_task = Task(
        description=f"""Sharpen the focus of the draft content by identifying overly wordy sections and crafting concise alternatives.
Words with many syllables are barriers to simplicity.
Choose simpler words, avoid sounding like AI.
Pay special attention to readiblity, formatting & styling of the content.
Make sure the draft content SEO optimised for keywords: {search_keywords}.
Make sure the final content is 2000 words long.
""",
        expected_output="Final content with your review comments edited in the content draft.",
        agent=agents[3]  # Assign to the reviewer agent
    )
    return [research_task, outline_task, writer_task, proofread_task]
def execute_tasks(agents, tasks, lang):
    """Assemble a crew from the given agents and tasks and run it.

    Args:
        agents: Agents taking part in the crew.
        tasks: Tasks the crew should work through.
        lang: Passed to the crew as its ``language`` setting.

    Returns:
        The value returned by ``Crew.kickoff()``.
    """
    crew_options = {
        "agents": agents,
        "tasks": tasks,
        "verbose": 2,  # You can set it to 1 or 2 for different logging levels
        "language": lang,
    }
    return Crew(**crew_options).kickoff()
def ai_agents_writers(search_keywords, lang="en"):
    """Run the full agent pipeline for the given keywords and print the outcome.

    Args:
        search_keywords: Keywords the crew should research and write about.
        lang: Language setting forwarded to the crew (defaults to "en").

    Returns:
        None. The crew's result is printed below a separator line.
    """
    setup_environment()
    crew_agents = create_agents(search_keywords)
    crew_tasks = create_tasks(crew_agents, search_keywords)
    outcome = execute_tasks(crew_agents, crew_tasks, lang)
    for line in ("######################", outcome):
        print(line)

View File

@@ -1,33 +0,0 @@
## Implementation approach
To implement the SEO module, we will use the following open-source tools and frameworks:
1. Natural Language Toolkit (NLTK): NLTK is a popular library for natural language processing in Python. We can leverage NLTK to perform various SEO checks on the given text, such as keyword density, readability analysis, and sentiment analysis.
2. Beautiful Soup: Beautiful Soup is a Python library for web scraping. We can use Beautiful Soup to extract relevant information from the given text, such as meta tags, headings, and image alt attributes.
3. PyEnchant: PyEnchant is a spell checking library for Python. We can utilize PyEnchant to check the spelling and grammar of the given text and provide suggestions for improvement.
4. TextBlob: TextBlob is a library for processing textual data. We can use TextBlob to perform part-of-speech tagging, noun phrase extraction, and other linguistic analyses on the given text.
5. Flask: Use Flask for local testing and development purposes. Flask provides a lightweight web framework that allows us to quickly build and test our SEO module.
Overall, by leveraging these open-source tools and frameworks, we can develop a comprehensive and efficient SEO module that meets the requirements and provides valuable insights and suggestions for improving the SEO of the given text.
## Required Python third-party packages
- nltk==3.6.2
- beautifulsoup4==4.9.3
- pyenchant==3.2.1
- textblob==0.15.3
- flask==1.1.2
## Modules
The 'text_processor.py' file contains the TextProcessor class, which is responsible for extracting meta tags, headings, and image alt attributes from the given text.
The 'spell_checker.py' file contains the SpellChecker class, which is responsible for checking the spelling and grammar of the given text.
The 'seo_checker.py' file contains the SEOChecker class, which is responsible for coordinating the SEO checks by utilizing the TextProcessor and SpellChecker classes.

1
lib/check_blog_seo/TBD Normal file
View File

@@ -0,0 +1 @@
https://pypi.org/project/textstat/

View File

@@ -17,6 +17,7 @@ from lib.ai_writers.keywords_to_blog import write_blog_from_keywords
from lib.ai_writers.speech_to_blog.main_audio_to_blog import generate_audio_blog
from lib.ai_writers.long_form_ai_writer import long_form_generator
from lib.ai_writers.ai_news_article_writer import ai_news_generation
from lib.ai_writers.ai_agents_crew_writer import ai_agents_writers
from lib.gpt_providers.text_generation.ai_story_writer import ai_story_generator
from lib.gpt_providers.text_generation.ai_essay_writer import ai_essay_generator
from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
@@ -49,15 +50,15 @@ def blog_from_keyword():
""" Input blog keywords, research and write a factual blog."""
while True:
print("________________________________________________________________")
blog_keywords = input_dialog(
content_keywords = input_dialog(
title='Enter Keywords/Blog Title',
text='Shit in, Shit Out; Better keywords, better research, hence better content.\n👋 Enter keywords/Blog Title for blog generation:',
).run()
# If the user cancels, exit the loop
if blog_keywords is None:
if content_keywords is None:
break
if blog_keywords and len(blog_keywords.split()) >= 2:
if content_keywords and len(content_keywords.split()) >= 2:
break
else:
message_dialog(
@@ -68,22 +69,29 @@ def blog_from_keyword():
title="Select content type:",
values=[
("normal", "Normal-length content"),
("long", "Long-form content")
("long", "Long-form content"),
("Experimental", "Experimental - AI Agents team")
],
default="normal"
).run()
if choice == "normal":
try:
write_blog_from_keywords(blog_keywords)
write_blog_from_keywords(content_keywords)
except Exception as err:
print(f"Failed to write blog on {blog_keywords}, Error: {err}\n")
print(f"🚫 Failed to write blog on {blog_keywords}, Error: {err}\n")
exit(1)
elif choice == "long":
try:
long_form_generator(blog_keywords)
long_form_generator(content_keywords)
except Exception as err:
print(f"Failed to write blog on {blog_keywords}, Error: {err}\n")
print(f"🚫 Failed to write blog on {blog_keywords}, Error: {err}\n")
exit(1)
elif choice == "Experimental":
try:
ai_agents_writers(content_keywords)
except Exception as err:
print(f"🚫 Failed to Write content with AI agents: {err}\n")
exit(1)
@@ -139,20 +147,19 @@ def ai_news_writer():
def do_web_research():
""" Input keywords and do web research and present a report."""
if check_search_apis():
while True:
print("________________________________________________________________")
search_keywords = input_dialog(
title='Enter Search Keywords below: More Options in main_config.',
text='👋 Enter keywords for web research (Or keywords from your blog):',
).run()
if search_keywords and len(search_keywords.split()) >= 2:
break
else:
message_dialog(
title='Warning',
text='🚫 Search keywords should be at least three words long. Please try again.'
).run()
while True:
print("________________________________________________________________")
search_keywords = input_dialog(
title='Enter Search Keywords below: More Options in main_config.',
text='👋 Enter keywords for web research (Or keywords from your blog):',
).run()
if search_keywords and len(search_keywords.split()) >= 2:
break
else:
message_dialog(
title='Warning',
text='🚫 Search keywords should be at least three words long. Please try again.'
).run()
try:
print(f"🚀🎬🚀 [bold green]Starting web research on given keywords: {search_keywords}..")

View File

@@ -4,6 +4,7 @@ rich
python-dotenv
loguru
openai
crewai[tool]
google.generativeai
mistralai
tenacity
@@ -12,6 +13,7 @@ tabulate
metaphor_python
exa_py
GoogleNews
langchain-google-genai
clint
scikit-learn
matplotlib