WIP- Try AI-Writer and Web research; working.
This commit is contained in:
172
lib/ai_web_researcher/ai_news_researcher.py
Normal file
172
lib/ai_web_researcher/ai_news_researcher.py
Normal file
@@ -0,0 +1,172 @@
|
||||
################################################################
|
||||
#
|
||||
#
|
||||
#
|
||||
##############################################################
|
||||
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from typing import List, NamedTuple
|
||||
from loguru import logger
|
||||
from datetime import datetime
|
||||
|
||||
from ..gpt_providers.gemini_pro_text import gemini_text_response
|
||||
from .tavily_ai_search import get_tavilyai_results
|
||||
from .metaphor_basic_neural_web_search import metaphor_news_summarizer
|
||||
from .google_serp_search import google_news
|
||||
from .google_trends_researcher import do_google_trends_analysis
|
||||
from .gpt_blog_sections import get_blog_sections_from_websearch
|
||||
from .web_research_report import write_web_research_report
|
||||
|
||||
|
||||
# Configure logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
def web_news_researcher(search_keywords, time_range=None, include_domains=list(), similar_url=None):
|
||||
""" """
|
||||
print(f"Web Research:Time Range - {time_range},Search Keywords - {search_keywords},Include URLs - {include_domains}")
|
||||
if not include_domains:
|
||||
include_domains = list()
|
||||
# TBD: Keeping the results directory as fixed, for now.
|
||||
os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "workspace", "web_research_reports",
|
||||
search_keywords.replace(" ", "_") + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
|
||||
|
||||
# Collect all blog titles featuring in search results. This *may help in generating blog titles
|
||||
# closest to competing ones. All search blog titles, given keyword and keywords from analysis, give
|
||||
# llm a good context for the task of generating blog titles.
|
||||
blog_titles = []
|
||||
# Get a list of FAQs from search results.
|
||||
blog_faqs = None
|
||||
google_result = None
|
||||
tavily_result = None
|
||||
report = None
|
||||
try:
|
||||
logger.info(f"Doing Google search for: {search_keywords}\n")
|
||||
google_result = google_search(search_keywords)
|
||||
blog_titles.append(extract_info(google_result, "titles"))
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to do Google Serpapi research: {err}")
|
||||
# Not failing, as tavily would do same and then GPT-V to search.
|
||||
|
||||
try:
|
||||
# FIXME: Include the follow-up questions as blog FAQs.
|
||||
logger.info(f"Doing Tavily AI search for: {search_keywords}")
|
||||
tavily_result = get_tavilyai_results(search_keywords, include_domains)
|
||||
blog_titles.append(tavily_extract_information(tavily_result, "titles"))
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to do Tavily AI Search: {err}")
|
||||
|
||||
try:
|
||||
logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}")
|
||||
response_articles = metaphor_search_articles(
|
||||
search_keywords,
|
||||
include_domains=include_domains,
|
||||
time_range=time_range,
|
||||
similar_url=similar_url)
|
||||
blog_titles.append(metaphor_extract_titles_or_text(response_articles, return_titles=True))
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to do Metaphor search: {err}")
|
||||
print(blog_titles)
|
||||
|
||||
try:
|
||||
logger.info(f"Do Google Trends analysis for given keywords: {search_keywords}")
|
||||
important_keywords = do_google_trends_analysis(search_keywords)
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to do google trends analysis: {err}")
|
||||
print(important_keywords)
|
||||
# Now that we have search results from given keywords. Generate blog title and subtopics suggestions.
|
||||
# 1. Return a list of related keywords along with search volumes.
|
||||
# 2. New blog titles to write on(niche, top) and blog sections.
|
||||
# 3. Competitors list, similar urls if given.
|
||||
print(f"\n\nReview the analysis in this file at: {os.environ.get('SEARCH_SAVE_FILE')}\n")
|
||||
|
||||
|
||||
def metaphor_extract_titles_or_text(json_data, return_titles=True):
|
||||
"""
|
||||
Extract either titles or text from the given JSON structure.
|
||||
|
||||
Args:
|
||||
json_data (list): List of Result objects in JSON format.
|
||||
return_titles (bool): If True, return titles. If False, return text.
|
||||
|
||||
Returns:
|
||||
list: List of titles or text.
|
||||
"""
|
||||
if return_titles:
|
||||
return [(result.title) for result in json_data]
|
||||
else:
|
||||
return [result.text for result in json_data]
|
||||
|
||||
|
||||
def extract_info(json_data, info_type):
|
||||
"""
|
||||
Extract information (titles, peopleAlsoAsk, or relatedSearches) from the given JSON.
|
||||
|
||||
Args:
|
||||
json_data (dict): The JSON data.
|
||||
info_type (str): The type of information to extract (titles, peopleAlsoAsk, relatedSearches).
|
||||
|
||||
Returns:
|
||||
list or None: A list containing the requested information, or None if the type is invalid.
|
||||
"""
|
||||
if info_type == "titles":
|
||||
return [result.get("title") for result in json_data.get("organic", [])]
|
||||
elif info_type == "peopleAlsoAsk":
|
||||
return [item.get("question") for item in json_data.get("peopleAlsoAsk", [])]
|
||||
elif info_type == "relatedSearches":
|
||||
return [item.get("query") for item in json_data.get("relatedSearches", [])]
|
||||
else:
|
||||
print("Invalid info_type. Please use 'titles', 'peopleAlsoAsk', or 'relatedSearches'.")
|
||||
return None
|
||||
|
||||
|
||||
def tavily_extract_information(json_data, keyword):
|
||||
"""
|
||||
Extract information from the given JSON based on the specified keyword.
|
||||
|
||||
Args:
|
||||
json_data (dict): The JSON data.
|
||||
keyword (str): The keyword (title, content, answer, follow-query).
|
||||
|
||||
Returns:
|
||||
list or str: The extracted information based on the keyword.
|
||||
"""
|
||||
if keyword == 'title':
|
||||
return [result['title'] for result in json_data['results']]
|
||||
elif keyword == 'content':
|
||||
return [result['content'] for result in json_data['results']]
|
||||
elif keyword == 'answer':
|
||||
return json_data['answer']
|
||||
elif keyword == 'follow-query':
|
||||
return json_data['follow_up_questions']
|
||||
else:
|
||||
return f"Invalid keyword: {keyword}"
|
||||
|
||||
|
||||
def compete_organic_results(query, report, organic_results):
|
||||
""" Given a blog content and google search organinc results, create a new blog to compete against them."""
|
||||
prompt = f""" As an SEO expert and copywriter, I will provide you with my blog content on topic '{query}', and
|
||||
Top google search results.
|
||||
Your task is to rewrite the given blog to make it compete against top position results.
|
||||
Make sure, the new blog has high probability of ranking highest against given organic search result competitors.
|
||||
Modify the given blog content following best SEO practises.
|
||||
Make sure the blog is original, unique and highly readable.
|
||||
Remember, Maintain and adopt the formatting, structure, style and tone of the provided blog content.
|
||||
Include relevant emojis in your final blog for visual appeal. Use it sparingly.
|
||||
Your response should be well-structured, objective, and critically acclaimed blog article based on provided texts.
|
||||
|
||||
Remember, your goal is to create a detailed blog article that will compete against given organic result competitors.
|
||||
Do not provide explanations, suggestions for your response, reply only with your final response.
|
||||
Take your time in crafting your content, do not rush to give the response.
|
||||
Blog Content: '{report}'\n
|
||||
Organic Search result: '{organic_results}'
|
||||
"""
|
||||
report = gemini_text_response(prompt)
|
||||
return report
|
||||
@@ -37,7 +37,7 @@ from clint.textui import progress
|
||||
#from serpapi import GoogleSearch
|
||||
from loguru import logger
|
||||
from tabulate import tabulate
|
||||
|
||||
from GoogleNews import GoogleNews
|
||||
# Configure logger
|
||||
logger.remove()
|
||||
from dotenv import load_dotenv
|
||||
@@ -49,7 +49,6 @@ logger.add(
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
from .gpt_titles_faq import gpt_titles_faqs_google_search
|
||||
|
||||
#from tenacity import retry, stop_after_attempt, wait_random_exponential
|
||||
#@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
|
||||
@@ -199,6 +198,15 @@ def perform_dataforseo_google_search():
|
||||
return
|
||||
|
||||
|
||||
def google_news(search_keywords, news_period="7d", region="IN"):
|
||||
""" Get news articles from google_news"""
|
||||
googlenews = GoogleNews()
|
||||
googlenews.enableException(True)
|
||||
googlenews = GoogleNews(lang='en', region=region)
|
||||
googlenews = GoogleNews(period=news_period)
|
||||
print(googlenews.get_news('APPLE'))
|
||||
print(googlenews.search('APPLE'))
|
||||
|
||||
|
||||
def process_search_results(search_results):
|
||||
"""
|
||||
|
||||
@@ -17,10 +17,8 @@ from .tavily_ai_search import get_tavilyai_results
|
||||
from .metaphor_basic_neural_web_search import metaphor_find_similar, metaphor_search_articles
|
||||
from .google_serp_search import google_search
|
||||
from .google_trends_researcher import do_google_trends_analysis
|
||||
from .gpt_blog_sections import get_blog_sections_from_websearch
|
||||
from .web_research_report import write_web_research_report
|
||||
|
||||
|
||||
# Configure logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
@@ -32,60 +30,63 @@ logger.add(sys.stdout,
|
||||
def gpt_web_researcher(search_keywords, time_range=None, include_domains=list(), similar_url=None):
|
||||
""" """
|
||||
print(f"Web Research:Time Range - {time_range},Search Keywords - {search_keywords},Include URLs - {include_domains}")
|
||||
# TBD: Keeping the results directory as fixed, for now.
|
||||
os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "workspace", "web_research_reports", search_keywords.replace(" ", "_") + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
|
||||
if not include_domains:
|
||||
include_domains = list()
|
||||
# TBD: Keeping the results directory as fixed, for now.
|
||||
os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "workspace", "web_research_reports",
|
||||
search_keywords.replace(" ", "_") + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
|
||||
|
||||
# Collect all blog titles featuring in search results. This *may help in generating blog titles
|
||||
# closest to competing ones. All search blog titles, given keyword and keywords from analysis, give
|
||||
# llm a good context for the task of generating blog titles.
|
||||
blog_titles = []
|
||||
# Get a list of FAQs from search results.
|
||||
blog_faqs = None
|
||||
google_result = None
|
||||
tavily_result = None
|
||||
report = None
|
||||
google_search_result = do_google_serp_search(search_keywords)
|
||||
tavily_search_result = do_tavily_ai_search(search_keywords, include_domains)
|
||||
metaphor_search_result = do_metaphor_ai_research(search_keywords, include_domains, time_range, similar_url)
|
||||
gtrends_search_result = do_google_pytrends_analysis(search_keywords)
|
||||
# get_rag_results(search_query)
|
||||
print(f"\n\nReview the analysis in this file at: {os.environ.get('SEARCH_SAVE_FILE')}\n")
|
||||
|
||||
|
||||
def do_google_serp_search(search_keywords):
|
||||
""" """
|
||||
try:
|
||||
logger.info(f"Doing Google search for: {search_keywords}\n")
|
||||
google_result = google_search(search_keywords)
|
||||
blog_titles.append(extract_info(google_result, "titles"))
|
||||
return(google_search(search_keywords))
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to do Google Serpapi research: {err}")
|
||||
# Not failing, as tavily would do same and then GPT-V to search.
|
||||
|
||||
|
||||
def do_tavily_ai_search(search_keywords, include_domains=None):
|
||||
""" """
|
||||
try:
|
||||
# FIXME: Include the follow-up questions as blog FAQs.
|
||||
logger.info(f"Doing Tavily AI search for: {search_keywords}")
|
||||
tavily_result = get_tavilyai_results(search_keywords, include_domains)
|
||||
blog_titles.append(tavily_extract_information(tavily_result, "titles"))
|
||||
return(get_tavilyai_results(search_keywords, include_domains))
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to do Tavily AI Search: {err}")
|
||||
|
||||
|
||||
def do_metaphor_ai_research(search_keywords,
|
||||
include_domains=None,
|
||||
time_range=None,
|
||||
similar_url=None):
|
||||
""" """
|
||||
try:
|
||||
logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}")
|
||||
response_articles = metaphor_search_articles(
|
||||
search_keywords,
|
||||
include_domains=include_domains,
|
||||
search_keywords,
|
||||
include_domains=include_domains,
|
||||
time_range=time_range,
|
||||
similar_url=similar_url)
|
||||
blog_titles.append(metaphor_extract_titles_or_text(response_articles, return_titles=True))
|
||||
return response_articles
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to do Metaphor search: {err}")
|
||||
print(blog_titles)
|
||||
|
||||
|
||||
def do_google_pytrends_analysis(search_keywords):
|
||||
""" """
|
||||
try:
|
||||
logger.info(f"Do Google Trends analysis for given keywords: {search_keywords}")
|
||||
important_keywords = do_google_trends_analysis(search_keywords)
|
||||
return(do_google_trends_analysis(search_keywords))
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to do google trends analysis: {err}")
|
||||
print(important_keywords)
|
||||
# Now that we have search results from given keywords. Generate blog title and subtopics suggestions.
|
||||
# 1. Return a list of related keywords along with search volumes.
|
||||
# 2. New blog titles to write on(niche, top) and blog sections.
|
||||
# 3. Competitors list, similar urls if given.
|
||||
print(f"\n\nReview the analysis in this file at: {os.environ.get('SEARCH_SAVE_FILE')}\n")
|
||||
|
||||
|
||||
def metaphor_extract_titles_or_text(json_data, return_titles=True):
|
||||
|
||||
@@ -70,7 +70,10 @@ def metaphor_find_similar(similar_url):
|
||||
raise
|
||||
|
||||
competitors = search_response.results
|
||||
for acompetitor in tqdm(competitors, desc="Processing Competitors", unit="competitor"):
|
||||
urls = {}
|
||||
for c in competitors:
|
||||
print(c.title + ':' + c.url)
|
||||
for acompetitor in tqdm(competitors, desc="Processing URL content", unit="competitor"):
|
||||
all_contents = ""
|
||||
try:
|
||||
search_response = metaphor.search_and_contents(
|
||||
@@ -82,16 +85,15 @@ def metaphor_find_similar(similar_url):
|
||||
logger.error(f"Failed to do metaphor keyword/url research: {err}")
|
||||
|
||||
research_response = search_response.results
|
||||
|
||||
# Add a progress bar for the inner loop
|
||||
for r in tqdm(research_response, desc=f"{acompetitor.url}", unit="research"):
|
||||
all_contents += r.text
|
||||
try:
|
||||
acompetitor.text = summarize_competitor_content(all_contents, "gemini")
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to summarize_web_content: {err}")
|
||||
try:
|
||||
acompetitor.text = summarize_competitor_content(all_contents, "gemini")
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to summarize_web_content: {err}")
|
||||
|
||||
# Convert the data into a list of lists
|
||||
print(competitors)
|
||||
print_search_result(competitors)
|
||||
return search_response
|
||||
|
||||
@@ -142,7 +144,6 @@ def metaphor_search_articles(query,
|
||||
logger.error(f"Failed in metaphor.search_and_contents: {err}")
|
||||
|
||||
# From each webpage, get a summary of the web page.
|
||||
print(search_response)
|
||||
contents_response = search_response.results
|
||||
# for content in tqdm(contents_response, desc="Reading Web URL content:", unit="content"):
|
||||
# summarized_content = summarize_web_content(content.text, "gemini")
|
||||
@@ -160,18 +161,37 @@ def metaphor_search_articles(query,
|
||||
raise
|
||||
|
||||
|
||||
|
||||
def metaphor_news_summarizer(news_keywords):
|
||||
""" build a LLM-based news summarizer app with the Exa API to keep us up-to-date
|
||||
with the latest news on a given topic.
|
||||
"""
|
||||
# FIXME: Needs to be user defined.
|
||||
one_week_ago = (datetime.now() - timedelta(days=7))
|
||||
date_cutoff = one_week_ago.strftime("%Y-%m-%d")
|
||||
|
||||
search_response = exa.search_and_contents(
|
||||
news_keywords, use_autoprompt=True, start_published_date=date_cutoff
|
||||
)
|
||||
|
||||
urls = [result.url for result in search_response.results]
|
||||
print("URLs:")
|
||||
for url in urls:
|
||||
print(url)
|
||||
|
||||
|
||||
def print_search_result(contents_response):
|
||||
# Define the Result namedtuple
|
||||
Result = namedtuple("Result", ["url", "title", "published_date", "text"])
|
||||
Result = namedtuple("Result", ["url", "title", "text"])
|
||||
# Tabulate the data
|
||||
table_headers = ["URL", "Title", "Published Date", "Summary"]
|
||||
table_data = [(result.url, result.title, result.published_date, result.text) for result in contents_response]
|
||||
table_headers = ["URL", "Title", "Summary"]
|
||||
table_data = [(result.url, result.title, result.text) for result in contents_response]
|
||||
|
||||
table = tabulate(table_data,
|
||||
headers=table_headers,
|
||||
tablefmt="fancy_grid",
|
||||
colalign=["left", "left", "left", "left"],
|
||||
maxcolwidths=[20, 20, 10, 60])
|
||||
colalign=["left", "left", "left"],
|
||||
maxcolwidths=[20, 20, 70])
|
||||
print(table)
|
||||
# Save the combined table to a file
|
||||
try:
|
||||
|
||||
@@ -46,7 +46,6 @@ logger.add(sys.stdout,
|
||||
)
|
||||
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
||||
|
||||
from .gpt_titles_faq import gpt_titles_faqs_google_search
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
|
||||
def get_tavilyai_results(keywords, include_urls, search_depth="advanced"):
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
import os
|
||||
|
||||
import requests
|
||||
from clint.textui import progress
|
||||
from loguru import logger
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../../.env'))
|
||||
|
||||
|
||||
|
||||
def search_ydc_index(search_query, num_web_results=10, country="IN", api_key="<api-key>"):
|
||||
def search_ydc_index(search_query, num_web_results=10, country="IN"):
|
||||
"""
|
||||
Search YDC Index API and retrieve results.
|
||||
|
||||
@@ -17,24 +21,20 @@ def search_ydc_index(search_query, num_web_results=10, country="IN", api_key="<a
|
||||
Returns:
|
||||
dict: The response from the YDC Index API in JSON format.
|
||||
"""
|
||||
api_key = os.environ["YOU_API_KEY"]
|
||||
try:
|
||||
url = "https://api.ydc-index.io/search"
|
||||
|
||||
querystring = {
|
||||
"query": search_query,
|
||||
"num_web_results": str(num_web_results),
|
||||
"country": country
|
||||
}
|
||||
|
||||
headers = {"X-API-Key": api_key}
|
||||
|
||||
with progress.Bar(expected_size=num_web_results, label="Searching YDC Index") as bar:
|
||||
response = requests.get(url, headers=headers, params=querystring, stream=True)
|
||||
response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)
|
||||
|
||||
result_json = response.json()
|
||||
bar.show(result_json.get("web_results", [])) # Update progress bar with the number of web results
|
||||
response = requests.get(url, headers=headers, params=querystring, stream=True)
|
||||
response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)
|
||||
|
||||
result_json = response.json()
|
||||
return result_json
|
||||
|
||||
except requests.exceptions.RequestException as req_exc:
|
||||
@@ -45,19 +45,20 @@ def search_ydc_index(search_query, num_web_results=10, country="IN", api_key="<a
|
||||
logger.error(f"An error occurred: {e}")
|
||||
return {"error": str(e)}
|
||||
|
||||
def get_rag_results(search_query, num_web_results=10, country="IN", api_key="<api-key>"):
|
||||
|
||||
def get_rag_results(search_query, num_web_results=10, country="IN"):
|
||||
"""
|
||||
Retrieve RAG (Relevance, Authority, and Goodness) results from YDC Index API.
|
||||
|
||||
Args:
|
||||
search_query (str): The search query.
|
||||
num_web_results (int): Number of web results to retrieve.
|
||||
country (str): Country code.
|
||||
api_key (str): YDC Index API key.
|
||||
country (str): Country code
|
||||
|
||||
Returns:
|
||||
dict: The response from the YDC Index API in JSON format.
|
||||
"""
|
||||
api_key = os.environ["YOU_API_KEY"]
|
||||
try:
|
||||
url = "https://api.ydc-index.io/rag"
|
||||
|
||||
@@ -87,7 +88,7 @@ def get_rag_results(search_query, num_web_results=10, country="IN", api_key="<ap
|
||||
return {"error": str(e)}
|
||||
|
||||
|
||||
def get_news_results(query, spellcheck=True, api_key="<api-key>"):
|
||||
def get_news_results(query, spellcheck=True):
|
||||
"""
|
||||
Retrieve news results from YDC Index API.
|
||||
|
||||
@@ -99,6 +100,7 @@ def get_news_results(query, spellcheck=True, api_key="<api-key>"):
|
||||
Returns:
|
||||
dict: The response from the YDC Index API in JSON format.
|
||||
"""
|
||||
api_key = os.environ["YOU_API_KEY"]
|
||||
try:
|
||||
url = "https://api.ydc-index.io/news"
|
||||
|
||||
@@ -125,13 +127,3 @@ def get_news_results(query, spellcheck=True, api_key="<api-key>"):
|
||||
except Exception as e:
|
||||
logger.error(f"An error occurred: {e}")
|
||||
return {"error": str(e)}
|
||||
|
||||
|
||||
# Example usage
|
||||
search_query = "Getting started with llamaindex"
|
||||
result = get_news_results(search_query)
|
||||
print(result)
|
||||
result = get_rag_results(search_query)
|
||||
print(result)
|
||||
result = search_ydc_index(search_query)
|
||||
print(result)
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../.env'))
|
||||
|
||||
from ..gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from ..gpt_providers.gemini_pro_text import gemini_text_response
|
||||
@@ -13,32 +17,26 @@ logger.add(sys.stdout,
|
||||
|
||||
|
||||
# FIXME: Provide num_blogs, num_faqs as inputs.
|
||||
def gpt_titles_faqs_google_search(search_keyword, search_results, gpt_providers="openai"):
|
||||
def write_blog_google_serp(search_keyword, search_results):
|
||||
"""Combine the given online research and gpt blog content"""
|
||||
|
||||
gpt_providers = os.environ["GPT_PROVIDER"]
|
||||
prompt = f"""
|
||||
As a SEO expert and content writer, I will provide you with my web research keyword and its google search result in json format.
|
||||
Your task is to write 1 blog title and 10 FAQs.
|
||||
Your task is to write a SEO optimized, unique blog and 5 FAQs.
|
||||
|
||||
1). Your blog title should compete against all the provided search results.
|
||||
1). Your blog content should compete against all, in the provided search results. Follow best SEO practises.
|
||||
2). Your FAQ should be based on 'People also ask' and 'Related Queries' from given result.
|
||||
Always include answers for each FAQ, use your knowledge and confirm with snippets given in search result.
|
||||
3). Respond in json data with 'blogTitles' and 'FAQs' as json keys. Do not explain, describe your response.
|
||||
4). Follow best practises of SEO.
|
||||
3). Your blog should be detailed, unique and written in markdown language.
|
||||
4). Do not explain, describe your response.
|
||||
|
||||
Web Research Keyword: "{search_keyword}"
|
||||
Google search Result: "{search_results}"
|
||||
"""
|
||||
logger.info("Generating blog title and FAQs from web search result.")
|
||||
if 'gemini' in gpt_providers:
|
||||
logger.info("Generating blog and FAQs from web search result.")
|
||||
if 'google' in gpt_providers:
|
||||
try:
|
||||
response = gemini_text_response(prompt)
|
||||
print(f"\n\n\n RESPONSE: {response}\n\n\n")
|
||||
if '```' in response and '\n' in response:
|
||||
response = response.strip().split('\n')
|
||||
# Remove the first and last lines
|
||||
response = '\n'.join(response[1:-1])
|
||||
response = json.loads(response)
|
||||
return response
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to get response from gemini: {err}")
|
||||
62
lib/ai_writers/combine_blog_and_keywords.py
Normal file
62
lib/ai_writers/combine_blog_and_keywords.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../.env'))
|
||||
|
||||
from ..gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from ..gpt_providers.gemini_pro_text import gemini_text_response
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
def blog_with_keywords(blog, keywords):
|
||||
"""Combine the given online research and gpt blog content"""
|
||||
gpt_providers = os.environ["GPT_PROVIDER"]
|
||||
prompt = f"""
|
||||
You are an expert copywriter specializing in content optimization for SEO.
|
||||
I will provide you with my 'blog content' and 'list of keywords' on the same topic.
|
||||
Your task is to write an original blog, using the given keywords and blog content.
|
||||
Your blog should be highly detailed and well formatted.
|
||||
Do not miss out any details from provided blog content.
|
||||
Always, include figures, data, results from given content.
|
||||
It is important that your blog is original and unique. It should be highly readable and SEO optimized.
|
||||
|
||||
|
||||
Blog content: '{blog}'
|
||||
list of keywords: '{keywords}'
|
||||
"""
|
||||
|
||||
if 'google' in gpt_providers:
|
||||
prompt = f"""You are an expert copywriter specializing in content optimization for SEO.
|
||||
I will provide you with my 'blog content' and 'list of keywords' on the same topic.
|
||||
Your task is to write an original blog, using the given keywords and blog content.
|
||||
Your blog should be highly detailed and well formatted.
|
||||
Do not miss out any details from provided blog content.
|
||||
Always, include figures, data, results from given content.
|
||||
It is important that your blog is original and unique. It should be highly readable and SEO optimized.
|
||||
|
||||
|
||||
Blog content: '{blog}'
|
||||
list of keywords: '{keywords}'
|
||||
"""
|
||||
try:
|
||||
response = gemini_text_response(prompt)
|
||||
return response
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to get response from gemini: {err}")
|
||||
raise err
|
||||
elif 'openai' in gpt_providers:
|
||||
try:
|
||||
logger.info("Calling OpenAI LLM.")
|
||||
response = openai_chatgpt(prompt)
|
||||
return response
|
||||
except Exception as err:
|
||||
logger.error(f"failed to get response from Openai: {err}")
|
||||
raise err
|
||||
@@ -1,7 +1,12 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from .gpt_providers.gemini_pro_text import gemini_text_response
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../.env'))
|
||||
|
||||
from ..gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from ..gpt_providers.gemini_pro_text import gemini_text_response
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
@@ -11,9 +16,9 @@ logger.add(sys.stdout,
|
||||
)
|
||||
|
||||
|
||||
def blog_with_research(report, blog, gpt_providers="openai"):
|
||||
def blog_with_research(report, blog):
|
||||
"""Combine the given online research and gpt blog content"""
|
||||
|
||||
gpt_providers = os.environ["GPT_PROVIDER"]
|
||||
prompt = f"""
|
||||
You are an expert copywriter specializing in content optimization for SEO.
|
||||
I will provide you with a 'research report' and a 'blog content' on the same topic.
|
||||
@@ -25,9 +30,8 @@ def blog_with_research(report, blog, gpt_providers="openai"):
|
||||
2. Sentence Structure: Rephrase while preserving logical flow and coherence.
|
||||
3. Identify Main Keywords: Determine the primary topic and combine the articles on the main topic.
|
||||
4. REMEMBER: From the research report, include links and cititations to make your article more authoratative.
|
||||
5. Write Code snippets: Check if given report is on programming, then write code snippets where applicable.
|
||||
6. Optimize for SEO: Generate high quality informative content.
|
||||
Implement SEO best practises with appropriate keyword density.
|
||||
5. Optimize for SEO: Generate high quality informative content.
|
||||
6. Implement SEO best practises with appropriate keyword density.
|
||||
7. Craft Engaging and Informative Article: Provide value and insight to readers.
|
||||
8. Proofread: Important to Check for grammar, spelling, and punctuation errors.
|
||||
9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases,
|
||||
@@ -47,15 +51,15 @@ def blog_with_research(report, blog, gpt_providers="openai"):
|
||||
Blog content: {blog}
|
||||
"""
|
||||
|
||||
if 'gemini' in gpt_providers:
|
||||
if 'google' in gpt_providers:
|
||||
prompt = f"""You are an expert copywriter specializing in content optimization for SEO.
|
||||
You are world famous writer, known for your originality and engaging content.
|
||||
I will provide you with a 'research report' and a 'blog content' on the same topic.
|
||||
I will provide you with my 'research report' and 'blog content' on the same topic.
|
||||
Your task is to transform and combine the given research and blog content into a blog article.
|
||||
Your blog should be highly detailed and well formatted.
|
||||
Include a section in your blog on the highlights section of blog content.
|
||||
Do not miss out any details from provided content. Always, include figures, data, results from given content.
|
||||
It is important that your blog is original and unique. It should be highly readable and SEO optimized.
|
||||
Your blog should be highly detailed, original and well formatted.
|
||||
Do not miss out any details from provided content.
|
||||
Always, enhance the blog FAQs section with more information from given research.
|
||||
It is important that your blog provides detailed insights and engaging to readers.
|
||||
It should be highly readable and SEO optimized.
|
||||
|
||||
Research report: '{report}'
|
||||
Blog content: '{blog}'
|
||||
90
lib/ai_writers/keywords_to_blog.py
Normal file
90
lib/ai_writers/keywords_to_blog.py
Normal file
@@ -0,0 +1,90 @@
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../../.env'))
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search,\
|
||||
do_tavily_ai_search, do_metaphor_ai_research, do_google_pytrends_analysis
|
||||
from .blog_from_google_serp import write_blog_google_serp
|
||||
from .combine_research_and_blog import blog_with_research
|
||||
from .combine_blog_and_keywords import blog_with_keywords
|
||||
from ..ai_web_researcher.you_web_reseacher import get_rag_results, search_ydc_index
|
||||
|
||||
|
||||
def write_blog_from_keywords(search_keywords, url=None, output_format="markdown"):
|
||||
"""
|
||||
This function will take a blog Topic to first generate sections for it
|
||||
and then generate content for each section.
|
||||
"""
|
||||
# TBD: Keeping the results directory as fixed, for now.
|
||||
os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "workspace", "web_research_reports",
|
||||
search_keywords.replace(" ", "_") + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
|
||||
logger.info(f"Researching and Writing Blog on keywords: {search_keywords}")
|
||||
# Use to store the blog in a string, to save in a *.md file.
|
||||
blog_markdown_str = ""
|
||||
|
||||
# Call on the got-researcher, tavily apis for this. Do google search for organic competition.
|
||||
google_search_result = do_google_serp_search(search_keywords)
|
||||
blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result)
|
||||
# logger.info/check the final blog content.
|
||||
logger.info(f"Final blog content: {blog_markdown_str}")
|
||||
|
||||
# Do Tavily AI research to augument the above blog.
|
||||
tavily_search_result = do_tavily_ai_search(search_keywords)
|
||||
blog_markdown_str = blog_with_research(blog_markdown_str, tavily_search_result)
|
||||
logger.info(f"Final blog content: {blog_markdown_str}")
|
||||
|
||||
# Do Metaphor/Exa AI search.
|
||||
metaphor_search_result = do_metaphor_ai_research(search_keywords)
|
||||
blog_markdown_str = blog_with_research(blog_markdown_str, metaphor_search_result)
|
||||
logger.info(f"Final blog content: {blog_markdown_str}")
|
||||
|
||||
# Do Google trends analysis and combine with latest blog.
|
||||
pytrends_search_result = do_google_pytrends_analysis(search_keywords)
|
||||
blog_markdown_str = blog_with_keywords(blog_markdown_str, pytrends_search_result)
|
||||
logger.info(f"Final blog content: {blog_markdown_str}")
|
||||
|
||||
# Combine YOU.com RAG search with the latest blog content.
|
||||
#you_rag_result = get_rag_results(search_keywords)
|
||||
you_search_result = search_ydc_index(search_keywords)
|
||||
blog_markdown_str = blog_with_research(blog_markdown_str, you_search_result)
|
||||
logger.info(f"Final blog content: {blog_markdown_str}")
|
||||
|
||||
exit(1)
|
||||
|
||||
blog_title = generate_blog_title(blog_markdown_str, "gemini")
|
||||
blog_meta_desc = generate_blog_description(blog_markdown_str, "gemini")
|
||||
logger.info(f"The blog meta description is: {blog_meta_desc}\n")
|
||||
blog_tags = get_blog_tags(blog_markdown_str, "gemini")
|
||||
logger.info(f"Blog tags for generated content: {blog_tags}")
|
||||
blog_categories = get_blog_categories(blog_markdown_str, "gemini")
|
||||
logger.info(f"Generated blog categories: {blog_categories}\n")
|
||||
|
||||
#blog_markdown_str = gemini_get_code_samples(blog_markdown_str)
|
||||
#logger.info(f"Blog with code sample: \n {blog_markdown_str}")
|
||||
|
||||
# fixme: Remove the hardcoding, need add another option OR in config ?
|
||||
image_dir = os.path.join(os.getcwd(), "blog_images")
|
||||
generated_image_name = f"generated_image_{datetime.datetime.now():%Y-%m-%d-%H-%M-%S}.png"
|
||||
generated_image_filepath = os.path.join(image_dir, generated_image_name)
|
||||
# Generate an image based on meta description
|
||||
#logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
|
||||
#main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
|
||||
if url:
|
||||
try:
|
||||
generated_image_filepath = screenshot_api(url, generated_image_filepath)
|
||||
except Exception as err:
|
||||
logger.error(f"Failed in taking compnay page screenshot: {err}")
|
||||
# TBD: Save the blog content as a .md file. Markdown or HTML ?
|
||||
save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath)
|
||||
|
||||
logger.info(f"\n\n ################ Finished writing Blog for : {akeyword} #################### \n")
|
||||
@@ -1,37 +0,0 @@
|
||||
########################################################################
|
||||
#
|
||||
# Common module for getting response from gpt for given prompt.
|
||||
# This module includes following capabilities:
|
||||
#
|
||||
#
|
||||
#
|
||||
########################################################################
|
||||
|
||||
import json
|
||||
import os
|
||||
import datetime #I wish
|
||||
import sys
|
||||
import time
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
# Load configuration
|
||||
#with open('config.json') as config_file:
|
||||
# config = json.load(config_file)
|
||||
|
||||
#wordpress_url = config['wordpress_url']
|
||||
# fixme: Remove the hardcoding, need add another option OR in config ?
|
||||
image_dir = "blog_images"
|
||||
image_dir = os.path.join(os.getcwd(), image_dir)
|
||||
# TBD: This can come from config file.
|
||||
output_path = "blogs"
|
||||
output_path = os.path.join(os.getcwd(), output_path)
|
||||
wordpress_url = ''
|
||||
wordpress_username = ''
|
||||
wordpress_password = ''
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
import datetime
|
||||
|
||||
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
import google.generativeai as genai
|
||||
from .gpt_providers.gemini_pro_text import gemini_text_response
|
||||
from .gpt_online_researcher import do_online_research
|
||||
from .get_blog_meta_desc import generate_blog_description
|
||||
from .get_tags import get_blog_tags
|
||||
from .get_blog_category import get_blog_categories
|
||||
from .get_blog_title import generate_blog_title
|
||||
from .get_code_examples import gemini_get_code_samples
|
||||
from .save_blog_to_file import save_blog_to_file
|
||||
from .take_url_screenshot import screenshot_api
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../.env'))
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
def generate_keyword_blog(blog_keywords, url=None, output_format="markdown"):
|
||||
"""
|
||||
This function will take a blog Topic to first generate sections for it
|
||||
and then generate content for each section.
|
||||
"""
|
||||
for akeyword in blog_keywords:
|
||||
logger.info(f"Researching and Writing Blog on keywords: {akeyword}")
|
||||
# Use to store the blog in a string, to save in a *.md file.
|
||||
blog_markdown_str = ""
|
||||
|
||||
# Call on the got-researcher, tavily apis for this. Do google search for organic competition.
|
||||
blog_markdown_str = do_online_research(akeyword, "gemini")
|
||||
# logger.info/check the final blog content.
|
||||
logger.info(f"Final blog content: {blog_markdown_str}")
|
||||
|
||||
blog_title = generate_blog_title(blog_markdown_str, "gemini")
|
||||
blog_meta_desc = generate_blog_description(blog_markdown_str, "gemini")
|
||||
logger.info(f"The blog meta description is: {blog_meta_desc}\n")
|
||||
blog_tags = get_blog_tags(blog_markdown_str, "gemini")
|
||||
logger.info(f"Blog tags for generated content: {blog_tags}")
|
||||
blog_categories = get_blog_categories(blog_markdown_str, "gemini")
|
||||
logger.info(f"Generated blog categories: {blog_categories}\n")
|
||||
|
||||
#blog_markdown_str = gemini_get_code_samples(blog_markdown_str)
|
||||
#logger.info(f"Blog with code sample: \n {blog_markdown_str}")
|
||||
|
||||
# fixme: Remove the hardcoding, need add another option OR in config ?
|
||||
image_dir = os.path.join(os.getcwd(), "blog_images")
|
||||
generated_image_name = f"generated_image_{datetime.datetime.now():%Y-%m-%d-%H-%M-%S}.png"
|
||||
generated_image_filepath = os.path.join(image_dir, generated_image_name)
|
||||
# Generate an image based on meta description
|
||||
#logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
|
||||
#main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
|
||||
if url:
|
||||
try:
|
||||
generated_image_filepath = screenshot_api(url, generated_image_filepath)
|
||||
except Exception as err:
|
||||
logger.error(f"Failed in taking compnay page screenshot: {err}")
|
||||
# TBD: Save the blog content as a .md file. Markdown or HTML ?
|
||||
save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath)
|
||||
|
||||
logger.info(f"\n\n ################ Finished writing Blog for : {akeyword} #################### \n")
|
||||
Reference in New Issue
Block a user