WIP000.1- AI content writer

This commit is contained in:
AjaySi
2024-01-03 16:59:17 +05:30
parent 8f89de7b69
commit b51e9a8c2f
32 changed files with 854 additions and 506 deletions

View File

@@ -17,10 +17,18 @@
##############################################################
import os
from pathlib import Path
import logging
from tavily import TavilyClient
import serpapi
from dotenv import load_dotenv
load_dotenv(Path('../.env'))
from langchain.adapters.openai import convert_openai_messages
from langchain.chat_models import ChatOpenAI
import google.generativeai as genai
logging.basicConfig(level=logging.INFO, format='%(asctime)s-%(levelname)s-%(module)s-%(lineno)d-%(message)s')
from tenacity import (
@@ -29,9 +37,26 @@ from tenacity import (
wait_random_exponential,
) # for exponential backoff
from .gpt_providers.gemini_pro_text import gemini_text_response
from .blog_proof_reader import blog_proof_editor
from .convert_content_to_markdown import convert_tomarkdown_format
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def do_online_research(query):
def do_online_research(query, gpt_provider="openai"):
# Do a google search for the given keyword. The search results will give urls, questions for faq
faq_questions = []
organic_results = []
report = ''
try:
faq_questions = google_search(query, "faq")
logging.info(f"Google search FAQ questions: {faq_questions}")
# Now, get top 10 google organic results and polish the content to compete for these keywords.
organic_results = google_search(query, "organic_result")
except Exception as err:
logging.error(f"Failed to do Serpapi research: {err}")
# Not failing, as tavily would do same and then GPT-V to search.
#exit(1)
try:
# Retrieve API keys
api_key = os.getenv('TAVILY_API_KEY')
@@ -45,7 +70,6 @@ def do_online_research(query):
except Exception as err:
logging.error("Failed to create Tavily client. Check TAVILY_API_KEY")
exit(1)
# Run tavily search
logging.info(f"Running Tavily search on: {query}")
try:
@@ -54,30 +78,104 @@ def do_online_research(query):
logging.error(f"Failed to do Tavily Research: {err}")
exit(1)
# Setup prompt for GPT-4
prompt = [{
"role": "system",
"content": ('You are an AI critical thinker research assistant. '
if "gemini" in gpt_provider:
prompt = ["You are an AI critical thinker research assistant."
"I will provide you with json content and a list of faq questions."
"Use given json as context for writing your research report."
"Your sole purpose is to write well written, critically acclaimed, objective and structured research report"
"Important: Include and write code examples in your final report."
"Include your own insights on the topic to make it comprehensive and detailed."
"Use the urls from json content to provide cititations and include it in referances section of your report."
"Include appropriate emojis in your research report."
"Include FAQs relevant to your research report. Use the given faq questions. Write answers for each faq."
"Format your report in MLA format and markdown style, with special focus on readibility."
f"Do not provide explanations for your response.\njson content: \"\"\" {content} \"\"\"\n "
f"\nList of FAQ questions: \"\"\" {faq_questions} \"\"\"\n"]
report = gemini_text_response(prompt)
elif "openai" in gpt_provider:
# Setup prompt for GPT-4
prompt = [{
"role": "system",
"content": ('You are an AI critical thinker research assistant. '
'Your sole purpose is to write well written, critically acclaimed, '
'objective and structured reports on given text.')
}, {
"role": "user",
"content": (f'Information: """{content}"""\n\n'
}, {
"role": "user",
"content": (f'Information: """{content}"""\n\n'
f'Using the above information, answer the following '
f'query: "{query}" in a detailed report --'
f'Please use MLA format and markdown syntax.')
}]
# Run GPT-4
logging.info("Generating Research report with GPT-4...")
lc_messages = convert_openai_messages(prompt)
try:
report = ChatOpenAI(model='gpt-4', openai_api_key=openai_api_key).invoke(lc_messages).content
#logging.info(f"\n Below is the online research report for given keywords/title: \n\n{report}")
return report
except Exception as err:
logging.error("Failed to generate do_online_research with ChatOpenAI")
exit(1)
}]
report = openai_research_report(prompt)
report = compete_organic_results(query, report, organic_results)
return report
except Exception as e:
logging.error(f"Failed in online research: {e}")
exit(1)
def openai_research_report(query):
    """Generate a research report with OpenAI GPT-4 through LangChain.

    Args:
        query: OpenAI-style chat messages, i.e. a list of
            ``{"role": ..., "content": ...}`` dicts. The parameter keeps its
            existing name for caller compatibility, but what it carries is the
            prompt: ``do_online_research`` passes its ``prompt`` list here.

    Returns:
        The ``content`` attribute of the reply produced by ``ChatOpenAI``.

    Note:
        A failure while invoking the model is logged and the process exits
        with status 1.
    """
    logging.info("Generating Research report with GPT-4...")

    # The body used to read `prompt` and `openai_api_key`, which are not
    # defined in this function, so the OpenAI path failed with NameError.
    # The messages come from the `query` argument; the key is read here.
    # NOTE(review): assumes the key lives in OPENAI_API_KEY (the variable
    # LangChain itself falls back to) - confirm against the project's .env.
    openai_api_key = os.getenv('OPENAI_API_KEY')

    lc_messages = convert_openai_messages(query)
    try:
        chat_model = ChatOpenAI(model='gpt-4', openai_api_key=openai_api_key)
        return chat_model.invoke(lc_messages).content
    except Exception as err:
        # Include the cause so the failure can be diagnosed from the log.
        logging.error(f"Failed to generate do_online_research with ChatOpenAI: {err}")
        exit(1)
def compete_organic_results(query, report, organic_results):
    """Rewrite a blog so it can compete with Google's organic search results.

    Builds an SEO-copywriter prompt that embeds the topic, the current blog
    text and the organic results, then sends it to ``gemini_text_response``.

    Args:
        query: Topic of the blog, interpolated into the prompt.
        report: Current blog content to be rewritten.
        organic_results: Organic search results to compete against; they are
            interpolated into the prompt via their string form.

    Returns:
        Whatever ``gemini_text_response`` returns for the prompt.
    """
    # The prompt text is indentation-sensitive (it is sent verbatim), so the
    # continuation lines intentionally start at column 0.
    seo_prompt = f""" As an SEO expert and copywriter, I will provide you with my blog content on topic '{query}', and
Top google search results.
Your task is to rewrite the given blog to make it compete against top position results.
Make sure, the new blog has high probability of ranking highest against given organic search result competitors.
Modify the given blog content following best SEO practises.
Make sure the blog is original, unique and highly readable.
Remember, Maintain and adopt the formatting, structure, style and tone of the provided blog content.
Include relevant emojis in your final blog for visual appeal. Use it sparingly.
Your response should be well-structured, objective, and critically acclaimed blog article based on provided texts.
Remember, your goal is to create a detailed blog article that will compete against given organic result competitors.
Do not provide explanations, suggestions for your response, reply only with your final response.
Take your time in crafting your content, do not rush to give the response.
Blog Content: '{report}'\n
Organic Search result: '{organic_results}'
"""
    return gemini_text_response(seo_prompt)
def _pluck(result, section, key):
    """Return ``item[key]`` for every entry of ``result[section]`` that has it."""
    return [item[key] for item in result.get(section, []) if key in item]


def google_search(query, flag="faq"):
    """Run a Google search through SerpApi and return the part chosen by *flag*.

    Args:
        query: Search phrase, sent to the ``google`` engine with ``hl="en"``.
        flag: Selects what is returned. A value containing ``"faq"`` (the
            default) yields FAQ-style questions; a value containing
            ``"organic_result"`` yields the organic results.

    Returns:
        * FAQ mode: titles from ``inline_people_also_search_for`` followed by
          questions from ``related_questions``; when both are empty, the
          ``query`` values of ``related_searches`` instead.
        * Organic mode: the ``organic_results`` entry of the response, or an
          empty list when it is absent.
        * ``None`` when *flag* matches neither mode.

    Raises:
        Exception: Any error raised while creating the client or searching is
            logged and then re-raised.
    """
    try:
        api_key = os.getenv('SERPAPI_KEY')
        client = serpapi.Client(api_key=api_key)
        result = client.search(
            q=query,
            engine="google",
            hl="en",
        )
    except Exception as err:
        logging.error(f"Failed in Google Search: {err}")
        # Re-raise instead of exit(1): SystemExit is not an Exception, so it
        # skipped the `except Exception` fallback in do_online_research and
        # killed the process even though that caller wants to carry on.
        raise

    if 'faq' in flag:
        # Entries lacking the expected field are skipped rather than raising
        # KeyError, so one odd entry does not discard the whole search.
        related_search = _pluck(result, 'inline_people_also_search_for', 'title')
        related_questions = _pluck(result, 'related_questions', 'question')
        if related_search or related_questions:
            return related_search + related_questions
        # Nothing in the primary sections: fall back to related searches.
        return _pluck(result, 'related_searches', 'query')

    if 'organic_result' in flag:
        return result.get('organic_results', [])

    # Unrecognised flag: same result as before, now stated explicitly.
    return None