WIP- Try AI-Writer and Web research; working. Working on usability aspects.

This commit is contained in:
AjaySi
2024-03-06 15:39:02 +05:30
parent 16e68c8959
commit 27f6952dd0
29 changed files with 399 additions and 602 deletions

View File

@@ -66,23 +66,23 @@ def google_search(query):
Returns:
list: List of search results based on the specified flag.
"""
try:
perform_serpapi_google_search(query)
logger.info(f"FIXME: Google serapi: {query}")
#return process_search_results(search_result)
except Exception as err:
logger.error(f"ERROR: Check Here: https://serpapi.com/. Your requests may be over. {err}")
#try:
# perform_serpapi_google_search(query)
# logger.info(f"FIXME: Google serapi: {query}")
# #return process_search_results(search_result)
#except Exception as err:
# logger.error(f"ERROR: Check Here: https://serpapi.com/. Your requests may be over. {err}")
# Retry with serper.dev
try:
logger.info("Trying Google search with Serper.dev: https://serper.dev/api-key")
search_result = perform_serperdev_google_search(query)
process_search_results(search_result)
return(search_result)
except Exception as err:
logger.error(f"Failed to do Google search with serper.dev: {err}")
return(search_result)
# # Retry with BROWSERLESS API
# try:
# search_result = perform_browserless_google_search(query)
@@ -118,7 +118,10 @@ def perform_serpapi_google_search(query, location="in"):
try:
# Check if API key is provided
if not os.getenv("SERPAPI_KEY"):
raise ValueError("SERPAPI_KEY key is required for SerpApi")
#raise ValueError("SERPAPI_KEY key is required for SerpApi")
logger.error("SERPAPI_KEY key is required for SerpApi")
return
# Create a GoogleSearch instance
search = GoogleSearch({
@@ -164,7 +167,7 @@ def perform_serperdev_google_search(query):
"q": query,
"gl": "in",
"hl": "en",
"num": 5,
"num": 10,
"autocorrect": True,
"page": 1,
"type": "search",

View File

@@ -23,6 +23,8 @@ Note: Ensure that the required libraries are installed using 'pip install pytren
"""
import os
import time # I wish
import random
import requests
import numpy as np
import sys
@@ -186,6 +188,7 @@ def get_related_topics_and_save_csv(search_keywords):
data = pytrends.related_topics()
except Exception as err:
logger.error(f"Failed to get pytrends realted topics: {err}")
return
# Extract data from the result
top_topics = list(data.values())[0]['top']
rising_topics = list(data.values())[0]['rising']
@@ -238,6 +241,8 @@ def get_results(query):
try:
query = urllib.parse.quote_plus(query)
response = get_source(f"https://suggestqueries.google.com/complete/search?output=chrome&hl=en&q={query}")
time.sleep(random.uniform(0.1, 0.6))
if response:
response.raise_for_status()
results = json.loads(response.text)
@@ -501,6 +506,8 @@ def do_google_trends_analysis(search_term):
else:
all_the_keywords.append(suggestions_df['Keywords'].tolist())
all_the_keywords = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in all_the_keywords])
# Generate a random sleep time between 2 and 3 seconds
time.sleep(random.uniform(2, 3))
#
# # FIXME: Get result from vision GPT. Fetch and visualize Google Trends data
@@ -510,12 +517,16 @@ def do_google_trends_analysis(search_term):
# result_df = plot_interest_by_region(search_term)
#
# Display additional information
result_df = get_related_topics_and_save_csv(search_term)
# Extract 'Top' topic_title
top_topic_title = result_df['topic_title'].values.tolist()
# Join each sublist into one string separated by comma
#top_topic_title = [','.join(filter(None, map(str, sublist))) for sublist in top_topic_title]
top_topic_title = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in top_topic_title])
try:
result_df = get_related_topics_and_save_csv(search_term)
# Extract 'Top' topic_title
if result_df:
top_topic_title = result_df['topic_title'].values.tolist()
# Join each sublist into one string separated by comma
#top_topic_title = [','.join(filter(None, map(str, sublist))) for sublist in top_topic_title]
top_topic_title = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in top_topic_title])
except Exception as err:
logger.error(f"Failed to get results from google trends related topics: {err}")
# TBD: Not getting great results OR unable to understand them.
#all_the_keywords += top_topic_title

View File

@@ -9,7 +9,6 @@ import json
from pathlib import Path
import sys
from typing import List, NamedTuple
from loguru import logger
from datetime import datetime
from ..gpt_providers.gemini_pro_text import gemini_text_response
@@ -17,8 +16,9 @@ from .tavily_ai_search import get_tavilyai_results
from .metaphor_basic_neural_web_search import metaphor_find_similar, metaphor_search_articles
from .google_serp_search import google_search
from .google_trends_researcher import do_google_trends_analysis
from .web_research_report import write_web_research_report
#from .web_research_report import write_web_research_report
from loguru import logger
# Configure logger
logger.remove()
logger.add(sys.stdout,

View File

@@ -66,7 +66,7 @@ def get_tavilyai_results(keywords, include_urls, search_depth="advanced"):
# Retrieve API keys
api_key = os.getenv('TAVILY_API_KEY')
if not api_key:
raise ValueError("API keys for Tavily or OpenAI are not set.")
raise ValueError("API keys for Tavily is Not set.")
# Initialize Tavily client
try:

View File

@@ -1,11 +1,10 @@
from langchain.adapters.openai import convert_openai_messages
from langchain.chat_models import ChatOpenAI
import os
from ..gpt_providers.gemini_pro_text import gemini_text_response
def write_web_research_report(web_research, faq_questions, gpt_provider="gemini"):
def write_web_research_report(web_research, faq_questions):
""" """
gpt_provider = os.environ["GPT_PROVIDER"]
if "gemini" in gpt_provider:
prompt = ["You are an SEO and marketing expert, who writes unique, factual and comprehensive research reports."
"I will provide you web research report as json data and a list of related FAQ questions."

View File

@@ -34,14 +34,14 @@ def write_blog_google_serp(search_keyword, search_results):
Google search Result: "{search_results}"
"""
logger.info("Generating blog and FAQs from web search result.")
if 'google' in gpt_providers:
if 'google' in gpt_providers.lower():
try:
response = gemini_text_response(prompt)
return response
except Exception as err:
logger.error(f"Failed to get response from gemini: {err}")
raise err
elif 'openai' in gpt_providers:
elif 'openai' in gpt_providers.lower():
try:
logger.info("Calling OpenAI LLM.")
response = openai_chatgpt(prompt)

View File

@@ -30,7 +30,7 @@ def blog_with_keywords(blog, keywords):
list of keywords: '{keywords}'
"""
if 'google' in gpt_providers:
if 'google' in gpt_providers.lower():
prompt = f"""You are an expert copywriter specializing in content optimization for SEO.
I will provide you with my 'blog content' and 'list of keywords' on the same topic.
Your task is to write an original blog, using the given keywords and blog content.
@@ -39,7 +39,6 @@ def blog_with_keywords(blog, keywords):
Always, include figures, data, results from given content.
It is important that your blog is original and unique. It should be highly readable and SEO optimized.
Blog content: '{blog}'
list of keywords: '{keywords}'
"""
@@ -49,7 +48,7 @@ def blog_with_keywords(blog, keywords):
except Exception as err:
logger.error(f"Failed to get response from gemini: {err}")
raise err
elif 'openai' in gpt_providers:
elif 'openai' in gpt_providers.lower():
try:
logger.info("Calling OpenAI LLM.")
response = openai_chatgpt(prompt)

View File

@@ -20,10 +20,10 @@ def blog_with_research(report, blog):
"""Combine the given online research and gpt blog content"""
gpt_providers = os.environ["GPT_PROVIDER"]
prompt = f"""
You are an expert copywriter specializing in content optimization for SEO.
You are an expert copywriter specializing in SEO content optimization for blogs.
I will provide you with a 'research report' and a 'blog content' on the same topic.
Your task is to transform and combine the given research and blog content into a well-structured markdown, unique
and engaging blog article.
Your task is to transform and combine the given 'research report' and 'blog content' into a well-structured, unique
and original blog article.
Your objectives include:
1. Master the report and blog content: Understand main ideas, key points, and the core message.
@@ -47,11 +47,11 @@ def blog_with_research(report, blog):
that will rank well in search engine results and engage readers effectively.
Create a blog post, in markdown, from the given research report and blog content below.
Research report: {report}
Blog content: {blog}
Research report: '{report}'
Blog content: '{blog}'
"""
if 'google' in gpt_providers:
if 'google' in gpt_providers.lower():
prompt = f"""You are an expert copywriter specializing in content optimization for SEO.
I will provide you with my 'research report' and 'blog content' on the same topic.
Your task is to transform and combine the given research and blog content into a blog article.
@@ -70,7 +70,7 @@ def blog_with_research(report, blog):
except Exception as err:
logger.error(f"Failed to get response from gemini: {err}")
raise err
elif 'openai' in gpt_providers:
elif 'openai' in gpt_providers.lower():
try:
logger.info("Calling OpenAI LLM.")
response = openai_chatgpt(prompt)
@@ -78,3 +78,6 @@ def blog_with_research(report, blog):
except Exception as err:
logger.error(f"failed to get response from Openai: {err}")
raise err
else:
logger.error(f"Unrecognised/Un-Supoorted GPT_PROVIDER: {gpt_providers}\n")
return

View File

@@ -1,4 +1,5 @@
import sys
import os
import json
from ..gpt_providers.openai_chat_completion import openai_chatgpt
@@ -13,9 +14,9 @@ logger.add(sys.stdout,
# FIXME: Provide num_blogs, num_faqs as inputs.
def get_blog_sections_from_websearch(search_keyword, search_results, gpt_providers="gemini"):
def get_blog_sections_from_websearch(search_keyword, search_results):
"""Combine the given online research and gpt blog content"""
gpt_providers = os.environ["GPT_PROVIDER"]
prompt = f"""
As a SEO expert and content writer, I will provide you with a search keyword and its google search result.
Your task is to write a blog title and 5 blog sub titles, from the given google search result.

View File

@@ -1,5 +1,6 @@
import sys
import os
from textwrap import dedent
from pathlib import Path
from datetime import datetime
@@ -32,41 +33,42 @@ def write_blog_from_keywords(search_keywords, url=None):
# TBD: Keeping the results directory as fixed, for now.
os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "workspace", "web_research_reports",
search_keywords.replace(" ", "_") + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
logger.info(f"Researching and Writing Blog on keywords: {search_keywords}")
# Use to store the blog in a string, to save in a *.md file.
blog_markdown_str = ""
example_blog_titles = []
logger.info(f"Researching and Writing Blog on keywords: {search_keywords}")
# Call on the got-researcher, tavily apis for this. Do google search for organic competition.
google_search_result, g_titles = do_google_serp_search(search_keywords)
example_blog_titles.append(g_titles)
blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result)
# logger.info/check the final blog content.
logger.info(f"Final blog content: {blog_markdown_str}")
logger.info(f"######### Blog content Google SERP research: ###########\n\n{blog_markdown_str}\n\n")
# Do Tavily AI research to augment the above blog.
tavily_search_result, t_titles = do_tavily_ai_search(search_keywords)
example_blog_titles.append(t_titles)
blog_markdown_str = blog_with_research(blog_markdown_str, tavily_search_result)
logger.info(f"Final blog content: {blog_markdown_str}")
if tavily_search_result:
blog_markdown_str = blog_with_research(blog_markdown_str, tavily_search_result)
logger.info(f"######### Blog content after Tavily AI research: ######### \n\n{blog_markdown_str}\n\n")
try:
# Do Metaphor/Exa AI search.
metaphor_search_result, m_titles = do_metaphor_ai_research(search_keywords)
example_blog_titles.append(m_titles)
blog_markdown_str = blog_with_research(blog_markdown_str, metaphor_search_result)
logger.info(f"Final blog content: {blog_markdown_str}")
logger.info(f"######## Blog content after EXA AI research: ########## \n\n{blog_markdown_str}\n\n")
except Exception as err:
logger.error(f"Failed to do Metaphor AI search: {err}")
# Do Google trends analysis and combine with latest blog.
try:
pytrends_search_result = do_google_pytrends_analysis(search_keywords)
logger.info(f"Google Trends keywords to use in the blog: {pytrends_search_result}\n")
blog_markdown_str = blog_with_keywords(blog_markdown_str, pytrends_search_result)
except Exception as err:
logger.error(f"Failed to do Google Trends Analysis:{err}")
blog_markdown_str = blog_proof_editor(blog_markdown_str, search_keywords)
logger.info(f"Final blog content: {blog_markdown_str}")
logger.info(f"########### Blog Content After Google Trends Analysis:######### \n {blog_markdown_str}\n\n")
# Combine YOU.com RAG search with the latest blog content.
#you_rag_result = get_rag_results(search_keywords)
@@ -74,6 +76,8 @@ def write_blog_from_keywords(search_keywords, url=None):
#blog_markdown_str = blog_with_research(blog_markdown_str, you_search_result)
#logger.info(f"Final blog content: {blog_markdown_str}")
blog_markdown_str = blog_proof_editor(blog_markdown_str, search_keywords)
blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str,
search_keywords, example_blog_titles)
@@ -92,4 +96,12 @@ def write_blog_from_keywords(search_keywords, url=None):
# TBD: Save the blog content as a .md file. Markdown or HTML ?
save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath)
blog_frontmatter = dedent(f"""\n\n\n\
---
title: {blog_title}
categories: [{blog_categories}]
tags: [{blog_tags}]
Meta description: {blog_meta_desc.replace(":", "-")}
---\n\n""")
logger.info(f"{blog_frontmatter}{blog_markdown_str}")
logger.info(f"\n\n ################ Finished writing Blog for : {search_keywords} #################### \n")

View File

@@ -27,13 +27,13 @@ def get_blog_categories(blog_article):
The blog content is: '{blog_article}'"
"""
logger.info("Generating blog categories for the given blog.")
if 'google' in gpt_providers:
if 'google' in gpt_providers.lower():
try:
response = gemini_text_response(prompt)
return response
except Exception as err:
logger.error(f"Failed to get response from gemini: {err}")
elif 'openai' in gpt_providers:
elif 'openai' in gpt_providers.lower():
try:
response = openai_chatgpt(prompt)
return response

View File

@@ -27,13 +27,13 @@ def generate_blog_description(blog_content):
Respond with only one of your best effort and do not include your explanations.
Blog Content: '{blog_content}'"""
if 'google' in gpt_providers:
if 'google' in gpt_providers.lower():
try:
response = gemini_text_response(prompt)
return response
except Exception as err:
logger.error("Failed to get response from gemini.")
elif 'openai' in gpt_providers:
elif 'openai' in gpt_providers.lower():
try:
response = openai_chatgpt(prompt)
return response

View File

@@ -42,13 +42,22 @@ def generate_blog_title(blog_article, keywords=None, example_titles=None, num_ti
Blog Keywords: '{keywords}'
Example Titles: '{example_titles}'
"""
if 'google' in gpt_providers:
elif not example_titles:
prompt = prompt = f"""As a SEO expert, I will provide you with my blog article.
Your task is to write {num_titles} blog title.
Follow SEO best practises to suggest the blog title.
Please keep the titles concise, not exceeding 60 words.
Respond with only {num_titles} title and no explanations.
Negative Keywords: Unvieling, unleash, power of. Dont use such words in your title.
Blog Article: '{keywords}'
"""
if 'google' in gpt_providers.lower():
try:
response = gemini_text_response(prompt)
return response
except Exception as err:
logger.error(f"Failed to get response from gemini: {err}")
elif 'openai' in gpt_providers:
elif 'openai' in gpt_providers.lower():
try:
logger.info("Calling OpenAI LLM.")
response = openai_chatgpt(prompt)

View File

@@ -25,13 +25,13 @@ def get_blog_tags(blog_article):
for the given blog content. Only reply with comma separated values.
Blog content: {blog_article}."""
logger.info("Generating Blog tags for the given blog post.")
if 'google' in gpt_providers:
if 'google' in gpt_providers.lower():
try:
response = gemini_text_response(prompt)
return response
except Exception as err:
logger.error("Failed to get response from gemini.")
elif 'openai' in gpt_providers:
elif 'openai' in gpt_providers.lower():
try:
response = openai_chatgpt(prompt)
return response

View File

@@ -26,19 +26,19 @@ def blog_proof_editor(blog_content, blog_keywords):
4). Tone and Brand Alignment: Adjust the tone, voice, personality of given content to make it unique.
5). Optimize Content Structure: Reorganize the content for a more impactful presentation,
including better paragraphing and transitions.
6). Simplify given content: Simplify concepts and replace overly complex jargons and words.
6). Simplify content: Simplify concepts and replace overly complex words. Use simple english words.
7). Refine Overall Structure: Make structural changes to improve the overall impact of the content.
\n\nMain keywords: '{blog_keywords}'
My Blog: '{blog_content}'. """
if 'openai' in gpt_provider:
if 'openai' in gpt_provider.lower():
try:
response = openai_chatgpt(prompt)
return response
except Exception as err:
SystemError(f"Openai Error Blog Proof Reading: {err}")
elif 'google' in gpt_provider:
elif 'google' in gpt_provider.lower():
try:
response = gemini_text_response(prompt)
return response

View File

@@ -57,13 +57,13 @@ def convert_tomarkdown_format(blog_content, gpt_provider="openai"):
Blog Post: '{blog_content}'"""
if 'openai' in gpt_provider:
if 'openai' in gpt_provider.lower():
try:
response = openai_chatgpt(prompt)
return response
except Exception as err:
SystemError(f"Openai Error in converting to Markdown format.")
elif 'gemini' in gpt_provider:
elif 'gemini' in gpt_provider.lower():
prompt = f""" Convert the given blog post into well structured MARKDOWN content.
Do not alter the given blog post.

View File

@@ -17,14 +17,8 @@ import openai
from openai import OpenAI
from pytube import YouTube
import tempfile
from html2image import Html2Image
import datetime
from PIL import Image
import moviepy.editor as mp
import requests
from moviepy.editor import AudioFileClip
from concurrent.futures import ThreadPoolExecutor
from loguru import logger
logger.remove()