WIP: AI writer - trying to get web research working.

This commit is contained in:
AjaySi
2024-02-18 10:00:24 +05:30
parent 2a3315f211
commit d89d9ad3d2
21 changed files with 117 additions and 1512 deletions

View File

@@ -213,7 +213,6 @@ def process_search_results(search_results):
data = []
logger.info(f"Google Search Parameters: {search_results.get('searchParameters', {})}")
organic_results = search_results.get("organic", [])
print(search_results)
# Displaying Organic Results
organic_data = []

View File

@@ -7,7 +7,7 @@ Features:
- Visualizes Google Trends data, including interest over time and interest by region.
- Retrieves related queries and topics for a set of search keywords.
- Utilizes visualization libraries such as Matplotlib, Plotly, and Rich for displaying results.
- Incorporates logging for error handling and informative messages.
- Incorporates the loguru logger for error handling and informative messages.
Usage:
- Provide a search term or a list of search terms for analysis.
@@ -22,6 +22,7 @@ Modifications:
Note: Ensure that the required libraries are installed using 'pip install pytrends requests_html tqdm tabulate plotly rich'.
"""
import os
import requests
import numpy as np
import sys
@@ -37,14 +38,12 @@ import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
import logging
from requests_html import HTML, HTMLSession
from urllib.parse import quote_plus
from tqdm import tqdm
from tabulate import tabulate
from pytrends.request import TrendReq
import wordcloud
logging.basicConfig(level=logging.INFO)
from wordcloud import WordCloud
from loguru import logger
# Configure logger
@@ -75,7 +74,7 @@ def fetch_google_trends_interest_overtime(keyword):
return data
except Exception as e:
logging.error(f"Error in fetch_google_trends_data: {e}")
logger.error(f"Error in fetch_google_trends_data: {e}")
return pd.DataFrame()
@@ -151,10 +150,11 @@ def get_related_queries_and_save_csv(keywords, hl='en-US', tz=360, cat=0, timefr
print("\n\033[1m🔝 Top\033[0m: The most popular search queries. Scoring is on a relative scale where a value of 100 is the most commonly searched query, 50 is a query searched half as often, and a value of 0 is a query searched for less than 1% as often as the most popular query.\n")
print("\n\033[1m🚀 Rising\033[0m: Queries with the biggest increase in search frequency since the last time period. Results marked 'Breakout' had a tremendous increase, probably because these queries are new and had few (if any) prior searches.\n")
# Display the DataFrame using tabulate
print(tabulate(all_queries_df, headers='keys', tablefmt='fancy_grid'))
table = tabulate(all_queries_df, headers='keys', tablefmt='fancy_grid')
print(table)
# Save the combined table to a file
try:
save_in_file(all_queries_df)
save_in_file(table)
except Exception as save_results_err:
logger.error(f"Failed to save search results: {save_results_err}")
return top_rising_queries
@@ -178,6 +178,7 @@ def get_related_topics_and_save_csv(search_keywords):
pytrends = TrendReq(hl='en-US', tz=360)
# Build payload
# FIXME: Remove hardcoding.
pytrends.build_payload(search_keywords, cat=0, timeframe='today 12-m')
# Get related topics
@@ -198,10 +199,7 @@ def get_related_topics_and_save_csv(search_keywords):
# Rename columns to avoid duplicates and provide meaningful names
df_top_topics.columns = ['Top- ' + col if col != 'topic_title' else col for col in df_top_topics.columns]
df_rising_topics.columns = ['Rising- ' + col if col != 'topic_title' else col for col in df_rising_topics.columns]
# Save to CSV
all_topics_df = pd.concat([df_top_topics, df_rising_topics], axis=1)
#all_topics_df.to_csv('related_topics.csv', index=False)
print(f"\n\n 📢❗🚨 Rising and Trending Keywords for {search_keywords}\n")
print("\033[1m🔝 Top\033[0m: The most popular search topics.")
@@ -209,11 +207,15 @@ def get_related_topics_and_save_csv(search_keywords):
# Display the DataFrame using tabulate
pd.set_option('display.max_rows', all_topics_df.shape[0]+1)
print(all_topics_df.head(10))
#print(tabulate(all_topics_df, headers='keys', tablefmt='fancy_grid'))
table = tabulate(all_topics_df, headers='keys', tablefmt='fancy_grid')
try:
save_in_file(table)
except Exception as save_results_err:
logger.error(f"Failed to save search results: {save_results_err}")
return all_topics_df
except Exception as e:
print(f"ERROR: An error occurred: {e}")
print(f"ERROR: An error occurred in related topics: {e}")
return pd.DataFrame()
@@ -224,7 +226,7 @@ def get_source(url):
response.raise_for_status() # Raise an HTTPError for bad responses
return response
except requests.exceptions.RequestException as e:
logging.error(f"Error during HTTP request: {e}")
logger.error(f"Error during HTTP request: {e}")
return None
@@ -240,10 +242,10 @@ def get_results(query):
else:
return None
except json.JSONDecodeError as e:
logging.error(f"Error decoding JSON response: {e}")
logger.error(f"Error decoding JSON response: {e}")
return None
except requests.exceptions.RequestException as e:
logging.error(f"Error during HTTP request: {e}")
logger.error(f"Error during HTTP request: {e}")
return None
@@ -256,7 +258,7 @@ def format_results(results):
suggestions.append(suggestion)
return suggestions
except (KeyError, IndexError) as e:
logging.error(f"Error parsing search results: {e}")
logger.error(f"Error parsing search results: {e}")
return []
@@ -288,7 +290,7 @@ def get_expanded_terms(query):
return terms
except Exception as e:
logging.error(f"Error in get_expanded_terms: {e}")
logger.error(f"Error in get_expanded_terms: {e}")
return []
@@ -307,7 +309,7 @@ def get_expanded_suggestions(query):
return all_results
except Exception as e:
logging.error(f"Error in get_expanded_suggestions: {e}")
logger.error(f"Error in get_expanded_suggestions: {e}")
return []
@@ -321,10 +323,14 @@ def get_suggestions_for_keyword(search_term):
#expanded_results_df.to_csv('results.csv', index=False)
pd.set_option('display.max_rows', expanded_results_df.shape[0]+1)
expanded_results_df.drop_duplicates('Keywords', inplace=True)
table = tabulate(expanded_results_df, headers=['Keywords', 'Relevance'], tablefmt='fancy_grid')
try:
save_in_file(table)
except Exception as save_results_err:
logger.error(f"Failed to save search results: {save_results_err}")
return expanded_results_df
except Exception as e:
logging.error(f"get_suggestions_for_keyword: Error in main: {e}")
logger.error(f"get_suggestions_for_keyword: Error in main: {e}")
@@ -355,7 +361,7 @@ def perform_keyword_clustering(expanded_results_df, num_clusters=5):
return expanded_results_df
except Exception as e:
logging.error(f"Error in perform_keyword_clustering: {e}")
logger.error(f"Error in perform_keyword_clustering: {e}")
return pd.DataFrame()
@@ -404,7 +410,7 @@ def visualize_silhouette(X, labels):
plt.show()
except Exception as e:
logging.error(f"Error in visualize_silhouette: {e}")
logger.error(f"Error in visualize_silhouette: {e}")
@@ -435,9 +441,9 @@ def print_and_return_top_keywords(expanded_results_df, num_clusters=5):
table = tabulate(top_keywords_df, headers='keys', tablefmt='fancy_grid')
# Save the combined table to a file
try:
save_in_file(top_keywords_df)
save_in_file(table)
except Exception as save_results_err:
logger.error(f"Failed to save search results: {save_results_err}")
logger.error(f"🚨 Failed to save search results: {save_results_err}")
print(table)
return top_keywords_df
@@ -484,11 +490,15 @@ def do_google_trends_analysis(search_term):
for asearch_term in search_term:
# FIXME: Let's work with a single root keyword.
suggestions_df = get_suggestions_for_keyword(asearch_term)
if len(suggestions_df['Keywords']) > 10:
result_df = perform_keyword_clustering(suggestions_df)
# Display top keywords in each cluster
top_keywords = print_and_return_top_keywords(result_df)
all_the_keywords.append(top_keywords['Keywords'].tolist())
else:
all_the_keywords.append(suggestions_df['Keywords'].tolist())
all_the_keywords = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in all_the_keywords])
result_df = perform_keyword_clustering(suggestions_df)
# Display top keywords in each cluster
top_keywords = print_and_return_top_keywords(result_df)
all_the_keywords.append(top_keywords['Keywords'].tolist())
#
# # FIXME: Get result from vision GPT. Fetch and visualize Google Trends data
# #trends_data = fetch_google_trends_interest_overtime("llamaindex")
@@ -496,23 +506,17 @@ def do_google_trends_analysis(search_term):
# # FIXME: Plot Interest Over time.
# result_df = plot_interest_by_region(search_term)
#
# # Display additional information
# Display additional information
result_df = get_related_topics_and_save_csv(search_term)
# Extract 'Top' topic_title
top_topic_title = result_df['topic_title'].values.tolist()
# Join each sublist into one string separated by comma
#top_topic_title = [','.join(filter(None, map(str, sublist))) for sublist in top_topic_title]
top_topic_title = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in top_topic_title])
print(f"\nRising and Top keywords: {top_topic_title}")
# Print or use the extracted topic titles
all_the_keywords = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in all_the_keywords])
print(f"\n\n📢❗🚨 Important keywords to target: {all_the_keywords}\n\n")
all_the_keywords += top_topic_title
print(all_the_keywords)
# TBD: Not getting great results OR unable to understand them.
#all_the_keywords += top_topic_title
all_the_keywords = all_the_keywords.split(',')
# Split the list into chunks of `chunk_size` keywords (one table row per chunk)
chunk_size = 4
chunks = [all_the_keywords[i:i + chunk_size] for i in range(0, len(all_the_keywords), chunk_size)]
@@ -520,11 +524,15 @@ def do_google_trends_analysis(search_term):
combined_df = pd.DataFrame(chunks, columns=[f'K📢eyword Col{i + 1}' for i in range(chunk_size)])
# Print the table
print(tabulate(combined_df, headers='keys', tablefmt='fancy_grid'))
#combined_df = pd.DataFrame({'📢❗🚨 Important keywords to target': chunks})
table = tabulate(combined_df, headers='keys', tablefmt='fancy_grid')
# Save the combined table to a file
try:
save_in_file(table)
except Exception as save_results_err:
logger.error(f"Failed to save search results: {save_results_err}")
print(table)
print(all_the_keywords)
generate_wordcloud(all_the_keywords.split(','))
#generate_wordcloud(all_the_keywords)
return(all_the_keywords)
except Exception as e:
logging.error(f"Error in main: {e}")
logger.error(f"Error in Google Trends Analysis: {e}")

View File

@@ -47,33 +47,33 @@ def gpt_web_researcher(search_keywords, time_range=None, include_domains=list(),
google_result = None
tavily_result = None
report = None
# try:
# logger.info(f"Doing Google search for: {search_keywords}\n")
# google_result = google_search(search_keywords)
# blog_titles.append(extract_info(google_result, "titles"))
# except Exception as err:
# logger.error(f"Failed to do Google Serpapi research: {err}")
# # Not failing, as tavily would do same and then GPT-V to search.
#
# try:
# # FIXME: Include the follow-up questions as blog FAQs.
# logger.info(f"Doing Tavily AI search for: {search_keywords}")
# tavily_result = get_tavilyai_results(search_keywords, include_domains)
# blog_titles.append(tavily_extract_information(tavily_result, "titles"))
# except Exception as err:
# logger.error(f"Failed to do Tavily AI Search: {err}")
try:
logger.info(f"Doing Google search for: {search_keywords}\n")
google_result = google_search(search_keywords)
blog_titles.append(extract_info(google_result, "titles"))
except Exception as err:
logger.error(f"Failed to do Google Serpapi research: {err}")
# Not failing, as tavily would do same and then GPT-V to search.
# try:
# logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}")
# response_articles = metaphor_search_articles(
# search_keywords,
# include_domains=include_domains,
# time_range=time_range,
# similar_url=similar_url)
# blog_titles.append(metaphor_extract_titles_or_text(response_articles, return_titles=True))
# except Exception as err:
# logger.error(f"Failed to do Metaphor search: {err}")
# print(blog_titles)
try:
# FIXME: Include the follow-up questions as blog FAQs.
logger.info(f"Doing Tavily AI search for: {search_keywords}")
tavily_result = get_tavilyai_results(search_keywords, include_domains)
blog_titles.append(tavily_extract_information(tavily_result, "titles"))
except Exception as err:
logger.error(f"Failed to do Tavily AI Search: {err}")
try:
logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}")
response_articles = metaphor_search_articles(
search_keywords,
include_domains=include_domains,
time_range=time_range,
similar_url=similar_url)
blog_titles.append(metaphor_extract_titles_or_text(response_articles, return_titles=True))
except Exception as err:
logger.error(f"Failed to do Metaphor search: {err}")
print(blog_titles)
try:
logger.info(f"Do Google Trends analysis for given keywords: {search_keywords}")
@@ -85,18 +85,7 @@ def gpt_web_researcher(search_keywords, time_range=None, include_domains=list(),
# 1. Return a list of related keywords along with search volumes.
# 2. New blog titles to write on(niche, top) and blog sections.
# 3. Competitors list, similar urls if given.
class Result(NamedTuple):
url: str
id: str
title: str
score: float
published_date: str
author: str
text: str
highlights: List[str]
highlight_scores: List[float]
print(f"\n\nReview the analysis in this file at: {os.environ.get('SEARCH_SAVE_FILE')}\n")
def metaphor_extract_titles_or_text(json_data, return_titles=True):
@@ -110,12 +99,10 @@ def metaphor_extract_titles_or_text(json_data, return_titles=True):
Returns:
list: List of titles or text.
"""
result_list = [Result(**result) for result in json_data]
if return_titles:
return [result.title for result in result_list]
return [(result.title) for result in json_data]
else:
return [result.text for result in result_list]
return [result.text for result in json_data]
def extract_info(json_data, info_type):

View File

@@ -142,10 +142,11 @@ def metaphor_search_articles(query,
logger.error(f"Failed in metaphor.search_and_contents: {err}")
# From each webpage, get a summary of the web page.
print(search_response)
contents_response = search_response.results
for content in tqdm(contents_response, desc="Reading Web URL content:", unit="content"):
summarized_content = summarize_web_content(content.text, "gemini")
content.text = summarized_content
# for content in tqdm(contents_response, desc="Reading Web URL content:", unit="content"):
# summarized_content = summarize_web_content(content.text, "gemini")
# content.text = summarized_content
print_search_result(contents_response)

View File

@@ -129,18 +129,18 @@ def print_result_table(output_data):
logger.error(f"Failed to save search results: {save_results_err}")
# Display the 'follow_up_questions' in a table
table_headers = [f"Search Engine follow up questions for query: {output_data.get('query')}"]
table_data = [[output_data.get("follow_up_questions")]]
table = tabulate(table_data,
if output_data.get("follow_up_questions"):
table_headers = [f"Search Engine follow up questions for query: {output_data.get('query')}"]
table_data = [[output_data.get("follow_up_questions")]]
table = tabulate(table_data,
headers=table_headers,
tablefmt="fancy_grid",
maxcolwidths=[80])
print(table)
# Save the combined table to a file
try:
save_in_file(table)
except Exception as save_results_err:
logger.error(f"Failed to save search results: {save_results_err}")
print(table)
try:
save_in_file(table)
except Exception as save_results_err:
logger.error(f"Failed to save search results: {save_results_err}")
def save_in_file(table_content):