Google trends data and keyword research
This commit is contained in:
@@ -105,124 +105,55 @@ def plot_interest_by_region(kw_list):
|
||||
|
||||
|
||||
|
||||
def get_related_queries_and_save_csv(keywords, hl='en-US', tz=360, cat=0, timeframe='today 12-m'):
    """
    Get related queries for the given keywords, print them and save the table.

    Only the result entry of the first keyword is used. Top and rising queries
    are printed side by side as a table, and the rendered table is handed to
    ``save_in_file``. Despite the function name, no CSV file is written here.

    Args:
        keywords (list): List of search keywords.
        hl (str): Language parameter, default is 'en-US'.
        tz (int): Timezone parameter, default is 360.
        cat (int): Category parameter, default is 0.
        timeframe (str): Timeframe parameter, default is 'today 12-m'.

    Returns:
        pd.DataFrame: Top queries followed by rising queries as rows of one
        frame. None is returned when the lookup or formatting fails; the
        error is printed in that case.
    """
    query_columns = ['query', 'value']
    try:
        # Build model
        pytrends = TrendReq(hl=hl, tz=tz)
        pytrends.build_payload(kw_list=keywords, cat=cat, timeframe=timeframe)

        # Get related queries
        data = pytrends.related_queries()

        # Use the entry of the first keyword. A missing or None table is
        # treated as "no rows" so the column renaming below cannot fail on it.
        first_entry = next(iter(data.values()), None) or {}
        frames = []
        for kind in ('top', 'rising'):
            table = first_entry.get(kind)
            if table is None:
                frames.append(pd.DataFrame(columns=query_columns))
            else:
                frames.append(pd.DataFrame(table))
        df_top_queries, df_rising_queries = frames

        # Stack the rows; adding the two frames with "+" would combine them
        # element-wise instead of listing both sets of queries.
        non_empty = [frame for frame in frames if not frame.empty]
        if non_empty:
            top_rising_queries = pd.concat(non_empty, ignore_index=True)
        else:
            top_rising_queries = pd.DataFrame(columns=query_columns)

        # Rename columns on copies to avoid duplicates in the side-by-side view
        top_display = df_top_queries.copy()
        top_display.columns = ['Top query', 'value']
        rising_display = df_rising_queries.copy()
        rising_display.columns = ['Rising query', 'value']
        all_queries_df = pd.concat([top_display, rising_display], axis=1)

        # Display additional information with emojis and bold formatting
        print("\n📢❗🚨 ")
        print("\n\033[1m🔝 Top\033[0m: The most popular search queries. Scoring is on a relative scale where a value of 100 is the most commonly searched query, 50 is a query searched half as often, and a value of 0 is a query searched for less than 1% as often as the most popular query.\n")
        print("\n\033[1m🚀 Rising\033[0m: Queries with the biggest increase in search frequency since the last time period. Results marked 'Breakout' had a tremendous increase, probably because these queries are new and had few (if any) prior searches.\n")

        # Display the DataFrame using tabulate
        table = tabulate(all_queries_df, headers='keys', tablefmt='fancy_grid')
        print(table)

        # Save the combined table to a file; a failed save must not lose the result
        try:
            save_in_file(table)
        except Exception as save_results_err:
            logger.error(f"Failed to save search results: {save_results_err}")
        return top_rising_queries

    except Exception as e:
        print(f"get_related_queries_and_save_csv: ERROR: An error occurred: {e}")
|
||||
|
||||
|
||||
def get_related_topics_and_save_csv(search_keywords):
    """
    Get the top and rising related topics for a search keyword.

    Despite the function name, nothing is written to disk here.

    Args:
        search_keywords (str): Search keyword. It is formatted into a string
            and sent to pytrends as a one-element keyword list.

    Returns:
        dict: ``{'top': DataFrame, 'rising': DataFrame}``, each holding the
        'topic_title' and 'value' columns. A table that is missing, None or
        empty is returned as an empty frame with those two columns, and both
        are empty when the request fails (the error is logged).
    """
    topic_columns = ['topic_title', 'value']

    def empty_result():
        # Fresh frames on every call so callers can never share state.
        return {kind: pd.DataFrame(columns=topic_columns) for kind in ('top', 'rising')}

    search_keywords = [f"{search_keywords}"]
    try:
        pytrends = TrendReq(hl='en-US', tz=360)
        pytrends.build_payload(kw_list=search_keywords, timeframe='today 12-m')

        # Get related topics - this returns a dictionary
        topics_data = pytrends.related_topics()

        # Extract data for the first keyword; an absent keyword yields the
        # same empty result as a failed request instead of an implicit None.
        keyword_data = (topics_data or {}).get(search_keywords[0]) or {}

        result = empty_result()
        for kind in ('top', 'rising'):
            frame = keyword_data.get(kind)
            # Each table is checked on its own so that one missing table
            # does not discard the other.
            if frame is not None and not frame.empty:
                result[kind] = frame[topic_columns]
        return result
    except Exception as e:
        logger.error(f"Error in related topics: {e}")
        return empty_result()
|
||||
|
||||
def get_related_queries_and_save_csv(search_keywords):
    """
    Get the top and rising related queries for a search keyword.

    Despite the function name, nothing is written to disk here.

    Args:
        search_keywords (str): Search keyword. It is formatted into a string
            and sent to pytrends as a one-element keyword list.

    Returns:
        dict: ``{'top': DataFrame, 'rising': DataFrame}``. A table that is
        missing, None or empty is returned as an empty DataFrame, and both
        are empty when the request fails (the error is logged).
    """
    search_keywords = [f"{search_keywords}"]
    try:
        pytrends = TrendReq(hl='en-US', tz=360)
        pytrends.build_payload(kw_list=search_keywords, timeframe='today 12-m')

        # Get related queries - this returns a dictionary
        queries_data = pytrends.related_queries()

        # Extract data for the first keyword; an absent keyword yields the
        # same empty result as a failed request instead of an implicit None.
        keyword_data = (queries_data or {}).get(search_keywords[0]) or {}

        result = {'top': pd.DataFrame(), 'rising': pd.DataFrame()}
        for kind in ('top', 'rising'):
            frame = keyword_data.get(kind)
            # Each table is checked on its own so that one missing table
            # does not discard the other.
            if frame is not None and not frame.empty:
                result[kind] = frame
        return result
    except Exception as e:
        logger.error(f"Error in related queries: {e}")
        return {'top': pd.DataFrame(), 'rising': pd.DataFrame()}
|
||||
|
||||
|
||||
def get_source(url):
|
||||
@@ -507,22 +438,17 @@ def do_google_trends_analysis(search_term):
|
||||
else:
|
||||
all_the_keywords.append(suggestions_df['Keywords'].tolist())
|
||||
all_the_keywords = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in all_the_keywords])
|
||||
|
||||
# Generate a random sleep time between 2 and 3 seconds
|
||||
time.sleep(random.uniform(2, 3))
|
||||
|
||||
#
|
||||
# # FIXME: Get result from vision GPT. Fetch and visualize Google Trends data
|
||||
# #trends_data = fetch_google_trends_interest_overtime("llamaindex")
|
||||
#
|
||||
# # FIXME: Plot Interest Over time.
|
||||
# result_df = plot_interest_by_region(search_term)
|
||||
#
|
||||
|
||||
# Display additional information
|
||||
try:
|
||||
result_df = get_related_topics_and_save_csv(search_term)
|
||||
logger.info(f"Related topics:: result_df: {result_df}")
|
||||
# Extract 'Top' topic_title
|
||||
if result_df:
|
||||
top_topic_title = result_df['topic_title'].values.tolist()
|
||||
top_topic_title = result_df['top']['topic_title'].values.tolist()
|
||||
# Join each sublist into one string separated by comma
|
||||
#top_topic_title = [','.join(filter(None, map(str, sublist))) for sublist in top_topic_title]
|
||||
top_topic_title = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in top_topic_title])
|
||||
@@ -551,3 +477,77 @@ def do_google_trends_analysis(search_term):
|
||||
return(all_the_keywords)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in Google Trends Analysis: {e}")
|
||||
|
||||
|
||||
def get_trending_searches(country='united_states'):
    """
    Fetch the currently trending searches for one country.

    Args:
        country (str): Value passed to pytrends as ``pn``; default is
            'united_states'.

    Returns:
        The result of ``TrendReq.trending_searches``; an empty DataFrame when
        the request raises (the error is logged).
    """
    try:
        client = TrendReq(hl='en-US', tz=360)
        return client.trending_searches(pn=country)
    except Exception as err:
        logger.error(f"Error getting trending searches: {err}")
        return pd.DataFrame()
|
||||
|
||||
def display_trending_searches(trending_df):
    """
    Display trending searches in the UI as a numbered list.

    Args:
        trending_df (pd.DataFrame): Trending searches; the first column holds
            the search terms. None or an empty frame shows an info message.
    """
    if trending_df is None or trending_df.empty:
        st.info("No trending searches data available.")
        return

    st.subheader("📊 Trending Searches")

    # Read the first column by position so the list still renders when the
    # column is not labelled 0.
    top_searches = trending_df.iloc[:, 0].head(10)

    # Display as numbered list with emojis
    for idx, search in enumerate(top_searches, 1):
        st.write(f"{idx}. 🔍 {search}")
|
||||
|
||||
def get_realtime_trends(country='US'):
    """
    Fetch the realtime trending searches for one country.

    Args:
        country (str): Value passed to pytrends as ``pn``; default is 'US'.

    Returns:
        The result of ``TrendReq.realtime_trending_searches``; an empty
        DataFrame when the request raises (the error is logged).
    """
    try:
        client = TrendReq(hl='en-US', tz=360)
        return client.realtime_trending_searches(pn=country)
    except Exception as err:
        logger.error(f"Error getting realtime trends: {err}")
        return pd.DataFrame()
|
||||
|
||||
def display_realtime_trends(trends_df):
    """
    Display realtime trending searches in the UI.

    Each of the first five rows is rendered as an expander showing the row's
    'title' and 'traffic' values and the titles of up to three entries from
    its 'articles' value.

    Args:
        trends_df (pd.DataFrame): Realtime trends. None or an empty frame
            shows an info message.
    """
    if trends_df is None or trends_df.empty:
        st.info("No realtime trends data available.")
        return

    st.subheader("⚡ Realtime Trends")

    # Display top 5 trends with their titles and articles
    for _, row in trends_df.head(5).iterrows():
        with st.expander(f"🔥 {row.get('title', 'Trending Topic')}"):
            st.write(f"**Traffic:** {row.get('traffic', 'N/A')}")

            # A missing cell (NaN) or any other non-list value is skipped
            # rather than sliced, which would raise.
            articles = row.get('articles')
            if isinstance(articles, (list, tuple)) and articles:
                st.write("📰 Related Articles:")
                for article in articles[:3]:  # Show top 3 articles
                    if isinstance(article, dict) and 'title' in article:
                        st.write(f"- {article['title']}")
|
||||
|
||||
def display_google_trends_data(trends_data, search_keyword):
    """
    Render the Google Trends sections as Streamlit tabs.

    Only the "Trending Now" tab is filled in by the code visible here; it
    shows trending searches and realtime trends in two columns.

    Args:
        trends_data (dict): Trends results; 'trending_searches' and
            'realtime_trends' are read, each defaulting to an empty DataFrame.
        search_keyword: Not used by the code visible here.
    """
    # ... existing code ...

    # Create tabs for different sections
    section_titles = [
        "Related Keywords",
        "Interest Over Time",
        "Regional Interest",
        "Related Queries",
        "Related Topics",
        "Trending Now"
    ]
    tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(section_titles)

    # ... existing tab code ...

    with tab6:
        trending_col, realtime_col = st.columns(2)
        with trending_col:
            display_trending_searches(trends_data.get('trending_searches', pd.DataFrame()))
        with realtime_col:
            display_realtime_trends(trends_data.get('realtime_trends', pd.DataFrame()))
|
||||
|
||||
@@ -27,15 +27,19 @@ from pathlib import Path
|
||||
import sys
|
||||
from datetime import datetime
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
import random
|
||||
import numpy as np
|
||||
|
||||
from lib.alwrity_ui.display_google_serp_results import (
|
||||
process_research_results,
|
||||
process_search_results,
|
||||
display_research_results
|
||||
)
|
||||
from lib.alwrity_ui.google_trends_ui import display_google_trends_data, process_trends_data
|
||||
|
||||
from .tavily_ai_search import get_tavilyai_results
|
||||
from .metaphor_basic_neural_web_search import metaphor_search_articles
|
||||
from .metaphor_basic_neural_web_search import metaphor_search_articles, streamlit_display_metaphor_results
|
||||
from .google_serp_search import google_search
|
||||
from .google_trends_researcher import do_google_trends_analysis
|
||||
#from .google_gemini_web_researcher import do_gemini_web_research
|
||||
@@ -56,6 +60,10 @@ def gpt_web_researcher(search_keywords, search_mode, **kwargs):
|
||||
logger.debug(f"Additional parameters: {kwargs}")
|
||||
|
||||
try:
|
||||
# Reset session state variables for this research operation
|
||||
if 'metaphor_results_displayed' in st.session_state:
|
||||
del st.session_state.metaphor_results_displayed
|
||||
|
||||
# Initialize result container
|
||||
research_results = None
|
||||
|
||||
@@ -157,13 +165,76 @@ def gpt_web_researcher(search_keywords, search_mode, **kwargs):
|
||||
update_progress("Metaphor AI search failed, continuing with Tavily results only...", level="warning")
|
||||
else:
|
||||
update_progress("Metaphor AI search completed successfully", progress=75)
|
||||
# Add debug logging to check the structure of metaphor_results
|
||||
logger.debug(f"Metaphor results structure: {type(metaphor_results)}")
|
||||
if isinstance(metaphor_results, dict):
|
||||
logger.debug(f"Metaphor results keys: {metaphor_results.keys()}")
|
||||
if 'data' in metaphor_results:
|
||||
logger.debug(f"Metaphor data keys: {metaphor_results['data'].keys()}")
|
||||
if 'results' in metaphor_results['data']:
|
||||
logger.debug(f"Number of results: {len(metaphor_results['data']['results'])}")
|
||||
|
||||
# Display Metaphor results only if not already displayed
|
||||
if 'metaphor_results_displayed' not in st.session_state:
|
||||
st.session_state.metaphor_results_displayed = True
|
||||
# Make sure to pass the correct parameters to streamlit_display_metaphor_results
|
||||
streamlit_display_metaphor_results(metaphor_results, search_keywords)
|
||||
|
||||
# Add Google Trends Analysis
|
||||
update_progress("Initiating Google Trends analysis...", progress=80)
|
||||
try:
|
||||
# Add an informative message about Google Trends
|
||||
with st.expander("ℹ️ About Google Trends Analysis", expanded=False):
|
||||
st.markdown("""
|
||||
**What is Google Trends Analysis?**
|
||||
|
||||
Google Trends Analysis provides insights into how often a particular search-term is entered relative to the total search-volume across various regions of the world, and in various languages.
|
||||
|
||||
**What data will be shown?**
|
||||
|
||||
- **Related Keywords**: Terms that are frequently searched together with your keyword
|
||||
- **Interest Over Time**: How interest in your keyword has changed over the past 12 months
|
||||
- **Regional Interest**: Where in the world your keyword is most popular
|
||||
- **Related Queries**: What people search for before and after searching for your keyword
|
||||
- **Related Topics**: Topics that are closely related to your keyword
|
||||
|
||||
**How to use this data:**
|
||||
|
||||
- Identify trending topics in your industry
|
||||
- Understand seasonal patterns in search behavior
|
||||
- Discover related keywords for content planning
|
||||
- Target content to specific regions with high interest
|
||||
""")
|
||||
|
||||
trends_results = do_google_pytrends_analysis(search_keywords)
|
||||
if trends_results:
|
||||
update_progress("Google Trends analysis completed successfully", progress=90)
|
||||
# Store trends results in the research_results
|
||||
if metaphor_results:
|
||||
metaphor_results['trends_data'] = trends_results
|
||||
else:
|
||||
# If metaphor_results is None, create a new container for results
|
||||
metaphor_results = {'trends_data': trends_results}
|
||||
|
||||
# Display Google Trends data using the new UI module
|
||||
display_google_trends_data(trends_results, search_keywords)
|
||||
else:
|
||||
update_progress("Google Trends analysis returned no results", level="warning")
|
||||
except Exception as trends_err:
|
||||
logger.error(f"Google Trends analysis failed: {trends_err}")
|
||||
update_progress("Google Trends analysis failed", level="warning")
|
||||
st.error(f"Error in Google Trends analysis: {str(trends_err)}")
|
||||
|
||||
# Return the combined results
|
||||
update_progress("Research completed!", progress=100, level="success")
|
||||
return metaphor_results or t_results
|
||||
|
||||
except Exception as ai_err:
|
||||
error_msg = f"AI research pipeline failed: {str(ai_err)}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
update_progress(error_msg, level="error")
|
||||
raise
|
||||
|
||||
|
||||
else:
|
||||
error_msg = f"Unsupported search mode: {search_mode}"
|
||||
logger.error(error_msg)
|
||||
@@ -316,13 +387,355 @@ def do_metaphor_ai_research(search_keywords):
|
||||
return None, None
|
||||
|
||||
|
||||
def _trends_empty_result():
    """Return a fresh result dictionary with every Google Trends section empty."""
    return {
        'related_keywords': [],
        'interest_over_time': pd.DataFrame(),
        'regional_interest': pd.DataFrame(),
        'related_queries': pd.DataFrame(),
        'related_topics': pd.DataFrame()
    }


def _trends_ensure_label_column(frame, label, aliases):
    """Rename a known alias (or else the first column) of ``frame`` to ``label``."""
    columns = list(frame.columns)
    if not columns or label in columns:
        return frame
    source = next((alias for alias in aliases if alias in columns), columns[0])
    return frame.rename(columns={source: label})


def _trends_normalise_columns(frame, label, aliases):
    """Return ``frame`` with both a ``label`` column and a 'value' column."""
    frame = _trends_ensure_label_column(frame, label, aliases)
    if 'value' in frame.columns:
        return frame
    others = [column for column in frame.columns if column != label]
    if others:
        return frame.rename(columns={others[0]: 'value'})
    # assign() returns a new frame, so a frame owned by the caller is not mutated.
    return frame.assign(value=0)


def _trends_fetch_indexed_frame(fetch, name, count_phrase, start, done, update_progress):
    """Call ``fetch`` and return its result with the index reset, or None if empty or failed."""
    try:
        update_progress(f"Fetching {name} data...", progress=start)
        frame = fetch()
        if frame.empty:
            update_progress(f"No {name} data available", level="warning", progress=done)
            return None
        frame = frame.reset_index()
        update_progress(
            f"Successfully fetched {name} data {count_phrase.format(count=len(frame))}",
            progress=done,
        )
        return frame
    except Exception as e:
        logger.error(f"Error fetching {name}: {e}")
        update_progress(f"Error fetching {name}: {e}", level="warning", progress=done)
        return None


def _trends_fetch_related(fetch, keyword, label, aliases, noun, steps, update_progress):
    """Fetch the top and rising related ``noun`` of ``keyword`` as one DataFrame."""
    combined = pd.DataFrame(columns=[label, 'value'])
    try:
        update_progress(f"Fetching related {noun} data...", progress=steps['fetch'])
        related = fetch()
        if not isinstance(related, dict) or keyword not in related:
            return combined
        keyword_data = related[keyword]

        for kind in ('top', 'rising'):
            if kind not in keyword_data or keyword_data[kind] is None:
                continue
            started, finished = steps[kind]
            try:
                update_progress(f"Processing {kind} related {noun}...", progress=started)
                frame = keyword_data[kind]
                if not isinstance(frame, pd.DataFrame):
                    frame = pd.DataFrame(frame)
                if frame.empty:
                    continue
                frame = _trends_ensure_label_column(frame, label, aliases)
                if combined.empty:
                    combined = frame
                else:
                    # ignore_index avoids duplicate row labels in the merged table.
                    combined = pd.concat([combined, frame], ignore_index=True)
                update_progress(
                    f"Successfully processed {len(frame)} {kind} related {noun}",
                    progress=finished,
                )
            except Exception as e:
                # A broken table must not discard the other one.
                logger.warning(f"Error processing {kind} {noun}: {e}")
                update_progress(
                    f"Error processing {kind} {noun}: {e}", level="warning", progress=finished
                )
        return combined
    except Exception as e:
        logger.error(f"Error fetching related {noun}: {e}")
        update_progress(
            f"Error fetching related {noun}: {e}", level="warning", progress=steps['failed']
        )
        # Ensure we have an empty DataFrame with the right columns
        return pd.DataFrame(columns=[label, 'value'])


def do_google_pytrends_analysis(keywords):
    """
    Perform Google Trends analysis for the given keywords.

    The keywords are first passed to ``do_google_trends_analysis``. When that
    yields related keywords but any of the tabular sections is still empty,
    the missing sections are fetched from pytrends directly. Progress is
    shown in Streamlit widgets that are removed again before returning.

    Args:
        keywords (str): The search keywords to analyze

    Returns:
        dict: A dictionary containing formatted Google Trends data with the following keys:
            - related_keywords: List of related keywords
            - interest_over_time: DataFrame; the keyword column is renamed to 'interest'
            - regional_interest: DataFrame; 'geoName' is renamed to 'country_code'
              and the keyword column to 'interest'
            - related_queries: DataFrame with query and value columns
            - related_topics: DataFrame with topic and value columns
        Every section is empty when no data is returned or the analysis fails.
    """
    logger.info(f"Performing Google Trends analysis for keywords: {keywords}")

    # Create a progress container for Streamlit
    progress_container = st.empty()
    progress_bar = st.progress(0)

    def update_progress(message, progress=None, level="info"):
        """Helper function to update progress in Streamlit UI"""
        if progress is not None:
            progress_bar.progress(progress)

        if level == "error":
            progress_container.error(f"🚫 {message}")
        elif level == "warning":
            progress_container.warning(f"⚠️ {message}")
        else:
            progress_container.info(f"🔄 {message}")
        logger.debug(f"Progress update [{level}]: {message}")

    query_aliases = ('Top query', 'Rising query')
    topic_aliases = ('topic_title',)

    try:
        # Initialize the formatted data dictionary
        formatted_data = _trends_empty_result()

        # Get raw trends data from google_trends_researcher
        update_progress("Fetching Google Trends data...", progress=10)
        raw_trends_data = do_google_trends_analysis(keywords)

        if not raw_trends_data:
            logger.warning("No Google Trends data returned")
            update_progress("No Google Trends data returned", level="warning", progress=20)
            return formatted_data

        # Process related keywords from the raw data
        update_progress("Processing related keywords...", progress=30)
        if isinstance(raw_trends_data, str):
            # A comma-joined keyword string is split into a list; otherwise
            # it would match neither branch below and be dropped.
            formatted_data['related_keywords'] = [
                part.strip() for part in raw_trends_data.split(',') if part.strip()
            ]
        elif isinstance(raw_trends_data, list):
            formatted_data['related_keywords'] = raw_trends_data
        elif isinstance(raw_trends_data, dict):
            for source_key, target_key in (
                ('keywords', 'related_keywords'),
                ('interest_over_time', 'interest_over_time'),
                ('regional_interest', 'regional_interest'),
                ('related_queries', 'related_queries'),
                ('related_topics', 'related_topics'),
            ):
                if source_key in raw_trends_data:
                    formatted_data[target_key] = raw_trends_data[source_key]

        # If we have keywords but missing other data, try to fetch them using pytrends directly
        frame_keys = ('interest_over_time', 'regional_interest', 'related_queries', 'related_topics')
        if formatted_data['related_keywords'] and any(
            formatted_data[key].empty for key in frame_keys
        ):
            try:
                update_progress("Fetching additional data from Google Trends API...", progress=40)
                from pytrends.request import TrendReq
                pytrends = TrendReq(hl='en-US', tz=360)

                # Build payload with the main keyword
                update_progress("Building search payload...", progress=45)
                pytrends.build_payload([keywords], timeframe='today 12-m', geo='')

                # Get interest over time if missing
                if formatted_data['interest_over_time'].empty:
                    interest_df = _trends_fetch_indexed_frame(
                        pytrends.interest_over_time, "interest over time",
                        "with {count} data points", 50, 55, update_progress,
                    )
                    if interest_df is not None:
                        formatted_data['interest_over_time'] = interest_df

                # Get regional interest if missing
                if formatted_data['regional_interest'].empty:
                    regional_df = _trends_fetch_indexed_frame(
                        pytrends.interest_by_region, "regional interest",
                        "for {count} regions", 60, 65, update_progress,
                    )
                    if regional_df is not None:
                        formatted_data['regional_interest'] = regional_df

                # Get related queries if missing
                if formatted_data['related_queries'].empty:
                    formatted_data['related_queries'] = _trends_fetch_related(
                        pytrends.related_queries, keywords, 'query', query_aliases, 'queries',
                        {'fetch': 70, 'top': (75, 80), 'rising': (85, 90), 'failed': 90},
                        update_progress,
                    )

                # Get related topics if missing
                if formatted_data['related_topics'].empty:
                    formatted_data['related_topics'] = _trends_fetch_related(
                        pytrends.related_topics, keywords, 'topic', topic_aliases, 'topics',
                        {'fetch': 95, 'top': (97, 98), 'rising': (99, 100), 'failed': 100},
                        update_progress,
                    )

            except Exception as e:
                # Keep whatever was collected so far; only the extra fetch failed.
                logger.error(f"Error fetching additional trends data: {e}")
                update_progress(
                    f"Error fetching additional trends data: {e}", level="warning", progress=100
                )

        # Ensure all DataFrames have the correct column names for the UI
        update_progress("Finalizing data formatting...", progress=100)

        interest_df = formatted_data['interest_over_time']
        if not interest_df.empty:
            if 'date' not in interest_df.columns:
                interest_df = interest_df.reset_index()
            if 'interest' not in interest_df.columns and keywords in interest_df.columns:
                interest_df = interest_df.rename(columns={keywords: 'interest'})
            formatted_data['interest_over_time'] = interest_df

        regional_df = formatted_data['regional_interest']
        if not regional_df.empty:
            if 'country_code' not in regional_df.columns and 'geoName' in regional_df.columns:
                regional_df = regional_df.rename(columns={'geoName': 'country_code'})
            if 'interest' not in regional_df.columns and keywords in regional_df.columns:
                regional_df = regional_df.rename(columns={keywords: 'interest'})
            formatted_data['regional_interest'] = regional_df

        # Handle different column names that might be present in the related tables
        for key, label, aliases in (
            ('related_queries', 'query', query_aliases),
            ('related_topics', 'topic', topic_aliases),
        ):
            if not formatted_data[key].empty:
                formatted_data[key] = _trends_normalise_columns(formatted_data[key], label, aliases)

        return formatted_data

    except Exception as e:
        logger.error(f"Error in Google Trends analysis: {e}")
        update_progress(f"Error in Google Trends analysis: {e}", level="error", progress=100)
        return _trends_empty_result()

    finally:
        # Clear the progress widgets on every exit path, including the
        # early "no data" return.
        progress_container.empty()
        progress_bar.empty()
|
||||
|
||||
|
||||
def metaphor_extract_titles_or_text(json_data, return_titles=True):
|
||||
|
||||
@@ -262,285 +262,185 @@ def metaphor_search_articles(query, search_options: dict = None):
|
||||
except Exception as tavily_err:
|
||||
logger.warning(f"Error getting Tavily answer: {tavily_err}")
|
||||
|
||||
# Display results in Streamlit
|
||||
streamlit_display_metaphor_results(formatted_response)
|
||||
# Return the formatted response without displaying it
|
||||
# The display will be handled by gpt_web_researcher
|
||||
return formatted_response
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in Exa searching articles: {e}")
|
||||
return None
|
||||
|
||||
def streamlit_display_metaphor_results(metaphor_response: dict):
|
||||
"""
|
||||
Display Metaphor search results in Streamlit with enhanced metrics and popovers
|
||||
def streamlit_display_metaphor_results(metaphor_response, search_keywords=None):
|
||||
"""Display Metaphor search results in Streamlit."""
|
||||
|
||||
Args:
|
||||
metaphor_response (dict): Response from Metaphor search
|
||||
"""
|
||||
if not metaphor_response or 'data' not in metaphor_response:
|
||||
st.error("No valid Metaphor search results to display")
|
||||
if not metaphor_response:
|
||||
st.error("No search results found.")
|
||||
return
|
||||
|
||||
|
||||
# Add debug logging
|
||||
logger.debug(f"Displaying Metaphor results. Type: {type(metaphor_response)}")
|
||||
if isinstance(metaphor_response, dict):
|
||||
logger.debug(f"Metaphor response keys: {metaphor_response.keys()}")
|
||||
|
||||
# Initialize session state variables if they don't exist
|
||||
if 'search_insights' not in st.session_state:
|
||||
st.session_state.search_insights = None
|
||||
|
||||
if 'metaphor_response' not in st.session_state:
|
||||
st.session_state.metaphor_response = metaphor_response
|
||||
|
||||
st.session_state.metaphor_response = None
|
||||
if 'insights_generated' not in st.session_state:
|
||||
st.session_state.insights_generated = False
|
||||
|
||||
# Update the stored metaphor_response with the latest data
|
||||
# Store the current response in session state
|
||||
st.session_state.metaphor_response = metaphor_response
|
||||
|
||||
# Display metrics in columns
|
||||
col1, col2, col3 = st.columns(3)
|
||||
|
||||
# Calculate metrics
|
||||
results = metaphor_response['data']['results']
|
||||
# Display search results
|
||||
st.subheader("🔍 Search Results")
|
||||
|
||||
# Calculate metrics - handle different data structures
|
||||
results = []
|
||||
if isinstance(metaphor_response, dict):
|
||||
if 'data' in metaphor_response and 'results' in metaphor_response['data']:
|
||||
results = metaphor_response['data']['results']
|
||||
elif 'results' in metaphor_response:
|
||||
results = metaphor_response['results']
|
||||
|
||||
total_results = len(results)
|
||||
avg_score = sum(r['score'] for r in results if r['score']) / total_results if total_results > 0 else 0
|
||||
avg_relevance = sum(r.get('score', 0) for r in results) / total_results if total_results > 0 else 0
|
||||
|
||||
# Display metrics
|
||||
col1, col2 = st.columns(2)
|
||||
with col1:
|
||||
st.metric(
|
||||
label="Total Results",
|
||||
value=total_results
|
||||
)
|
||||
st.metric("Total Results", total_results)
|
||||
with col2:
|
||||
if metaphor_response['data'].get('costDollars'):
|
||||
cost = metaphor_response['data']['costDollars']
|
||||
st.metric(
|
||||
label="Search Cost",
|
||||
value=f"${cost['total']:.3f}"
|
||||
)
|
||||
with col3:
|
||||
st.metric(
|
||||
label="Average Relevance Score",
|
||||
value=f"{avg_score:.2f}"
|
||||
)
|
||||
|
||||
# Display AI-generated answers side by side
|
||||
if 'answer' in metaphor_response or 'tavily_answer' in metaphor_response:
|
||||
st.markdown("### 🤖 AI-Generated Research Answers")
|
||||
|
||||
# Create two columns for side-by-side display
|
||||
tavily_col, metaphor_col = st.columns(2)
|
||||
|
||||
# Display Tavily answer if available
|
||||
with tavily_col:
|
||||
if 'tavily_answer' in metaphor_response:
|
||||
st.markdown("#### 🔍 Tavily AI Answer")
|
||||
st.markdown(f"""
|
||||
<div style="background-color: #f0f2f6; padding: 20px; border-radius: 10px; border-left: 5px solid #FF4B4B;">
|
||||
{metaphor_response['tavily_answer']}
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
if metaphor_response.get('tavily_cost_dollars'):
|
||||
st.caption(f"Tavily Answer Cost: ${metaphor_response['tavily_cost_dollars']['total']:.3f}")
|
||||
|
||||
if metaphor_response.get('tavily_citations'):
|
||||
with st.expander("📚 Tavily Sources"):
|
||||
for idx, citation in enumerate(metaphor_response['tavily_citations'], 1):
|
||||
st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})")
|
||||
else:
|
||||
st.markdown("#### 🔍 Tavily AI Answer")
|
||||
st.info("No Tavily answer available for this query.")
|
||||
|
||||
# Display Metaphor answer if available
|
||||
with metaphor_col:
|
||||
if 'answer' in metaphor_response:
|
||||
st.markdown("#### 🔍 Metaphor AI Answer")
|
||||
st.markdown(f"""
|
||||
<div style="background-color: #f0f2f6; padding: 20px; border-radius: 10px; border-left: 5px solid #4CAF50;">
|
||||
{metaphor_response['answer']}
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
if metaphor_response.get('answerCostDollars'):
|
||||
st.caption(f"Metaphor Answer Cost: ${metaphor_response['answerCostDollars']['total']:.3f}")
|
||||
|
||||
if metaphor_response.get('citations'):
|
||||
with st.expander("📚 Metaphor Sources"):
|
||||
for idx, citation in enumerate(metaphor_response['citations'], 1):
|
||||
st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})")
|
||||
else:
|
||||
st.markdown("#### 🔍 Metaphor AI Answer")
|
||||
st.info("No Metaphor answer available for this query.")
|
||||
st.metric("Average Relevance Score", f"{avg_relevance:.2f}")
|
||||
|
||||
# Add "Get Search Insights" button - moved outside the AI answers conditional
|
||||
st.markdown("### 🔍 Search Insights")
|
||||
# Display AI-generated answers if available
|
||||
if 'tavily_answer' in metaphor_response or 'metaphor_answer' in metaphor_response:
|
||||
st.subheader("🤖 AI-Generated Answers")
|
||||
|
||||
if 'tavily_answer' in metaphor_response:
|
||||
st.markdown("**Tavily AI Answer:**")
|
||||
st.write(metaphor_response['tavily_answer'])
|
||||
|
||||
if 'metaphor_answer' in metaphor_response:
|
||||
st.markdown("**Metaphor AI Answer:**")
|
||||
st.write(metaphor_response['metaphor_answer'])
|
||||
|
||||
# Create a container for the insights
|
||||
insights_container = st.container()
|
||||
|
||||
# Use a button with a callback function
|
||||
if st.button("Generate Search Insights", type="primary"):
|
||||
# Set a flag in session state to indicate that insights should be generated
|
||||
# Get Search Insights button
|
||||
if st.button("Generate Search Insights", key="metaphor_generate_insights_button"):
|
||||
st.session_state.insights_generated = True
|
||||
|
||||
# Store the current metaphor_response in session state
|
||||
st.session_state.metaphor_response = metaphor_response
|
||||
|
||||
# Redirect to the same page with a query parameter to trigger insights generation
|
||||
st.experimental_rerun()
|
||||
|
||||
# If insights should be generated, do it in a separate container
|
||||
if st.session_state.insights_generated:
|
||||
with insights_container:
|
||||
with st.spinner("Analyzing search results to generate insights..."):
|
||||
# Get the stored metaphor_response from session state
|
||||
stored_response = st.session_state.metaphor_response
|
||||
stored_results = stored_response['data']['results']
|
||||
|
||||
# Prepare data for analysis
|
||||
analysis_data = {
|
||||
"metaphor_results": stored_results,
|
||||
"metaphor_answer": stored_response.get("answer", ""),
|
||||
"tavily_answer": stored_response.get("tavily_answer", ""),
|
||||
"metaphor_citations": stored_response.get("citations", []),
|
||||
"tavily_citations": stored_response.get("tavily_citations", [])
|
||||
}
|
||||
|
||||
# Create the analysis prompt
|
||||
analysis_prompt = f"""
|
||||
**Search Intent & User Needs Analysis**
|
||||
|
||||
I have conducted research using both Tavily and Metaphor AI search engines.
|
||||
Below is the data from both sources:
|
||||
|
||||
**Metaphor AI Answer:**
|
||||
{analysis_data["metaphor_answer"]}
|
||||
|
||||
**Tavily AI Answer:**
|
||||
{analysis_data["tavily_answer"]}
|
||||
|
||||
**Search Results:**
|
||||
{[f"{i+1}. {r['title']} - {r['summary']}" for i, r in enumerate(analysis_data["metaphor_results"])]}
|
||||
|
||||
**Citations:**
|
||||
{[f"{i+1}. {c.get('title', 'Untitled')} - {c.get('url', 'No URL')}" for i, c in enumerate(analysis_data["metaphor_citations"] + analysis_data["tavily_citations"])]}
|
||||
|
||||
Based on this research data, please provide the following insights:
|
||||
|
||||
**Search Intent & User Needs**
|
||||
```
|
||||
Review the research data and identify:
|
||||
1. The distribution of search intent (categorize as Informational/Commercial/Navigational/Transactional)
|
||||
2. Most common user questions and their patterns
|
||||
3. Frequently mentioned pain points or challenges
|
||||
4. Recurring solutions or approaches to addressing these challenges
|
||||
5. Gaps between user questions and available answers
|
||||
|
||||
Present findings in a structured format with percentages and specific examples.
|
||||
```
|
||||
|
||||
Format your response as a comprehensive analysis with clear sections, bullet points, and examples from the research data.
|
||||
"""
|
||||
|
||||
try:
|
||||
# Import the llm_text_gen function
|
||||
import importlib
|
||||
text_gen_module = importlib.import_module('lib.gpt_providers.text_generation.main_text_generation')
|
||||
if hasattr(text_gen_module, 'llm_text_gen'):
|
||||
# Generate insights using llm_text_gen
|
||||
insights = text_gen_module.llm_text_gen(analysis_prompt)
|
||||
|
||||
# Store insights in session state
|
||||
st.session_state.search_insights = insights
|
||||
|
||||
# Reset the flag to prevent regeneration on next rerun
|
||||
st.session_state.insights_generated = False
|
||||
else:
|
||||
st.error("Could not find llm_text_gen function in the text generation module.")
|
||||
except Exception as e:
|
||||
st.error(f"Error generating insights: {str(e)}")
|
||||
logger.error(f"Error generating insights: {e}")
|
||||
st.rerun()
|
||||
|
||||
# Display insights if they exist in session state
|
||||
if st.session_state.search_insights:
|
||||
with insights_container:
|
||||
st.markdown("### 🔍 Search Intent & User Needs Analysis")
|
||||
st.markdown(st.session_state.search_insights)
|
||||
|
||||
# Create DataFrame from results
|
||||
df = pd.DataFrame(results)
|
||||
st.subheader("🔍 Search Insights")
|
||||
st.write(st.session_state.search_insights)
|
||||
|
||||
# Display search results in a data editor
|
||||
st.subheader("📊 Detailed Results")
|
||||
|
||||
# Prepare data for display
|
||||
display_df = df.copy()
|
||||
display_df['Visit Site'] = display_df['url']
|
||||
results_data = []
|
||||
for result in results:
|
||||
result_data = {
|
||||
'Title': result.get('title', ''),
|
||||
'URL': result.get('url', ''),
|
||||
'Snippet': result.get('summary', ''),
|
||||
'Relevance Score': result.get('score', 0),
|
||||
'Published Date': result.get('publishedDate', '')
|
||||
}
|
||||
results_data.append(result_data)
|
||||
|
||||
# Format publishedDate as string if it exists
|
||||
if 'publishedDate' in display_df.columns:
|
||||
display_df['publishedDate'] = display_df['publishedDate'].apply(
|
||||
lambda x: x[:10] if isinstance(x, str) else 'N/A'
|
||||
# Create DataFrame
|
||||
df = pd.DataFrame(results_data)
|
||||
|
||||
# Display the DataFrame if it's not empty
|
||||
if not df.empty:
|
||||
# Configure columns
|
||||
st.dataframe(
|
||||
df,
|
||||
column_config={
|
||||
"Title": st.column_config.TextColumn(
|
||||
"Title",
|
||||
help="Title of the search result",
|
||||
width="large",
|
||||
),
|
||||
"URL": st.column_config.LinkColumn(
|
||||
"URL",
|
||||
help="Link to the search result",
|
||||
width="medium",
|
||||
display_text="Visit Article",
|
||||
),
|
||||
"Snippet": st.column_config.TextColumn(
|
||||
"Snippet",
|
||||
help="Summary of the search result",
|
||||
width="large",
|
||||
),
|
||||
"Relevance Score": st.column_config.NumberColumn(
|
||||
"Relevance Score",
|
||||
help="Relevance score of the search result",
|
||||
format="%.2f",
|
||||
width="small",
|
||||
),
|
||||
"Published Date": st.column_config.DateColumn(
|
||||
"Published Date",
|
||||
help="Publication date of the search result",
|
||||
width="medium",
|
||||
),
|
||||
},
|
||||
hide_index=True,
|
||||
)
|
||||
|
||||
# Configure columns for data editor
|
||||
columns = {
|
||||
'title': st.column_config.TextColumn(
|
||||
'Title',
|
||||
width='large',
|
||||
required=True,
|
||||
),
|
||||
'author': st.column_config.TextColumn(
|
||||
'Author',
|
||||
width='medium',
|
||||
),
|
||||
'publishedDate': st.column_config.TextColumn(
|
||||
'Published Date',
|
||||
width='medium',
|
||||
),
|
||||
'score': st.column_config.NumberColumn(
|
||||
'Relevance Score',
|
||||
width='small',
|
||||
format="%.2f"
|
||||
),
|
||||
'Visit Site': st.column_config.LinkColumn(
|
||||
'Link',
|
||||
width='small',
|
||||
display_text='Visit Site',
|
||||
),
|
||||
'summary': st.column_config.TextColumn(
|
||||
'Summary',
|
||||
width='large',
|
||||
required=True,
|
||||
)
|
||||
}
|
||||
|
||||
# Display results in data editor
|
||||
st.data_editor(
|
||||
display_df,
|
||||
column_config=columns,
|
||||
hide_index=True,
|
||||
num_rows='dynamic',
|
||||
disabled=True,
|
||||
column_order=['title', 'author', 'publishedDate', 'score', 'summary', 'Visit Site']
|
||||
)
|
||||
|
||||
# Display detailed summaries with popovers
|
||||
st.write("### Detailed Summaries")
|
||||
for idx, result in enumerate(results, 1):
|
||||
with st.expander(f"📄 {result['title']}", expanded=False):
|
||||
col1, col2 = st.columns([3, 1])
|
||||
with col1:
|
||||
st.markdown(f"**Summary**")
|
||||
st.markdown(result['summary'])
|
||||
with col2:
|
||||
st.markdown("**Details**")
|
||||
st.markdown(f"**Author:** {result['author'] if result['author'] else 'N/A'}")
|
||||
st.markdown(f"**Published:** {result['publishedDate'][:10] if result['publishedDate'] else 'N/A'}")
|
||||
st.markdown(f"**Score:** {result['score']:.2f}")
|
||||
st.markdown(f"[Visit Site]({result['url']})")
|
||||
|
||||
# Display search metadata
|
||||
st.divider()
|
||||
col1, col2 = st.columns(2)
|
||||
with col1:
|
||||
st.caption(f"Search Type: {metaphor_response['data']['resolvedSearchType']}")
|
||||
with col2:
|
||||
st.caption(f"Request ID: {metaphor_response['data']['requestId']}")
|
||||
|
||||
# Add popover for snippets
|
||||
st.markdown("""
|
||||
<style>
|
||||
.snippet-popover {
|
||||
position: relative;
|
||||
display: inline-block;
|
||||
}
|
||||
.snippet-popover .snippet-content {
|
||||
visibility: hidden;
|
||||
width: 300px;
|
||||
background-color: #f9f9f9;
|
||||
color: #333;
|
||||
text-align: left;
|
||||
border-radius: 6px;
|
||||
padding: 10px;
|
||||
position: absolute;
|
||||
z-index: 1;
|
||||
bottom: 125%;
|
||||
left: 50%;
|
||||
margin-left: -150px;
|
||||
opacity: 0;
|
||||
transition: opacity 0.3s;
|
||||
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
|
||||
}
|
||||
.snippet-popover:hover .snippet-content {
|
||||
visibility: visible;
|
||||
opacity: 1;
|
||||
}
|
||||
</style>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
# Display snippets with popover
|
||||
st.subheader("📝 Snippets")
|
||||
for i, result in enumerate(results):
|
||||
snippet = result.get('summary', '')
|
||||
if snippet:
|
||||
st.markdown(f"""
|
||||
<div class="snippet-popover">
|
||||
<strong>{result.get('title', '')}</strong>
|
||||
<div class="snippet-content">
|
||||
{snippet}
|
||||
</div>
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
else:
|
||||
st.info("No detailed results available.")
|
||||
|
||||
# Add a collapsible section for the raw JSON data
|
||||
with st.expander("Research Results (JSON)", expanded=False):
|
||||
st.json(metaphor_response)
|
||||
|
||||
|
||||
def metaphor_news_summarizer(news_keywords):
|
||||
|
||||
425
lib/alwrity_ui/google_trends_ui.py
Normal file
425
lib/alwrity_ui/google_trends_ui.py
Normal file
@@ -0,0 +1,425 @@
|
||||
"""
|
||||
Module for displaying Google Trends data in the Streamlit UI.
|
||||
|
||||
This module provides functions for visualizing Google Trends data, including:
|
||||
- Interest over time
|
||||
- Regional interest
|
||||
- Related queries
|
||||
- Related topics
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
import logging
|
||||
|
||||
# Set up logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def display_google_trends_data(trends_data, search_keyword):
    """
    Render the full Google Trends report for one keyword.

    Emits a heading, a collapsed explainer, five tabs (each delegated to the
    matching ``display_*`` helper of this module) and a footer caption that
    carries the time at which the page was rendered.

    Args:
        trends_data (dict): Trends payload. The keys read here are
            'related_keywords', 'interest_over_time', 'regional_interest',
            'related_queries' and 'related_topics'; a missing key falls back
            to an empty list (keywords) or an empty DataFrame (the rest).
        search_keyword (str): Keyword interpolated into the heading.
    """
    # Guard clause: a falsy payload (None, empty dict) leaves nothing to draw.
    if not trends_data:
        st.warning("No Google Trends data available for this search.")
        return

    st.subheader(f"Google Trends Analysis for '{search_keyword}'")

    # Collapsed-by-default primer on how to read the report
    with st.expander("ℹ️ About Google Trends Data", expanded=False):
        st.markdown("""
        **What is Google Trends?**

        Google Trends is a public web facility that shows how often a particular search-term is entered relative to the total search-volume across various regions of the world, and in various languages.

        **What data is shown here?**

        - **Related Keywords**: Terms that are frequently searched together with your keyword
        - **Interest Over Time**: How interest in your keyword has changed over the past 12 months
        - **Regional Interest**: Where in the world your keyword is most popular
        - **Related Queries**: What people search for before and after searching for your keyword
        - **Related Topics**: Topics that are closely related to your keyword

        **How to interpret the data:**

        - Interest values range from 0 to 100, where 100 is the peak popularity for the term
        - A value of 50 means the term is half as popular as the peak
        - A value of 0 means there was not enough data for this term
        """)

    # One row per tab: (label, renderer, payload key, factory for the fallback).
    # The fallback is built lazily, inside the tab, right before it is needed.
    sections = (
        ("Related Keywords", display_keywords_section, 'related_keywords', list),
        ("Interest Over Time", display_interest_over_time, 'interest_over_time', pd.DataFrame),
        ("Regional Interest", display_regional_interest, 'regional_interest', pd.DataFrame),
        ("Related Queries", display_related_queries, 'related_queries', pd.DataFrame),
        ("Related Topics", display_related_topics, 'related_topics', pd.DataFrame),
    )

    tabs = st.tabs([label for label, _, _, _ in sections])
    for tab, (_, render, payload_key, make_fallback) in zip(tabs, sections):
        with tab:
            render(trends_data.get(payload_key, make_fallback()))

    # Footer: data source plus the moment this view was rendered
    st.markdown("---")
    rendered_at = pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")
    st.caption("Data source: Google Trends | Last updated: " + rendered_at)
|
||||
|
||||
def display_keywords_section(keywords):
    """
    Show the related keywords as a single-column table.

    Args:
        keywords: Sequence of keyword values; a falsy value (None, empty
            list) produces an informational notice instead of a table.
    """
    if not keywords:
        st.info("No related keywords data available.")
        return

    st.subheader("Related Keywords")
    st.write("Keywords related to your search:")

    # Collapsed help text explaining what the table contains
    with st.expander("ℹ️ About Related Keywords", expanded=False):
        st.markdown("""
        **What are Related Keywords?**

        Related keywords are terms that are frequently searched together with your main keyword.
        These keywords can help you understand what topics are associated with your search term
        and can be valuable for content planning and SEO strategies.

        **How to use this data:**

        - Use these keywords to expand your content strategy
        - Identify gaps in your content that you could fill
        - Understand what your audience is interested in
        - Improve your SEO by incorporating these terms naturally in your content
        """)

    # Wrap the values in a DataFrame so Streamlit renders a labelled column
    st.dataframe(
        pd.DataFrame(keywords, columns=['Keyword']),
        use_container_width=True,
    )

    # Trailing note with the number of keywords shown
    st.caption(f"Found {len(keywords)} related keywords")
|
||||
|
||||
def display_interest_over_time(interest_df):
    """
    Plot the 'interest' column against 'date' and summarise it.

    Draws a spline-shaped Plotly line chart followed by three metrics
    (mean, max and min of 'interest'). An empty frame produces an info
    notice; a missing required column produces an error message. Any
    exception raised while building the chart is reported in the UI and
    logged rather than propagated.

    Args:
        interest_df (pd.DataFrame): Frame expected to carry 'date' and
            'interest' columns.
    """
    if interest_df.empty:
        st.info("No interest over time data available.")
        return

    st.subheader("Interest Over Time")

    # Collapsed help text explaining the chart
    with st.expander("ℹ️ About Interest Over Time", expanded=False):
        st.markdown("""
        **What is Interest Over Time?**

        Interest Over Time shows how interest in your search term has changed over the past 12 months.
        The data is normalized and presented on a scale from 0 to 100, where 100 is the peak popularity
        for the term, 50 means the term is half as popular, and 0 means there was not enough data.

        **How to interpret this chart:**

        - Look for peaks and valleys to identify trends
        - Compare with seasonal patterns or events
        - Identify if interest is growing, declining, or stable
        - Use this data to time your content releases for maximum impact
        """)

    try:
        # Required columns, checked in order; the first one missing aborts.
        required_columns = (
            ('date', "Interest over time data is missing the 'date' column."),
            ('interest', "Interest over time data is missing the 'interest' column."),
        )
        for column_name, missing_message in required_columns:
            if column_name not in interest_df.columns:
                st.error(missing_message)
                return

        # Line chart of interest by date
        fig = px.line(
            interest_df,
            x='date',
            y='interest',
            title='Interest Over Time',
            labels={'date': 'Date', 'interest': 'Interest'},
            line_shape='spline'
        )
        fig.update_layout(
            xaxis_title="Date",
            yaxis_title="Interest",
            hovermode='x unified'
        )
        st.plotly_chart(fig, use_container_width=True)

        # Summary statistics. The frame is known to be non-empty here because
        # the empty case returned at the top of the function.
        interest = interest_df['interest']
        summary = (
            ("Average Interest", interest.mean),
            ("Peak Interest", interest.max),
            ("Lowest Interest", interest.min),
        )
        # Each statistic is computed inside its own column context.
        for column, (label, compute) in zip(st.columns(3), summary):
            with column:
                st.metric(label, f"{compute():.1f}")

    except Exception as e:
        st.error(f"Error displaying interest over time chart: {str(e)}")
        logger.error(f"Error in display_interest_over_time: {e}")
|
||||
|
||||
def display_regional_interest(regional_df):
    """
    Draw a choropleth of 'interest' by 'country_code' plus a top-5 bar chart.

    An empty frame produces an info notice; a missing required column
    produces an error message. Any exception raised while building the
    figures is reported in the UI and logged rather than propagated.

    Args:
        regional_df (pd.DataFrame): Frame expected to carry 'country_code'
            and 'interest' columns.
    """
    if regional_df.empty:
        st.info("No regional interest data available.")
        return

    st.subheader("Regional Interest")

    # Collapsed help text explaining the map
    with st.expander("ℹ️ About Regional Interest", expanded=False):
        st.markdown("""
        **What is Regional Interest?**

        Regional Interest shows how interest in your search term varies across different countries.
        The data is normalized and presented on a scale from 0 to 100, where 100 is the peak popularity
        for the term in that region, 50 means the term is half as popular, and 0 means there was not enough data.

        **How to interpret this map:**

        - Darker colors indicate higher interest in that region
        - Lighter colors indicate lower interest
        - Hover over a country to see the exact interest value
        - Use this data to target your content to specific regions
        """)

    try:
        # Required columns, checked in order; the first one missing aborts.
        required_columns = (
            ('country_code', "Regional interest data is missing the 'country_code' column."),
            ('interest', "Regional interest data is missing the 'interest' column."),
        )
        for column_name, missing_message in required_columns:
            if column_name not in regional_df.columns:
                st.error(missing_message)
                return

        # Colour bar: ticks every 20 units starting at 0
        colorbar_settings = dict(
            title="Interest Level",
            tickformat=".0f",
            tickmode="linear",
            tick0=0,
            dtick=20
        )

        # NOTE(review): no locationmode is passed, so Plotly's default location
        # format applies — confirm 'country_code' holds codes in that format.
        world_map = go.Choropleth(
            locations=regional_df['country_code'],
            z=regional_df['interest'],
            text=regional_df['country_code'],  # shown in the hover text
            colorscale='Viridis',
            colorbar_title="Interest Level",
            zmin=0,
            zmax=100,
            marker_line_color='darkgray',
            marker_line_width=0.5,
            showscale=True,
            colorbar=colorbar_settings
        )
        fig = go.Figure(data=world_map)

        # Map styling: centred title, flat projection, muted land/ocean colours
        title_settings = dict(
            text='Regional Interest Distribution',
            x=0.5,
            xanchor='center'
        )
        geo_settings = dict(
            showframe=False,
            showcoastlines=True,
            projection_type='equirectangular',
            showland=True,
            landcolor='lightgray',
            showocean=True,
            oceancolor='aliceblue',
            showcountries=True,
            countrycolor='darkgray'
        )
        fig.update_layout(
            title=title_settings,
            geo=geo_settings,
            width=800,
            height=500,
            margin=dict(l=0, r=0, t=30, b=0)
        )
        st.plotly_chart(fig, use_container_width=True)

        # Top five rows by interest. The frame is known to be non-empty here
        # because the empty case returned at the top of the function.
        st.subheader("Top Regions by Interest")
        ranked = regional_df.sort_values('interest', ascending=False)
        top_regions = ranked.head(5)

        bars = go.Bar(
            x=top_regions['country_code'],
            y=top_regions['interest'],
            text=top_regions['interest'].round(1),
            textposition='auto',
            marker_color='rgb(55, 83, 109)'
        )
        fig_bar = go.Figure(data=[bars])
        fig_bar.update_layout(
            title='Top 5 Regions by Interest Level',
            xaxis_title='Region',
            yaxis_title='Interest Level',
            yaxis_range=[0, 100],
            showlegend=False
        )
        st.plotly_chart(fig_bar, use_container_width=True)

    except Exception as e:
        st.error(f"Error displaying regional interest chart: {str(e)}")
        logger.error(f"Error in display_regional_interest: {e}")
|
||||
|
||||
def display_related_queries(queries_df):
    """
    Show related queries as a table sorted by 'value', highest first.

    An empty frame produces an info notice; a missing required column
    produces an error message. Any exception raised while sorting or
    rendering is reported in the UI and logged rather than propagated.

    Args:
        queries_df (pd.DataFrame): Frame expected to carry 'query' and
            'value' columns.
    """
    if queries_df.empty:
        st.info("No related queries data available.")
        return

    st.subheader("Related Queries")

    # Collapsed help text explaining the table
    with st.expander("ℹ️ About Related Queries", expanded=False):
        st.markdown("""
        **What are Related Queries?**

        Related Queries show what people search for before and after searching for your keyword.
        These queries can help you understand the search intent and context around your keyword.

        **How to interpret this data:**

        - The 'value' column shows the relative interest compared to your main keyword
        - Higher values indicate stronger association with your keyword
        - Use these queries to expand your content strategy
        - Identify what questions your audience is trying to answer
        """)

    try:
        # Required columns, checked in order; the first one missing aborts.
        required_columns = (
            ('query', "Related queries data is missing the 'query' column."),
            ('value', "Related queries data is missing the 'value' column."),
        )
        for column_name, missing_message in required_columns:
            if column_name not in queries_df.columns:
                st.error(missing_message)
                return

        # Highest value first; sort_values returns a new frame, so the
        # caller's DataFrame is left in its original order.
        queries_df = queries_df.sort_values('value', ascending=False)
        st.dataframe(queries_df, use_container_width=True)

        # Trailing note with the number of rows shown
        st.caption(f"Found {len(queries_df)} related queries")

    except Exception as e:
        st.error(f"Error displaying related queries: {str(e)}")
        logger.error(f"Error in display_related_queries: {e}")
|
||||
|
||||
def display_related_topics(topics_df):
    """
    Show related topics as a table sorted by 'value', highest first.

    An empty frame produces an info notice; a missing required column
    produces an error message. Any exception raised while sorting or
    rendering is reported in the UI and logged rather than propagated.

    Args:
        topics_df (pd.DataFrame): Frame expected to carry 'topic' and
            'value' columns.
    """
    if topics_df.empty:
        st.info("No related topics data available.")
        return

    st.subheader("Related Topics")

    # Collapsed help text explaining the table
    with st.expander("ℹ️ About Related Topics", expanded=False):
        st.markdown("""
        **What are Related Topics?**

        Related Topics show broader topics that are associated with your search term.
        These topics can help you understand the broader context and themes related to your keyword.

        **How to interpret this data:**

        - The 'value' column shows the relative interest compared to your main keyword
        - Higher values indicate stronger association with your keyword
        - Use these topics to understand the broader context of your keyword
        - Identify themes that might be relevant to your content strategy
        """)

    try:
        # Required columns, checked in order; the first one missing aborts.
        required_columns = (
            ('topic', "Related topics data is missing the 'topic' column."),
            ('value', "Related topics data is missing the 'value' column."),
        )
        for column_name, missing_message in required_columns:
            if column_name not in topics_df.columns:
                st.error(missing_message)
                return

        # Highest value first; sort_values returns a new frame, so the
        # caller's DataFrame is left in its original order.
        topics_df = topics_df.sort_values('value', ascending=False)
        st.dataframe(topics_df, use_container_width=True)

        # Trailing note with the number of rows shown
        st.caption(f"Found {len(topics_df)} related topics")

    except Exception as e:
        st.error(f"Error displaying related topics: {str(e)}")
        logger.error(f"Error in display_related_topics: {e}")
|
||||
|
||||
def process_trends_data(trends_data):
    """
    Keep only the sections of a Trends payload that are worth displaying.

    'related_keywords' is copied through whenever the key is present, even
    when its value is empty. The four DataFrame sections are copied only
    when present and non-empty. Any other key in the input is dropped.
    Values are passed through by reference, not copied.

    Args:
        trends_data (dict): Raw Google Trends data.

    Returns:
        dict: The retained sections; an empty dict when the input is falsy.
    """
    if not trends_data:
        return {}

    # Sections whose values expose `.empty` and are dropped when empty
    frame_sections = (
        'interest_over_time',
        'regional_interest',
        'related_queries',
        'related_topics',
    )

    processed_data = {}

    # Keywords are kept regardless of content, as long as the key exists
    if 'related_keywords' in trends_data:
        processed_data['related_keywords'] = trends_data['related_keywords']

    processed_data.update({
        section: trends_data[section]
        for section in frame_sections
        if section in trends_data and not trends_data[section].empty
    })

    return processed_data
|
||||
@@ -102,7 +102,12 @@ def validate_api_keys():
|
||||
return api_keys
|
||||
|
||||
def do_web_research():
|
||||
"""Input keywords and do web research with advanced options."""
|
||||
"""Main function to perform web research based on user input."""
|
||||
|
||||
# Reset session state variables for this research operation
|
||||
if 'metaphor_results_displayed' in st.session_state:
|
||||
del st.session_state.metaphor_results_displayed
|
||||
|
||||
logger.info("Starting do_web_research function")
|
||||
|
||||
try:
|
||||
@@ -509,7 +514,7 @@ def do_web_research():
|
||||
status_display.success("✨ Research completed!")
|
||||
|
||||
# Display results in an organized way
|
||||
with st.expander("📊 Research Results", expanded=True):
|
||||
with st.expander("📊 Research Results", expanded=False):
|
||||
st.write(web_research_result)
|
||||
else:
|
||||
st.warning("No results found for your search")
|
||||
|
||||
Reference in New Issue
Block a user