Google trends data and keyword research

This commit is contained in:
ajaysi
2025-04-05 22:50:43 +05:30
parent a2fb77f700
commit 579bf7d0a6
5 changed files with 1122 additions and 379 deletions

View File

@@ -105,124 +105,55 @@ def plot_interest_by_region(kw_list):
def get_related_queries_and_save_csv(keywords, hl='en-US', tz=360, cat=0, timeframe='today 12-m'):
    """
    Look up Google Trends related queries and print/save them as a table.

    The 'top' and 'rising' entries of the first keyword in the pytrends
    response are placed side by side, rendered with ``tabulate``, printed
    together with an explanation of both scores, and passed to
    ``save_in_file``. A failure while saving is logged and does not abort
    the call.

    Args:
        keywords (list): Search keywords forwarded to ``build_payload``.
        hl (str): Language parameter, default is 'en-US'.
        tz (int): Timezone parameter, default is 360.
        cat (int): Category parameter, default is 0.
        timeframe (str): Timeframe parameter, default is 'today 12-m'.

    Returns:
        The value of ``top + rising`` for the first keyword's entries.
        When any step raises, the error is printed and ``None`` is returned
        implicitly.
    """
    try:
        trend_client = TrendReq(hl=hl, tz=tz)
        trend_client.build_payload(kw_list=keywords, cat=cat, timeframe=timeframe)
        related = trend_client.related_queries()

        # Only the entry of the first keyword in the response is used.
        first_entry = list(related.values())[0]
        top = first_entry['top']
        rising = first_entry['rising']
        # NOTE(review): if these are DataFrames, `+` adds element-wise rather
        # than appending rows — confirm what callers expect before changing.
        combined = top + rising

        top_frame = pd.DataFrame(top)
        rising_frame = pd.DataFrame(rising)
        # Distinct headers so the two 'query' columns do not collide.
        top_frame.columns = ['Top query', 'value']
        rising_frame.columns = ['Rising query', 'value']
        side_by_side = pd.concat([top_frame, rising_frame], axis=1)

        # Instantiated as in the original flow; not used further in this block.
        _console = Console()

        print("\n📢❗🚨 ")
        print("\n\033[1m🔝 Top\033[0m: The most popular search queries. Scoring is on a relative scale where a value of 100 is the most commonly searched query, 50 is a query searched half as often, and a value of 0 is a query searched for less than 1% as often as the most popular query.\n")
        print("\n\033[1m🚀 Rising\033[0m: Queries with the biggest increase in search frequency since the last time period. Results marked 'Breakout' had a tremendous increase, probably because these queries are new and had few (if any) prior searches.\n")

        rendered = tabulate(side_by_side, headers='keys', tablefmt='fancy_grid')
        print(rendered)

        # Saving is best-effort: the queries are still returned on failure.
        try:
            save_in_file(rendered)
        except Exception as save_results_err:
            logger.error(f"Failed to save search results: {save_results_err}")

        return combined
    except Exception as e:
        print(f"get_related_queries_and_save_csv: ERROR: An error occurred: {e}")
def get_related_topics_and_save_csv(search_keywords):
    """
    Fetch the Google Trends related topics for a single search term.

    The term is stringified and sent as a one-element keyword list for the
    'today 12-m' timeframe. Despite the name, nothing is written to disk:
    the function only returns the data.

    Args:
        search_keywords: The search term; it is converted with ``str()``
            semantics (via an f-string) before being queried.

    Returns:
        dict: ``{'top': DataFrame, 'rising': DataFrame}``. Each frame holds
        the ``topic_title`` and ``value`` columns of the corresponding
        pytrends bucket, or is an empty ``DataFrame`` when the bucket is
        missing, ``None``, empty, or lacks those columns. Both frames are
        empty when the request fails or the term is absent from the
        response. The function never returns ``None``.
    """
    keyword = f"{search_keywords}"
    wanted_columns = ['topic_title', 'value']
    result = {'top': pd.DataFrame(), 'rising': pd.DataFrame()}
    try:
        pytrends = TrendReq(hl='en-US', tz=360)
        # One payload and one request are enough; the response is a dict
        # keyed by keyword.
        pytrends.build_payload(kw_list=[keyword], timeframe='today 12-m')
        topics_data = pytrends.related_topics()

        keyword_data = topics_data.get(keyword) if topics_data else None
        if keyword_data:
            for bucket in ('top', 'rising'):
                frame = keyword_data.get(bucket)
                # A bucket may be None when Google has no data for it;
                # handle each bucket on its own so one missing bucket does
                # not discard the other.
                if frame is None or frame.empty:
                    continue
                if not set(wanted_columns).issubset(frame.columns):
                    logger.warning(
                        f"Related topics '{bucket}' data lacks expected columns: {list(frame.columns)}"
                    )
                    continue
                result[bucket] = frame[wanted_columns]
    except Exception as e:
        logger.error(f"Error in related topics: {e}")
        return {'top': pd.DataFrame(), 'rising': pd.DataFrame()}
    return result
def get_related_queries_and_save_csv(search_keywords):
    """
    Fetch the Google Trends related queries for a single search term.

    The term is stringified and sent as a one-element keyword list for the
    'today 12-m' timeframe. Despite the name, nothing is written to disk:
    the function only returns the data.

    Args:
        search_keywords: The search term; it is converted with ``str()``
            semantics (via an f-string) before being queried.

    Returns:
        dict: ``{'top': DataFrame, 'rising': DataFrame}``. A bucket that is
        missing, ``None`` or empty in the response is returned as an empty
        ``DataFrame``. Both frames are empty when the request fails or the
        term is absent from the response. The function never returns
        ``None``.
    """
    keyword = f"{search_keywords}"
    result = {'top': pd.DataFrame(), 'rising': pd.DataFrame()}
    try:
        pytrends = TrendReq(hl='en-US', tz=360)
        pytrends.build_payload(kw_list=[keyword], timeframe='today 12-m')
        # The response is a dict keyed by keyword.
        queries_data = pytrends.related_queries()

        keyword_data = queries_data.get(keyword) if queries_data else None
        if keyword_data:
            for bucket in ('top', 'rising'):
                frame = keyword_data.get(bucket)
                # A bucket may be None when Google has no data for it; check
                # before touching `.empty` so the other bucket is preserved.
                if frame is not None and not frame.empty:
                    result[bucket] = frame
    except Exception as e:
        logger.error(f"Error in related queries: {e}")
        return {'top': pd.DataFrame(), 'rising': pd.DataFrame()}
    return result
def get_source(url):
@@ -507,22 +438,17 @@ def do_google_trends_analysis(search_term):
else:
all_the_keywords.append(suggestions_df['Keywords'].tolist())
all_the_keywords = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in all_the_keywords])
# Generate a random sleep time between 2 and 3 seconds
time.sleep(random.uniform(2, 3))
#
# # FIXME: Get result from vision GPT. Fetch and visualize Google Trends data
# #trends_data = fetch_google_trends_interest_overtime("llamaindex")
#
# # FIXME: Plot Interest Over time.
# result_df = plot_interest_by_region(search_term)
#
# Display additional information
try:
result_df = get_related_topics_and_save_csv(search_term)
logger.info(f"Related topics:: result_df: {result_df}")
# Extract 'Top' topic_title
if result_df:
top_topic_title = result_df['topic_title'].values.tolist()
top_topic_title = result_df['top']['topic_title'].values.tolist()
# Join each sublist into one string separated by comma
#top_topic_title = [','.join(filter(None, map(str, sublist))) for sublist in top_topic_title]
top_topic_title = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in top_topic_title])
@@ -551,3 +477,77 @@ def do_google_trends_analysis(search_term):
return(all_the_keywords)
except Exception as e:
logger.error(f"Error in Google Trends Analysis: {e}")
def get_trending_searches(country='united_states'):
    """
    Return the pytrends trending-searches result for ``country``.

    Args:
        country (str): Value passed to pytrends as ``pn``; defaults to
            'united_states'.

    Returns:
        Whatever ``TrendReq.trending_searches`` returns, or an empty
        ``DataFrame`` when the request raises (the error is logged).
    """
    try:
        return TrendReq(hl='en-US', tz=360).trending_searches(pn=country)
    except Exception as fetch_err:
        logger.error(f"Error getting trending searches: {fetch_err}")
    return pd.DataFrame()
def display_trending_searches(trending_df):
    """
    Render up to ten trending searches as a numbered Streamlit list.

    Args:
        trending_df: DataFrame whose column ``0`` holds the search terms.
            An empty frame produces an informational notice instead.
    """
    if trending_df.empty:
        st.info("No trending searches data available.")
        return

    st.subheader("📊 Trending Searches")

    # Only the first ten entries of column 0 are shown, numbered from 1.
    leading_terms = trending_df[0].head(10)
    for rank, term in enumerate(leading_terms, start=1):
        st.write(f"{rank}. 🔍 {term}")
def get_realtime_trends(country='US'):
    """
    Return the pytrends realtime trending-searches result for ``country``.

    Args:
        country (str): Value passed to pytrends as ``pn``; defaults to 'US'.

    Returns:
        Whatever ``TrendReq.realtime_trending_searches`` returns, or an
        empty ``DataFrame`` when the request raises (the error is logged).
    """
    try:
        return TrendReq(hl='en-US', tz=360).realtime_trending_searches(pn=country)
    except Exception as fetch_err:
        logger.error(f"Error getting realtime trends: {fetch_err}")
    return pd.DataFrame()
def display_realtime_trends(trends_df):
    """
    Render the first five realtime trends as Streamlit expanders.

    Each expander is titled with the row's ``title`` (falling back to
    'Trending Topic') and shows its ``traffic`` (falling back to 'N/A').
    When the row carries a non-empty list of articles, the titles of the
    first three are listed underneath.

    Args:
        trends_df: DataFrame of realtime trends. ``None`` or an empty frame
            produces an informational notice instead.
    """
    if trends_df is None or trends_df.empty:
        st.info("No realtime trends data available.")
        return

    st.subheader("⚡ Realtime Trends")

    for _, row in trends_df.head(5).iterrows():
        with st.expander(f"🔥 {row.get('title', 'Trending Topic')}"):
            st.write(f"**Traffic:** {row.get('traffic', 'N/A')}")

            # The cell may be absent, NaN or some other non-sequence value;
            # only a real, non-empty list/tuple can be sliced and listed.
            articles = row.get('articles')
            if not isinstance(articles, (list, tuple)) or not articles:
                continue

            st.write("📰 Related Articles:")
            for article in articles[:3]:  # Show top 3 articles
                # Tolerate entries that are not dicts or lack a 'title'.
                if isinstance(article, dict):
                    article_title = article.get('title', 'Untitled')
                else:
                    article_title = article
                st.write(f"- {article_title}")
def display_google_trends_data(trends_data, search_keyword):
# Render Google Trends results in a six-tab Streamlit layout.
#
# trends_data: mapping read with .get(); the visible code uses the keys
#   'trending_searches' and 'realtime_trends', each defaulting to an empty
#   DataFrame when absent.
# search_keyword: not referenced by the lines visible here.
#
# NOTE(review): the "... existing code ..." markers below stand for code that
# is elided in this view; only the body of the sixth tab ("Trending Now") is
# shown, so tab1-tab5 appear unused here — confirm against the full function.
# ... existing code ...
# Create tabs for different sections
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
"Related Keywords",
"Interest Over Time",
"Regional Interest",
"Related Queries",
"Related Topics",
"Trending Now"
])
# ... existing tab code ...
# "Trending Now": trending searches on the left, realtime trends on the right.
with tab6:
col1, col2 = st.columns(2)
with col1:
display_trending_searches(trends_data.get('trending_searches', pd.DataFrame()))
with col2:
display_realtime_trends(trends_data.get('realtime_trends', pd.DataFrame()))

View File

@@ -27,15 +27,19 @@ from pathlib import Path
import sys
from datetime import datetime
import streamlit as st
import pandas as pd
import random
import numpy as np
from lib.alwrity_ui.display_google_serp_results import (
process_research_results,
process_search_results,
display_research_results
)
from lib.alwrity_ui.google_trends_ui import display_google_trends_data, process_trends_data
from .tavily_ai_search import get_tavilyai_results
from .metaphor_basic_neural_web_search import metaphor_search_articles
from .metaphor_basic_neural_web_search import metaphor_search_articles, streamlit_display_metaphor_results
from .google_serp_search import google_search
from .google_trends_researcher import do_google_trends_analysis
#from .google_gemini_web_researcher import do_gemini_web_research
@@ -56,6 +60,10 @@ def gpt_web_researcher(search_keywords, search_mode, **kwargs):
logger.debug(f"Additional parameters: {kwargs}")
try:
# Reset session state variables for this research operation
if 'metaphor_results_displayed' in st.session_state:
del st.session_state.metaphor_results_displayed
# Initialize result container
research_results = None
@@ -157,13 +165,76 @@ def gpt_web_researcher(search_keywords, search_mode, **kwargs):
update_progress("Metaphor AI search failed, continuing with Tavily results only...", level="warning")
else:
update_progress("Metaphor AI search completed successfully", progress=75)
# Add debug logging to check the structure of metaphor_results
logger.debug(f"Metaphor results structure: {type(metaphor_results)}")
if isinstance(metaphor_results, dict):
logger.debug(f"Metaphor results keys: {metaphor_results.keys()}")
if 'data' in metaphor_results:
logger.debug(f"Metaphor data keys: {metaphor_results['data'].keys()}")
if 'results' in metaphor_results['data']:
logger.debug(f"Number of results: {len(metaphor_results['data']['results'])}")
# Display Metaphor results only if not already displayed
if 'metaphor_results_displayed' not in st.session_state:
st.session_state.metaphor_results_displayed = True
# Make sure to pass the correct parameters to streamlit_display_metaphor_results
streamlit_display_metaphor_results(metaphor_results, search_keywords)
# Add Google Trends Analysis
update_progress("Initiating Google Trends analysis...", progress=80)
try:
# Add an informative message about Google Trends
with st.expander(" About Google Trends Analysis", expanded=False):
st.markdown("""
**What is Google Trends Analysis?**
Google Trends Analysis provides insights into how often a particular search-term is entered relative to the total search-volume across various regions of the world, and in various languages.
**What data will be shown?**
- **Related Keywords**: Terms that are frequently searched together with your keyword
- **Interest Over Time**: How interest in your keyword has changed over the past 12 months
- **Regional Interest**: Where in the world your keyword is most popular
- **Related Queries**: What people search for before and after searching for your keyword
- **Related Topics**: Topics that are closely related to your keyword
**How to use this data:**
- Identify trending topics in your industry
- Understand seasonal patterns in search behavior
- Discover related keywords for content planning
- Target content to specific regions with high interest
""")
trends_results = do_google_pytrends_analysis(search_keywords)
if trends_results:
update_progress("Google Trends analysis completed successfully", progress=90)
# Store trends results in the research_results
if metaphor_results:
metaphor_results['trends_data'] = trends_results
else:
# If metaphor_results is None, create a new container for results
metaphor_results = {'trends_data': trends_results}
# Display Google Trends data using the new UI module
display_google_trends_data(trends_results, search_keywords)
else:
update_progress("Google Trends analysis returned no results", level="warning")
except Exception as trends_err:
logger.error(f"Google Trends analysis failed: {trends_err}")
update_progress("Google Trends analysis failed", level="warning")
st.error(f"Error in Google Trends analysis: {str(trends_err)}")
# Return the combined results
update_progress("Research completed!", progress=100, level="success")
return metaphor_results or t_results
except Exception as ai_err:
error_msg = f"AI research pipeline failed: {str(ai_err)}"
logger.error(error_msg, exc_info=True)
update_progress(error_msg, level="error")
raise
else:
error_msg = f"Unsupported search mode: {search_mode}"
logger.error(error_msg)
@@ -316,13 +387,355 @@ def do_metaphor_ai_research(search_keywords):
return None, None
def _empty_trends_result():
    """Return a fresh result dict in which every Google Trends section is empty."""
    return {
        'related_keywords': [],
        'interest_over_time': pd.DataFrame(),
        'regional_interest': pd.DataFrame(),
        'related_queries': pd.DataFrame(),
        'related_topics': pd.DataFrame()
    }


def _split_keyword_string(raw_keywords):
    """Split a comma-separated keyword string into stripped, non-empty terms."""
    return [term.strip() for term in raw_keywords.split(',') if term.strip()]


def _rename_label_column(frame, label, aliases=()):
    """Return ``frame`` with its label column named ``label``.

    An existing ``label`` column is kept; otherwise the first alias found is
    renamed, and failing that the first column is renamed.
    """
    if label in frame.columns:
        return frame
    for alias in aliases:
        if alias in frame.columns:
            return frame.rename(columns={alias: label})
    if len(frame.columns) > 0:
        return frame.rename(columns={frame.columns[0]: label})
    return frame


def _ensure_value_column(frame):
    """Return ``frame`` with a 'value' column (second column renamed, else zeros)."""
    if 'value' in frame.columns:
        return frame
    if len(frame.columns) > 1:
        return frame.rename(columns={frame.columns[1]: 'value'})
    frame = frame.copy()
    frame['value'] = 0
    return frame


def _merge_related_frames(keyword_data, label, aliases, noun, steps, update_progress):
    """Stack the 'top' and 'rising' frames of one pytrends related_* entry.

    ``steps`` is a sequence of ``(bucket, start_progress, done_progress)``.
    A bucket that is missing, ``None`` or empty is skipped; a bucket that
    fails to process is logged and skipped. When nothing usable is found an
    empty frame with the columns ``[label, 'value']`` is returned.
    """
    merged = pd.DataFrame(columns=[label, 'value'])
    for bucket, start, done in steps:
        raw = keyword_data.get(bucket)
        if raw is None:
            continue
        try:
            update_progress(f"Processing {bucket} related {noun}...", progress=start)
            frame = raw if isinstance(raw, pd.DataFrame) else pd.DataFrame(raw)
            if frame.empty:
                continue
            frame = _rename_label_column(frame, label, aliases)
            # ignore_index avoids duplicate row labels in the stacked frame.
            merged = frame if merged.empty else pd.concat([merged, frame], ignore_index=True)
            update_progress(f"Successfully processed {len(frame)} {bucket} related {noun}", progress=done)
        except Exception as e:
            logger.warning(f"Error processing {bucket} {noun}: {e}")
            update_progress(f"Error processing {bucket} {noun}: {str(e)}", level="warning", progress=done)
    return merged


def _fetch_missing_trends_sections(formatted_data, keywords, update_progress):
    """Fill the empty sections of ``formatted_data`` in place via pytrends.

    Each section is fetched independently, so a failure in one is logged and
    does not prevent the others from being fetched.
    """
    from pytrends.request import TrendReq

    update_progress("Fetching additional data from Google Trends API...", progress=40)
    pytrends = TrendReq(hl='en-US', tz=360)

    update_progress("Building search payload...", progress=45)
    pytrends.build_payload([keywords], timeframe='today 12-m', geo='')

    if formatted_data['interest_over_time'].empty:
        try:
            update_progress("Fetching interest over time data...", progress=50)
            interest_df = pytrends.interest_over_time()
            if not interest_df.empty:
                formatted_data['interest_over_time'] = interest_df.reset_index()
                update_progress(f"Successfully fetched interest over time data with {len(formatted_data['interest_over_time'])} data points", progress=55)
            else:
                update_progress("No interest over time data available", level="warning", progress=55)
        except Exception as e:
            logger.error(f"Error fetching interest over time: {e}")
            update_progress(f"Error fetching interest over time: {str(e)}", level="warning", progress=55)

    if formatted_data['regional_interest'].empty:
        try:
            update_progress("Fetching regional interest data...", progress=60)
            regional_df = pytrends.interest_by_region()
            if not regional_df.empty:
                formatted_data['regional_interest'] = regional_df.reset_index()
                update_progress(f"Successfully fetched regional interest data for {len(formatted_data['regional_interest'])} regions", progress=65)
            else:
                update_progress("No regional interest data available", level="warning", progress=65)
        except Exception as e:
            logger.error(f"Error fetching regional interest: {e}")
            update_progress(f"Error fetching regional interest: {str(e)}", level="warning", progress=65)

    if formatted_data['related_queries'].empty:
        try:
            update_progress("Fetching related queries data...", progress=70)
            related_queries = pytrends.related_queries()
            keyword_data = related_queries.get(keywords) if isinstance(related_queries, dict) else None
            formatted_data['related_queries'] = _merge_related_frames(
                keyword_data or {}, 'query', (), 'queries',
                (('top', 75, 80), ('rising', 85, 90)), update_progress
            )
        except Exception as e:
            logger.error(f"Error fetching related queries: {e}")
            update_progress(f"Error fetching related queries: {str(e)}", level="warning", progress=90)
            formatted_data['related_queries'] = pd.DataFrame(columns=['query', 'value'])

    if formatted_data['related_topics'].empty:
        try:
            update_progress("Fetching related topics data...", progress=95)
            related_topics = pytrends.related_topics()
            keyword_data = related_topics.get(keywords) if isinstance(related_topics, dict) else None
            formatted_data['related_topics'] = _merge_related_frames(
                keyword_data or {}, 'topic', ('topic_title',), 'topics',
                (('top', 97, 98), ('rising', 99, 100)), update_progress
            )
        except Exception as e:
            logger.error(f"Error fetching related topics: {e}")
            update_progress(f"Error fetching related topics: {str(e)}", level="warning", progress=100)
            formatted_data['related_topics'] = pd.DataFrame(columns=['topic', 'value'])


def _finalise_trends_columns(formatted_data, keywords):
    """Rename the columns of every non-empty section to the names the UI expects."""
    interest = formatted_data['interest_over_time']
    if not interest.empty:
        if 'date' not in interest.columns:
            interest = interest.reset_index()
        if 'interest' not in interest.columns and keywords in interest.columns:
            interest = interest.rename(columns={keywords: 'interest'})
        formatted_data['interest_over_time'] = interest

    regional = formatted_data['regional_interest']
    if not regional.empty:
        if 'country_code' not in regional.columns and 'geoName' in regional.columns:
            regional = regional.rename(columns={'geoName': 'country_code'})
        if 'interest' not in regional.columns and keywords in regional.columns:
            regional = regional.rename(columns={keywords: 'interest'})
        formatted_data['regional_interest'] = regional

    queries = formatted_data['related_queries']
    if not queries.empty:
        queries = _rename_label_column(queries, 'query', ('Top query', 'Rising query'))
        formatted_data['related_queries'] = _ensure_value_column(queries)

    topics = formatted_data['related_topics']
    if not topics.empty:
        topics = _rename_label_column(topics, 'topic', ('topic_title',))
        formatted_data['related_topics'] = _ensure_value_column(topics)


def do_google_pytrends_analysis(keywords):
    """
    Perform Google Trends analysis for the given keywords.

    Related keywords come from ``do_google_trends_analysis``; that result
    may be a comma-separated string, a list, or a dict of ready-made
    sections. When keywords were found but any tabular section is still
    empty, the missing sections are fetched from pytrends directly. Progress
    is shown through temporary Streamlit widgets that are always removed
    before returning.

    Args:
        keywords (str): The search keywords to analyze

    Returns:
        dict: A dictionary containing formatted Google Trends data with the following keys:
            - related_keywords: List of related keywords
            - interest_over_time: DataFrame with date and interest columns
            - regional_interest: DataFrame with country_code and interest columns
            - related_queries: DataFrame with query and value columns
            - related_topics: DataFrame with topic and value columns
        Every section is empty when no data is available or when the
        analysis fails; errors are logged rather than raised.
    """
    logger.info(f"Performing Google Trends analysis for keywords: {keywords}")

    # Create a progress container for Streamlit
    progress_container = st.empty()
    progress_bar = st.progress(0)

    def update_progress(message, progress=None, level="info"):
        """Helper function to update progress in Streamlit UI"""
        if progress is not None:
            progress_bar.progress(progress)
        if level == "error":
            progress_container.error(f"🚫 {message}")
        elif level == "warning":
            progress_container.warning(f"⚠️ {message}")
        else:
            progress_container.info(f"🔄 {message}")
        logger.debug(f"Progress update [{level}]: {message}")

    frame_sections = ('interest_over_time', 'regional_interest', 'related_queries', 'related_topics')

    try:
        formatted_data = _empty_trends_result()

        update_progress("Fetching Google Trends data...", progress=10)
        raw_trends_data = do_google_trends_analysis(keywords)

        if not raw_trends_data:
            logger.warning("No Google Trends data returned")
            update_progress("No Google Trends data returned", level="warning", progress=20)
            return formatted_data

        update_progress("Processing related keywords...", progress=30)
        if isinstance(raw_trends_data, str):
            # do_google_trends_analysis joins its keywords with commas, so a
            # plain string has to be split back into individual terms.
            formatted_data['related_keywords'] = _split_keyword_string(raw_trends_data)
        elif isinstance(raw_trends_data, list):
            formatted_data['related_keywords'] = raw_trends_data
        elif isinstance(raw_trends_data, dict):
            if 'keywords' in raw_trends_data:
                formatted_data['related_keywords'] = raw_trends_data['keywords']
            for section in frame_sections:
                # Skip None so the `.empty` checks below stay valid.
                if raw_trends_data.get(section) is not None:
                    formatted_data[section] = raw_trends_data[section]

        # If we have keywords but missing other data, fetch it using pytrends directly
        has_missing_section = any(formatted_data[section].empty for section in frame_sections)
        if formatted_data['related_keywords'] and has_missing_section:
            try:
                _fetch_missing_trends_sections(formatted_data, keywords, update_progress)
            except Exception as e:
                logger.error(f"Error fetching additional trends data: {e}")
                update_progress(f"Error fetching additional trends data: {str(e)}", level="warning", progress=100)

        # Ensure all DataFrames have the correct column names for the UI
        update_progress("Finalizing data formatting...", progress=100)
        _finalise_trends_columns(formatted_data, keywords)
        return formatted_data

    except Exception as e:
        logger.error(f"Error in Google Trends analysis: {e}")
        update_progress(f"Error in Google Trends analysis: {str(e)}", level="error", progress=100)
        return _empty_trends_result()

    finally:
        # Remove the progress widgets on every exit path, including the
        # early "no data" return.
        progress_container.empty()
        progress_bar.empty()
def metaphor_extract_titles_or_text(json_data, return_titles=True):

View File

@@ -262,285 +262,185 @@ def metaphor_search_articles(query, search_options: dict = None):
except Exception as tavily_err:
logger.warning(f"Error getting Tavily answer: {tavily_err}")
# Display results in Streamlit
streamlit_display_metaphor_results(formatted_response)
# Return the formatted response without displaying it
# The display will be handled by gpt_web_researcher
return formatted_response
except Exception as e:
logger.error(f"Error in Exa searching articles: {e}")
return None
def streamlit_display_metaphor_results(metaphor_response: dict):
"""
Display Metaphor search results in Streamlit with enhanced metrics and popovers
def streamlit_display_metaphor_results(metaphor_response, search_keywords=None):
"""Display Metaphor search results in Streamlit."""
Args:
metaphor_response (dict): Response from Metaphor search
"""
if not metaphor_response or 'data' not in metaphor_response:
st.error("No valid Metaphor search results to display")
if not metaphor_response:
st.error("No search results found.")
return
# Add debug logging
logger.debug(f"Displaying Metaphor results. Type: {type(metaphor_response)}")
if isinstance(metaphor_response, dict):
logger.debug(f"Metaphor response keys: {metaphor_response.keys()}")
# Initialize session state variables if they don't exist
if 'search_insights' not in st.session_state:
st.session_state.search_insights = None
if 'metaphor_response' not in st.session_state:
st.session_state.metaphor_response = metaphor_response
st.session_state.metaphor_response = None
if 'insights_generated' not in st.session_state:
st.session_state.insights_generated = False
# Update the stored metaphor_response with the latest data
# Store the current response in session state
st.session_state.metaphor_response = metaphor_response
# Display metrics in columns
col1, col2, col3 = st.columns(3)
# Calculate metrics
results = metaphor_response['data']['results']
# Display search results
st.subheader("🔍 Search Results")
# Calculate metrics - handle different data structures
results = []
if isinstance(metaphor_response, dict):
if 'data' in metaphor_response and 'results' in metaphor_response['data']:
results = metaphor_response['data']['results']
elif 'results' in metaphor_response:
results = metaphor_response['results']
total_results = len(results)
avg_score = sum(r['score'] for r in results if r['score']) / total_results if total_results > 0 else 0
avg_relevance = sum(r.get('score', 0) for r in results) / total_results if total_results > 0 else 0
# Display metrics
col1, col2 = st.columns(2)
with col1:
st.metric(
label="Total Results",
value=total_results
)
st.metric("Total Results", total_results)
with col2:
if metaphor_response['data'].get('costDollars'):
cost = metaphor_response['data']['costDollars']
st.metric(
label="Search Cost",
value=f"${cost['total']:.3f}"
)
with col3:
st.metric(
label="Average Relevance Score",
value=f"{avg_score:.2f}"
)
# Display AI-generated answers side by side
if 'answer' in metaphor_response or 'tavily_answer' in metaphor_response:
st.markdown("### 🤖 AI-Generated Research Answers")
# Create two columns for side-by-side display
tavily_col, metaphor_col = st.columns(2)
# Display Tavily answer if available
with tavily_col:
if 'tavily_answer' in metaphor_response:
st.markdown("#### 🔍 Tavily AI Answer")
st.markdown(f"""
<div style="background-color: #f0f2f6; padding: 20px; border-radius: 10px; border-left: 5px solid #FF4B4B;">
{metaphor_response['tavily_answer']}
</div>
""", unsafe_allow_html=True)
if metaphor_response.get('tavily_cost_dollars'):
st.caption(f"Tavily Answer Cost: ${metaphor_response['tavily_cost_dollars']['total']:.3f}")
if metaphor_response.get('tavily_citations'):
with st.expander("📚 Tavily Sources"):
for idx, citation in enumerate(metaphor_response['tavily_citations'], 1):
st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})")
else:
st.markdown("#### 🔍 Tavily AI Answer")
st.info("No Tavily answer available for this query.")
# Display Metaphor answer if available
with metaphor_col:
if 'answer' in metaphor_response:
st.markdown("#### 🔍 Metaphor AI Answer")
st.markdown(f"""
<div style="background-color: #f0f2f6; padding: 20px; border-radius: 10px; border-left: 5px solid #4CAF50;">
{metaphor_response['answer']}
</div>
""", unsafe_allow_html=True)
if metaphor_response.get('answerCostDollars'):
st.caption(f"Metaphor Answer Cost: ${metaphor_response['answerCostDollars']['total']:.3f}")
if metaphor_response.get('citations'):
with st.expander("📚 Metaphor Sources"):
for idx, citation in enumerate(metaphor_response['citations'], 1):
st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})")
else:
st.markdown("#### 🔍 Metaphor AI Answer")
st.info("No Metaphor answer available for this query.")
st.metric("Average Relevance Score", f"{avg_relevance:.2f}")
# Add "Get Search Insights" button - moved outside the AI answers conditional
st.markdown("### 🔍 Search Insights")
# Display AI-generated answers if available
if 'tavily_answer' in metaphor_response or 'metaphor_answer' in metaphor_response:
st.subheader("🤖 AI-Generated Answers")
if 'tavily_answer' in metaphor_response:
st.markdown("**Tavily AI Answer:**")
st.write(metaphor_response['tavily_answer'])
if 'metaphor_answer' in metaphor_response:
st.markdown("**Metaphor AI Answer:**")
st.write(metaphor_response['metaphor_answer'])
# Create a container for the insights
insights_container = st.container()
# Use a button with a callback function
if st.button("Generate Search Insights", type="primary"):
# Set a flag in session state to indicate that insights should be generated
# Get Search Insights button
if st.button("Generate Search Insights", key="metaphor_generate_insights_button"):
st.session_state.insights_generated = True
# Store the current metaphor_response in session state
st.session_state.metaphor_response = metaphor_response
# Redirect to the same page with a query parameter to trigger insights generation
st.experimental_rerun()
# If insights should be generated, do it in a separate container
if st.session_state.insights_generated:
with insights_container:
with st.spinner("Analyzing search results to generate insights..."):
# Get the stored metaphor_response from session state
stored_response = st.session_state.metaphor_response
stored_results = stored_response['data']['results']
# Prepare data for analysis
analysis_data = {
"metaphor_results": stored_results,
"metaphor_answer": stored_response.get("answer", ""),
"tavily_answer": stored_response.get("tavily_answer", ""),
"metaphor_citations": stored_response.get("citations", []),
"tavily_citations": stored_response.get("tavily_citations", [])
}
# Create the analysis prompt
analysis_prompt = f"""
**Search Intent & User Needs Analysis**
I have conducted research using both Tavily and Metaphor AI search engines.
Below is the data from both sources:
**Metaphor AI Answer:**
{analysis_data["metaphor_answer"]}
**Tavily AI Answer:**
{analysis_data["tavily_answer"]}
**Search Results:**
{[f"{i+1}. {r['title']} - {r['summary']}" for i, r in enumerate(analysis_data["metaphor_results"])]}
**Citations:**
{[f"{i+1}. {c.get('title', 'Untitled')} - {c.get('url', 'No URL')}" for i, c in enumerate(analysis_data["metaphor_citations"] + analysis_data["tavily_citations"])]}
Based on this research data, please provide the following insights:
**Search Intent & User Needs**
```
Review the research data and identify:
1. The distribution of search intent (categorize as Informational/Commercial/Navigational/Transactional)
2. Most common user questions and their patterns
3. Frequently mentioned pain points or challenges
4. Recurring solutions or approaches to addressing these challenges
5. Gaps between user questions and available answers
Present findings in a structured format with percentages and specific examples.
```
Format your response as a comprehensive analysis with clear sections, bullet points, and examples from the research data.
"""
try:
# Import the llm_text_gen function
import importlib
text_gen_module = importlib.import_module('lib.gpt_providers.text_generation.main_text_generation')
if hasattr(text_gen_module, 'llm_text_gen'):
# Generate insights using llm_text_gen
insights = text_gen_module.llm_text_gen(analysis_prompt)
# Store insights in session state
st.session_state.search_insights = insights
# Reset the flag to prevent regeneration on next rerun
st.session_state.insights_generated = False
else:
st.error("Could not find llm_text_gen function in the text generation module.")
except Exception as e:
st.error(f"Error generating insights: {str(e)}")
logger.error(f"Error generating insights: {e}")
st.rerun()
# Display insights if they exist in session state
if st.session_state.search_insights:
with insights_container:
st.markdown("### 🔍 Search Intent & User Needs Analysis")
st.markdown(st.session_state.search_insights)
# Create DataFrame from results
df = pd.DataFrame(results)
st.subheader("🔍 Search Insights")
st.write(st.session_state.search_insights)
# Display search results in a data editor
st.subheader("📊 Detailed Results")
# Prepare data for display
display_df = df.copy()
display_df['Visit Site'] = display_df['url']
results_data = []
for result in results:
result_data = {
'Title': result.get('title', ''),
'URL': result.get('url', ''),
'Snippet': result.get('summary', ''),
'Relevance Score': result.get('score', 0),
'Published Date': result.get('publishedDate', '')
}
results_data.append(result_data)
# Format publishedDate as string if it exists
if 'publishedDate' in display_df.columns:
display_df['publishedDate'] = display_df['publishedDate'].apply(
lambda x: x[:10] if isinstance(x, str) else 'N/A'
# Create DataFrame
df = pd.DataFrame(results_data)
# Display the DataFrame if it's not empty
if not df.empty:
# Configure columns
st.dataframe(
df,
column_config={
"Title": st.column_config.TextColumn(
"Title",
help="Title of the search result",
width="large",
),
"URL": st.column_config.LinkColumn(
"URL",
help="Link to the search result",
width="medium",
display_text="Visit Article",
),
"Snippet": st.column_config.TextColumn(
"Snippet",
help="Summary of the search result",
width="large",
),
"Relevance Score": st.column_config.NumberColumn(
"Relevance Score",
help="Relevance score of the search result",
format="%.2f",
width="small",
),
"Published Date": st.column_config.DateColumn(
"Published Date",
help="Publication date of the search result",
width="medium",
),
},
hide_index=True,
)
# Configure columns for data editor
columns = {
'title': st.column_config.TextColumn(
'Title',
width='large',
required=True,
),
'author': st.column_config.TextColumn(
'Author',
width='medium',
),
'publishedDate': st.column_config.TextColumn(
'Published Date',
width='medium',
),
'score': st.column_config.NumberColumn(
'Relevance Score',
width='small',
format="%.2f"
),
'Visit Site': st.column_config.LinkColumn(
'Link',
width='small',
display_text='Visit Site',
),
'summary': st.column_config.TextColumn(
'Summary',
width='large',
required=True,
)
}
# Display results in data editor
st.data_editor(
display_df,
column_config=columns,
hide_index=True,
num_rows='dynamic',
disabled=True,
column_order=['title', 'author', 'publishedDate', 'score', 'summary', 'Visit Site']
)
# Display detailed summaries with popovers
st.write("### Detailed Summaries")
for idx, result in enumerate(results, 1):
with st.expander(f"📄 {result['title']}", expanded=False):
col1, col2 = st.columns([3, 1])
with col1:
st.markdown(f"**Summary**")
st.markdown(result['summary'])
with col2:
st.markdown("**Details**")
st.markdown(f"**Author:** {result['author'] if result['author'] else 'N/A'}")
st.markdown(f"**Published:** {result['publishedDate'][:10] if result['publishedDate'] else 'N/A'}")
st.markdown(f"**Score:** {result['score']:.2f}")
st.markdown(f"[Visit Site]({result['url']})")
# Display search metadata
st.divider()
col1, col2 = st.columns(2)
with col1:
st.caption(f"Search Type: {metaphor_response['data']['resolvedSearchType']}")
with col2:
st.caption(f"Request ID: {metaphor_response['data']['requestId']}")
# Add popover for snippets
st.markdown("""
<style>
.snippet-popover {
position: relative;
display: inline-block;
}
.snippet-popover .snippet-content {
visibility: hidden;
width: 300px;
background-color: #f9f9f9;
color: #333;
text-align: left;
border-radius: 6px;
padding: 10px;
position: absolute;
z-index: 1;
bottom: 125%;
left: 50%;
margin-left: -150px;
opacity: 0;
transition: opacity 0.3s;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}
.snippet-popover:hover .snippet-content {
visibility: visible;
opacity: 1;
}
</style>
""", unsafe_allow_html=True)
# Display snippets with popover
st.subheader("📝 Snippets")
for i, result in enumerate(results):
snippet = result.get('summary', '')
if snippet:
st.markdown(f"""
<div class="snippet-popover">
<strong>{result.get('title', '')}</strong>
<div class="snippet-content">
{snippet}
</div>
</div>
""", unsafe_allow_html=True)
else:
st.info("No detailed results available.")
# Add a collapsible section for the raw JSON data
with st.expander("Research Results (JSON)", expanded=False):
st.json(metaphor_response)
def metaphor_news_summarizer(news_keywords):

View File

@@ -0,0 +1,425 @@
"""
Module for displaying Google Trends data in the Streamlit UI.
This module provides functions for visualizing Google Trends data, including:
- Interest over time
- Regional interest
- Related queries
- Related topics
"""
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import logging
# Set up logging
logger = logging.getLogger(__name__)
def display_google_trends_data(trends_data, search_keyword):
    """
    Render the full Google Trends report as a tabbed Streamlit view.

    When ``trends_data`` is falsy a warning is shown and nothing else is
    rendered. Otherwise the function draws a heading, a collapsible
    explainer, one tab per data section, and a footer caption stamped with
    the time of rendering.

    Args:
        trends_data (dict): Mapping that may carry the keys
            'related_keywords', 'interest_over_time', 'regional_interest',
            'related_queries' and 'related_topics'. A missing key falls back
            to an empty list (keywords) or an empty DataFrame (all others).
        search_keyword (str): Keyword interpolated into the heading.
    """
    if not trends_data:
        st.warning("No Google Trends data available for this search.")
        return

    st.subheader(f"Google Trends Analysis for '{search_keyword}'")

    # Collapsible background information about Google Trends.
    # The literal is kept flush-left so its text stays exactly as authored.
    about_panel = st.expander(" About Google Trends Data", expanded=False)
    with about_panel:
        st.markdown("""
**What is Google Trends?**
Google Trends is a public web facility that shows how often a particular search-term is entered relative to the total search-volume across various regions of the world, and in various languages.
**What data is shown here?**
- **Related Keywords**: Terms that are frequently searched together with your keyword
- **Interest Over Time**: How interest in your keyword has changed over the past 12 months
- **Regional Interest**: Where in the world your keyword is most popular
- **Related Queries**: What people search for before and after searching for your keyword
- **Related Topics**: Topics that are closely related to your keyword
**How to interpret the data:**
- Interest values range from 0 to 100, where 100 is the peak popularity for the term
- A value of 50 means the term is half as popular as the peak
- A value of 0 means there was not enough data for this term
""")

    # One row per tab: (tab label, renderer, key in trends_data, factory for
    # the fallback value used when the key is absent).
    sections = (
        ("Related Keywords", display_keywords_section, 'related_keywords', list),
        ("Interest Over Time", display_interest_over_time, 'interest_over_time', pd.DataFrame),
        ("Regional Interest", display_regional_interest, 'regional_interest', pd.DataFrame),
        ("Related Queries", display_related_queries, 'related_queries', pd.DataFrame),
        ("Related Topics", display_related_topics, 'related_topics', pd.DataFrame),
    )
    tab_handles = st.tabs([label for label, _, _, _ in sections])
    for tab, (_, render, data_key, make_fallback) in zip(tab_handles, sections):
        with tab:
            render(trends_data.get(data_key, make_fallback()))

    # Footer naming the data source and the moment this view was rendered.
    st.markdown("---")
    rendered_at = pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")
    st.caption("Data source: Google Trends | Last updated: " + rendered_at)
def display_keywords_section(keywords):
    """
    Show the related keywords as a one-column table.

    Args:
        keywords: Collection of related keywords. When it is falsy an info
            message is shown and nothing else is rendered.
    """
    if not keywords:
        st.info("No related keywords data available.")
        return

    st.subheader("Related Keywords")
    st.write("Keywords related to your search:")

    # Collapsible help text; the literal is kept flush-left so its text
    # stays exactly as authored.
    help_panel = st.expander(" About Related Keywords", expanded=False)
    with help_panel:
        st.markdown("""
**What are Related Keywords?**
Related keywords are terms that are frequently searched together with your main keyword.
These keywords can help you understand what topics are associated with your search term
and can be valuable for content planning and SEO strategies.
**How to use this data:**
- Use these keywords to expand your content strategy
- Identify gaps in your content that you could fill
- Understand what your audience is interested in
- Improve your SEO by incorporating these terms naturally in your content
""")

    # Wrap the keywords in a DataFrame so Streamlit renders them as a table.
    keyword_table = pd.DataFrame(keywords, columns=['Keyword'])
    st.dataframe(keyword_table, use_container_width=True)

    keyword_count = len(keywords)
    st.caption(f"Found {keyword_count} related keywords")
def display_interest_over_time(interest_df):
    """
    Plot interest over time as a line chart followed by summary metrics.

    Args:
        interest_df (pd.DataFrame): Frame that must provide 'date' and
            'interest' columns. An empty frame produces an info message
            only; a missing column produces an error message and no chart.

    Any exception raised while building or rendering the chart is reported
    in the UI and logged instead of propagating.
    """
    if interest_df.empty:
        st.info("No interest over time data available.")
        return

    st.subheader("Interest Over Time")

    # Collapsible help text; the literal is kept flush-left so its text
    # stays exactly as authored.
    help_panel = st.expander(" About Interest Over Time", expanded=False)
    with help_panel:
        st.markdown("""
**What is Interest Over Time?**
Interest Over Time shows how interest in your search term has changed over the past 12 months.
The data is normalized and presented on a scale from 0 to 100, where 100 is the peak popularity
for the term, 50 means the term is half as popular, and 0 means there was not enough data.
**How to interpret this chart:**
- Look for peaks and valleys to identify trends
- Compare with seasonal patterns or events
- Identify if interest is growing, declining, or stable
- Use this data to time your content releases for maximum impact
""")

    try:
        # Validate the columns in a fixed order; the first one missing wins.
        required_columns = (
            ('date', "Interest over time data is missing the 'date' column."),
            ('interest', "Interest over time data is missing the 'interest' column."),
        )
        for column_name, complaint in required_columns:
            if column_name not in interest_df.columns:
                st.error(complaint)
                return

        fig = px.line(
            interest_df,
            x='date',
            y='interest',
            title='Interest Over Time',
            labels={'date': 'Date', 'interest': 'Interest'},
            line_shape='spline',
        )
        fig.update_layout(
            xaxis_title="Date",
            yaxis_title="Interest",
            hovermode='x unified',
        )
        st.plotly_chart(fig, use_container_width=True)

        # Summary statistics. The frame is known to be non-empty here because
        # the empty case returned at the top of the function.
        interest_values = interest_df['interest']
        summaries = (
            ("Average Interest", interest_values.mean),
            ("Peak Interest", interest_values.max),
            ("Lowest Interest", interest_values.min),
        )
        for slot, (label, compute) in zip(st.columns(3), summaries):
            with slot:
                st.metric(label, f"{compute():.1f}")
    except Exception as err:
        st.error(f"Error displaying interest over time chart: {str(err)}")
        logger.error(f"Error in display_interest_over_time: {err}")
def display_regional_interest(regional_df):
    """
    Draw a choropleth map of interest by region plus a top-5 bar chart.

    Args:
        regional_df (pd.DataFrame): Frame that must provide 'country_code'
            and 'interest' columns. An empty frame produces an info message
            only; a missing column produces an error message and no charts.

    Any exception raised while building or rendering the charts is reported
    in the UI and logged instead of propagating.
    """
    if regional_df.empty:
        st.info("No regional interest data available.")
        return

    st.subheader("Regional Interest")

    # Collapsible help text; the literal is kept flush-left so its text
    # stays exactly as authored.
    help_panel = st.expander(" About Regional Interest", expanded=False)
    with help_panel:
        st.markdown("""
**What is Regional Interest?**
Regional Interest shows how interest in your search term varies across different countries.
The data is normalized and presented on a scale from 0 to 100, where 100 is the peak popularity
for the term in that region, 50 means the term is half as popular, and 0 means there was not enough data.
**How to interpret this map:**
- Darker colors indicate higher interest in that region
- Lighter colors indicate lower interest
- Hover over a country to see the exact interest value
- Use this data to target your content to specific regions
""")

    try:
        # Validate the columns in a fixed order; the first one missing wins.
        required_columns = (
            ('country_code', "Regional interest data is missing the 'country_code' column."),
            ('interest', "Regional interest data is missing the 'interest' column."),
        )
        for column_name, complaint in required_columns:
            if column_name not in regional_df.columns:
                st.error(complaint)
                return

        country_codes = regional_df['country_code']
        colorbar_options = {
            'title': "Interest Level",
            'tickformat': ".0f",
            'tickmode': "linear",
            'tick0': 0,
            'dtick': 20,
        }
        world_trace = go.Choropleth(
            locations=country_codes,
            z=regional_df['interest'],
            text=country_codes,  # shown in the hover text
            colorscale='Viridis',
            colorbar_title="Interest Level",
            zmin=0,
            zmax=100,
            marker_line_color='darkgray',
            marker_line_width=0.5,
            showscale=True,
            colorbar=colorbar_options,
        )
        fig = go.Figure(data=world_trace)

        # Layout: centred title and a flat world map with land/ocean shading.
        title_options = {
            'text': 'Regional Interest Distribution',
            'x': 0.5,
            'xanchor': 'center',
        }
        geo_options = {
            'showframe': False,
            'showcoastlines': True,
            'projection_type': 'equirectangular',
            'showland': True,
            'landcolor': 'lightgray',
            'showocean': True,
            'oceancolor': 'aliceblue',
            'showcountries': True,
            'countrycolor': 'darkgray',
        }
        fig.update_layout(
            title=title_options,
            geo=geo_options,
            width=800,
            height=500,
            margin={'l': 0, 'r': 0, 't': 30, 'b': 0},
        )
        st.plotly_chart(fig, use_container_width=True)

        # Top five regions. The frame is known to be non-empty here because
        # the empty case returned at the top of the function.
        st.subheader("Top Regions by Interest")
        ranked = regional_df.sort_values('interest', ascending=False)
        top_regions = ranked.head(5)

        top_bar = go.Bar(
            x=top_regions['country_code'],
            y=top_regions['interest'],
            text=top_regions['interest'].round(1),
            textposition='auto',
            marker_color='rgb(55, 83, 109)',
        )
        fig_bar = go.Figure(data=[top_bar])
        fig_bar.update_layout(
            title='Top 5 Regions by Interest Level',
            xaxis_title='Region',
            yaxis_title='Interest Level',
            yaxis_range=[0, 100],
            showlegend=False,
        )
        st.plotly_chart(fig_bar, use_container_width=True)
    except Exception as err:
        st.error(f"Error displaying regional interest chart: {str(err)}")
        logger.error(f"Error in display_regional_interest: {err}")
def display_related_queries(queries_df):
    """
    Show related queries as a table sorted by descending 'value'.

    Args:
        queries_df (pd.DataFrame): Frame that must provide 'query' and
            'value' columns. An empty frame produces an info message only;
            a missing column produces an error message and no table.

    Any exception raised while sorting or rendering is reported in the UI
    and logged instead of propagating.
    """
    if queries_df.empty:
        st.info("No related queries data available.")
        return

    st.subheader("Related Queries")

    # Collapsible help text; the literal is kept flush-left so its text
    # stays exactly as authored.
    help_panel = st.expander(" About Related Queries", expanded=False)
    with help_panel:
        st.markdown("""
**What are Related Queries?**
Related Queries show what people search for before and after searching for your keyword.
These queries can help you understand the search intent and context around your keyword.
**How to interpret this data:**
- The 'value' column shows the relative interest compared to your main keyword
- Higher values indicate stronger association with your keyword
- Use these queries to expand your content strategy
- Identify what questions your audience is trying to answer
""")

    try:
        # Validate the columns in a fixed order; the first one missing wins.
        required_columns = (
            ('query', "Related queries data is missing the 'query' column."),
            ('value', "Related queries data is missing the 'value' column."),
        )
        for column_name, complaint in required_columns:
            if column_name not in queries_df.columns:
                st.error(complaint)
                return

        # Highest value first.
        ranked_queries = queries_df.sort_values('value', ascending=False)
        st.dataframe(ranked_queries, use_container_width=True)

        query_count = len(ranked_queries)
        st.caption(f"Found {query_count} related queries")
    except Exception as err:
        st.error(f"Error displaying related queries: {str(err)}")
        logger.error(f"Error in display_related_queries: {err}")
def display_related_topics(topics_df):
    """
    Show related topics as a table sorted by descending 'value'.

    Args:
        topics_df (pd.DataFrame): Frame that must provide 'topic' and
            'value' columns. An empty frame produces an info message only;
            a missing column produces an error message and no table.

    Any exception raised while sorting or rendering is reported in the UI
    and logged instead of propagating.
    """
    if topics_df.empty:
        st.info("No related topics data available.")
        return

    st.subheader("Related Topics")

    # Collapsible help text; the literal is kept flush-left so its text
    # stays exactly as authored.
    help_panel = st.expander(" About Related Topics", expanded=False)
    with help_panel:
        st.markdown("""
**What are Related Topics?**
Related Topics show broader topics that are associated with your search term.
These topics can help you understand the broader context and themes related to your keyword.
**How to interpret this data:**
- The 'value' column shows the relative interest compared to your main keyword
- Higher values indicate stronger association with your keyword
- Use these topics to understand the broader context of your keyword
- Identify themes that might be relevant to your content strategy
""")

    try:
        # Validate the columns in a fixed order; the first one missing wins.
        required_columns = (
            ('topic', "Related topics data is missing the 'topic' column."),
            ('value', "Related topics data is missing the 'value' column."),
        )
        for column_name, complaint in required_columns:
            if column_name not in topics_df.columns:
                st.error(complaint)
                return

        # Highest value first.
        ranked_topics = topics_df.sort_values('value', ascending=False)
        st.dataframe(ranked_topics, use_container_width=True)

        topic_count = len(ranked_topics)
        st.caption(f"Found {topic_count} related topics")
    except Exception as err:
        st.error(f"Error displaying related topics: {str(err)}")
        logger.error(f"Error in display_related_topics: {err}")
def process_trends_data(trends_data):
    """
    Reduce raw Google Trends data to the sections that have content.

    'related_keywords' is copied through unchanged whenever the key is
    present. Each tabular section ('interest_over_time',
    'regional_interest', 'related_queries', 'related_topics') is copied only
    when its value reports a non-empty ``.empty`` flag. A section that is
    missing, is ``None``, or is any other object without an ``.empty``
    attribute is skipped rather than raising ``AttributeError``.

    Args:
        trends_data (dict): Raw Google Trends data.

    Returns:
        dict: The retained sections, keyed as in the input; an empty dict
        when ``trends_data`` is falsy.
    """
    if not trends_data:
        return {}

    processed_data = {}

    # Keywords are passed through as-is, even when the collection is empty.
    if 'related_keywords' in trends_data:
        processed_data['related_keywords'] = trends_data['related_keywords']

    tabular_sections = (
        'interest_over_time',
        'regional_interest',
        'related_queries',
        'related_topics',
    )
    for section in tabular_sections:
        frame = trends_data.get(section)
        # Defaulting to True treats None and objects lacking `.empty` the
        # same as an empty frame: nothing to display, so drop the section.
        if getattr(frame, 'empty', True):
            continue
        processed_data[section] = frame

    return processed_data

View File

@@ -102,7 +102,12 @@ def validate_api_keys():
return api_keys
def do_web_research():
"""Input keywords and do web research with advanced options."""
"""Main function to perform web research based on user input."""
# Reset session state variables for this research operation
if 'metaphor_results_displayed' in st.session_state:
del st.session_state.metaphor_results_displayed
logger.info("Starting do_web_research function")
try:
@@ -509,7 +514,7 @@ def do_web_research():
status_display.success("✨ Research completed!")
# Display results in an organized way
with st.expander("📊 Research Results", expanded=True):
with st.expander("📊 Research Results", expanded=False):
st.write(web_research_result)
else:
st.warning("No results found for your search")