Google trends data and keyword research

2025-04-05 22:50:43 +05:30
parent d7cfe2dd31
commit 284c61e776
5 changed files with 1122 additions and 379 deletions
--- a/lib/ai_web_researcher/google_trends_researcher.py
+++ b/lib/ai_web_researcher/google_trends_researcher.py
@@ -105,124 +105,55 @@ def plot_interest_by_region(kw_list):
 def get_related_queries_and_save_csv(keywords, hl='en-US', tz=360, cat=0, timeframe='today 12-m'):
    """
    Fetch Google Trends related queries, print them as a table and save that table.

    Only the first keyword's entry in the pytrends response is used. Despite
    the function name, no CSV file is written here; the rendered table is
    handed to ``save_in_file``.

    Args:
        keywords (list): List of search keywords, passed to pytrends as ``kw_list``.
        hl (str): Language parameter, default is 'en-US'.
        tz (int): Timezone parameter, default is 360.
        cat (int): Category parameter, default is 0.
        timeframe (str): Timeframe parameter, default is 'today 12-m'.
    Returns:
        The value of ``top + rising`` for the first keyword's related queries,
        or None when any step fails (the error is printed, not raised).
    """
    try:
        # Build the request for the given keywords
        pytrends = TrendReq(hl=hl, tz=tz)
        pytrends.build_payload(kw_list=keywords, cat=cat, timeframe=timeframe)
        data = pytrends.related_queries()
        # Only the first keyword's entry of the response is used
        first_keyword_data = list(data.values())[0]
        top_queries = first_keyword_data['top']
        rising_queries = first_keyword_data['rising']
        # NOTE(review): presumably these are DataFrames (or None), in which case
        # `+` is element-wise addition rather than concatenation - confirm the
        # intent. Kept unchanged because this is the value callers receive.
        top_rising_queries = top_queries + rising_queries
        df_top_queries = pd.DataFrame(top_queries)
        df_rising_queries = pd.DataFrame(rising_queries)
        # Rename columns so the side-by-side table has distinct query headers
        df_top_queries.columns = ['Top query', 'value']
        df_rising_queries.columns = ['Rising query', 'value']
        all_queries_df = pd.concat([df_top_queries, df_rising_queries], axis=1)
        # Explain the two result groups before showing the table
        print("\n📢❗🚨 ")
        print("\n\033[1m🔝 Top\033[0m: The most popular search queries. Scoring is on a relative scale where a value of 100 is the most commonly searched query, 50 is a query searched half as often, and a value of 0 is a query searched for less than 1% as often as the most popular query.\n")
        print("\n\033[1m🚀 Rising\033[0m: Queries with the biggest increase in search frequency since the last time period. Results marked 'Breakout' had a tremendous increase, probably because these queries are new and had few (if any) prior searches.\n")
        table = tabulate(all_queries_df, headers='keys', tablefmt='fancy_grid')
        print(table)
        # Saving is best-effort: a failure is logged and the result is still returned
        try:
            save_in_file(table)
        except Exception as save_results_err:
            logger.error(f"Failed to save search results: {save_results_err}")
        return top_rising_queries
    except Exception as e:
        print(f"get_related_queries_and_save_csv: ERROR: An error occurred: {e}")
        return None
 def get_related_topics_and_save_csv(search_keywords):
-    """
+    search_keywords = [f"{search_keywords}"]
    Get related topics for the given search keywords and save the result to a CSV file.
    Args:
        search_keywords (list): List of search keywords.
    Returns:
        pd.DataFrame: DataFrame containing related topics.
    """
    try:
        # Build model
        pytrends = TrendReq(hl='en-US', tz=360)
        pytrends.build_payload(kw_list=search_keywords, timeframe='today 12-m')
-        # Build payload
+        # Get related topics - this returns a dictionary
-        # FIXME: Remove hardcoding.
+        topics_data = pytrends.related_topics()
        pytrends.build_payload(search_keywords, cat=0, timeframe='today 12-m')
        # Get related topics
        try:
            data = pytrends.related_topics()
        except Exception as err:
            logger.error(f"Failed to get pytrends realted topics: {err}")
            return None
        # Extract data from the result
        top_topics = list(data.values())[0]['top']
        rising_topics = list(data.values())[0]['rising']
-        # Convert lists to DataFrames
+        # Extract data for the first keyword
-        df_top_topics = pd.DataFrame(top_topics)
+        if topics_data and search_keywords[0] in topics_data:
-        df_rising_topics = pd.DataFrame(rising_topics)
+            keyword_data = topics_data[search_keywords[0]]
-        
+            
-        # FIXME:Exclude specified columns
+            # Create two separate dataframes for top and rising
-        columns_to_exclude = ['hasData', 'value', 'topic_mid', 'link']
+            top_df = keyword_data.get('top', pd.DataFrame())
-        df_top_topics = df_top_topics.drop(columns=columns_to_exclude, errors='ignore')
+            rising_df = keyword_data.get('rising', pd.DataFrame())
-        df_rising_topics = df_rising_topics.drop(columns=columns_to_exclude, errors='ignore')
+            
-
+            return {
-        # Rename columns to avoid duplicates and provide meaningful names
+                'top': top_df[['topic_title', 'value']] if not top_df.empty else pd.DataFrame(),
-        df_top_topics.columns = ['Top- ' + col if col != 'topic_title' else col for col in df_top_topics.columns]
+                'rising': rising_df[['topic_title', 'value']] if not rising_df.empty else pd.DataFrame()
-        df_rising_topics.columns = ['Rising- ' + col if col != 'topic_title' else col for col in df_rising_topics.columns]
+            }
        all_topics_df = pd.concat([df_top_topics, df_rising_topics], axis=1)
        print(f"\n\n 📢❗🚨 Rising and Trending Keywords for {search_keywords}\n")
        print("\033[1m🔝 Top\033[0m: The most popular search topics.")
        print("\033[1m🚀 Rising\033[0m: Topics experiencing a significant increase in search frequency since the last time period. Topics marked :pile_of_poop:'Breakout' had a tremendous surge, likely because they are new and had few prior searches.")
        # Display the DataFrame using tabulate
        pd.set_option('display.max_rows', all_topics_df.shape[0]+1)
        print(all_topics_df.head(10))
        table = tabulate(all_topics_df, headers='keys', tablefmt='fancy_grid')
        try:
            save_in_file(table)
        except Exception as save_results_err:
            logger.error(f"Failed to save search results: {save_results_err}")
        return all_topics_df
    except Exception as e:
-        logger.error(f"ERROR: An error occurred in related topics: {e}")
+        logger.error(f"Error in related topics: {e}")
-        return pd.DataFrame()
+        return {'top': pd.DataFrame(), 'rising': pd.DataFrame()}
 def get_related_queries_and_save_csv(search_keywords):
    """
    Fetch the top and rising Google Trends related queries for one keyword.

    Args:
        search_keywords: Keyword to look up. It is formatted into a string and
            sent to pytrends as a one-element keyword list.
    Returns:
        dict: ``{'top': DataFrame, 'rising': DataFrame}``. An entry is an empty
        DataFrame when that group has no data; both are empty when the keyword
        is missing from the response or the request fails (the error is logged).
    """
    search_keywords = [f"{search_keywords}"]
    try:
        pytrends = TrendReq(hl='en-US', tz=360)
        pytrends.build_payload(kw_list=search_keywords, timeframe='today 12-m')
        # Get related queries - this returns a dictionary
        queries_data = pytrends.related_queries()
        # A missing or empty entry for the keyword is treated like "no data"
        # so every path returns the same dict shape instead of None.
        keyword_data = (queries_data or {}).get(search_keywords[0]) or {}
        result = {}
        for group in ('top', 'rising'):
            frame = keyword_data.get(group)
            # The group may be absent or None; normalise both to an empty frame
            if frame is None or frame.empty:
                frame = pd.DataFrame()
            result[group] = frame
        return result
    except Exception as e:
        logger.error(f"Error in related queries: {e}")
        return {'top': pd.DataFrame(), 'rising': pd.DataFrame()}
 def get_source(url):
@@ -507,22 +438,17 @@ def do_google_trends_analysis(search_term):
            else:
                all_the_keywords.append(suggestions_df['Keywords'].tolist())
            all_the_keywords = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in all_the_keywords])
            # Generate a random sleep time between 2 and 3 seconds 
            time.sleep(random.uniform(2, 3))
-
+        
 #        
 #        # FIXME: Get result from vision GPT. Fetch and visualize Google Trends data
 #        #trends_data = fetch_google_trends_interest_overtime("llamaindex")
 #
 #        # FIXME: Plot Interest Over time.
 #        result_df = plot_interest_by_region(search_term)
 #        
        # Display additional information
        try:
            result_df = get_related_topics_and_save_csv(search_term)
            logger.info(f"Related topics:: result_df: {result_df}")
            # Extract 'Top' topic_title
            if result_df:
-                top_topic_title = result_df['topic_title'].values.tolist()
+                top_topic_title = result_df['top']['topic_title'].values.tolist()
                # Join each sublist into one string separated by comma
                #top_topic_title = [','.join(filter(None, map(str, sublist))) for sublist in top_topic_title]
                top_topic_title = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in top_topic_title])
@@ -551,3 +477,77 @@ def do_google_trends_analysis(search_term):
        return(all_the_keywords)
    except Exception as e:
        logger.error(f"Error in Google Trends Analysis: {e}")
 def get_trending_searches(country='united_states'):
    """Return pytrends' trending searches for ``country``; on any error, log it and return an empty DataFrame."""
    try:
        client = TrendReq(hl='en-US', tz=360)
        return client.trending_searches(pn=country)
    except Exception as e:
        logger.error(f"Error getting trending searches: {e}")
    # Reached only after a logged failure
    return pd.DataFrame()
 def display_trending_searches(trending_df):
    """Render the first ten entries of column 0 of ``trending_df`` as a numbered Streamlit list."""
    if not trending_df.empty:
        st.subheader("📊 Trending Searches")
        # Numbering starts at 1 for display
        first_ten = trending_df[0].head(10)
        for position, term in enumerate(first_ten, start=1):
            st.write(f"{position}. 🔍 {term}")
    else:
        st.info("No trending searches data available.")
 def get_realtime_trends(country='US'):
    """Return pytrends' realtime trending searches for ``country``; on any error, log it and return an empty DataFrame."""
    try:
        client = TrendReq(hl='en-US', tz=360)
        return client.realtime_trending_searches(pn=country)
    except Exception as e:
        logger.error(f"Error getting realtime trends: {e}")
    # Reached only after a logged failure
    return pd.DataFrame()
 def display_realtime_trends(trends_df):
    """Show the first five realtime trends, each in a Streamlit expander with traffic and up to three article titles."""
    if trends_df.empty:
        st.info("No realtime trends data available.")
        return
    st.subheader("⚡ Realtime Trends")
    for _, trend in trends_df.head(5).iterrows():
        heading = trend.get('title', 'Trending Topic')
        with st.expander(f"🔥 {heading}"):
            traffic = trend.get('traffic', 'N/A')
            st.write(f"**Traffic:** {traffic}")
            # Rows without an 'articles' entry show only the traffic line
            if 'articles' not in trend:
                continue
            st.write("📰 Related Articles:")
            for article in trend['articles'][:3]:  # Show top 3 articles
                st.write(f"- {article['title']}")
 def display_google_trends_data(trends_data, search_keyword):
    """
    Lay out Google Trends results in six Streamlit tabs and fill the "Trending Now" tab.

    Args:
        trends_data: Mapping read with ``.get``; the keys used here are
            'trending_searches' and 'realtime_trends', each defaulting to an
            empty DataFrame when absent.
        search_keyword: Not used in the visible part of this function.

    NOTE(review): the "... existing code ..." markers below appear to stand in
    for code elided from this snippet (the other five tabs are created but not
    populated here) - confirm against the full file.
    """
    # ... existing code ...
    # Create tabs for different sections
    tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
        "Related Keywords", 
        "Interest Over Time", 
        "Regional Interest", 
        "Related Queries", 
        "Related Topics",
        "Trending Now"
    ])
    # ... existing tab code ...
    # Sixth tab: trending searches on the left, realtime trends on the right
    with tab6:
        col1, col2 = st.columns(2)
        with col1:
            display_trending_searches(trends_data.get('trending_searches', pd.DataFrame()))
        with col2:
            display_realtime_trends(trends_data.get('realtime_trends', pd.DataFrame()))
--- a/lib/ai_web_researcher/gpt_online_researcher.py
+++ b/lib/ai_web_researcher/gpt_online_researcher.py
@@ -27,15 +27,19 @@ from pathlib import Path
 import sys
 from datetime import datetime
 import streamlit as st
 import pandas as pd
 import random
 import numpy as np
 from lib.alwrity_ui.display_google_serp_results import (
    process_research_results,
    process_search_results,
    display_research_results
 )
 from lib.alwrity_ui.google_trends_ui import display_google_trends_data, process_trends_data
 from .tavily_ai_search import get_tavilyai_results
-from .metaphor_basic_neural_web_search import metaphor_search_articles
+from .metaphor_basic_neural_web_search import metaphor_search_articles, streamlit_display_metaphor_results
 from .google_serp_search import google_search
 from .google_trends_researcher import do_google_trends_analysis
 #from .google_gemini_web_researcher import do_gemini_web_research
@@ -56,6 +60,10 @@ def gpt_web_researcher(search_keywords, search_mode, **kwargs):
    logger.debug(f"Additional parameters: {kwargs}")
    try:
        # Reset session state variables for this research operation
        if 'metaphor_results_displayed' in st.session_state:
            del st.session_state.metaphor_results_displayed
        # Initialize result container
        research_results = None
@@ -157,13 +165,76 @@ def gpt_web_researcher(search_keywords, search_mode, **kwargs):
                    update_progress("Metaphor AI search failed, continuing with Tavily results only...", level="warning")
                else:
                    update_progress("Metaphor AI search completed successfully", progress=75)
                    # Add debug logging to check the structure of metaphor_results
                    logger.debug(f"Metaphor results structure: {type(metaphor_results)}")
                    if isinstance(metaphor_results, dict):
                        logger.debug(f"Metaphor results keys: {metaphor_results.keys()}")
                        if 'data' in metaphor_results:
                            logger.debug(f"Metaphor data keys: {metaphor_results['data'].keys()}")
                            if 'results' in metaphor_results['data']:
                                logger.debug(f"Number of results: {len(metaphor_results['data']['results'])}")
                    # Display Metaphor results only if not already displayed
                    if 'metaphor_results_displayed' not in st.session_state:
                        st.session_state.metaphor_results_displayed = True
                        # Make sure to pass the correct parameters to streamlit_display_metaphor_results
                        streamlit_display_metaphor_results(metaphor_results, search_keywords)
                # Add Google Trends Analysis
                update_progress("Initiating Google Trends analysis...", progress=80)
                try:
                    # Add an informative message about Google Trends
                    with st.expander("ℹ️ About Google Trends Analysis", expanded=False):
                        st.markdown("""
                        **What is Google Trends Analysis?**
                        Google Trends Analysis provides insights into how often a particular search-term is entered relative to the total search-volume across various regions of the world, and in various languages.
                        **What data will be shown?**
                        - **Related Keywords**: Terms that are frequently searched together with your keyword
                        - **Interest Over Time**: How interest in your keyword has changed over the past 12 months
                        - **Regional Interest**: Where in the world your keyword is most popular
                        - **Related Queries**: What people search for before and after searching for your keyword
                        - **Related Topics**: Topics that are closely related to your keyword
                        **How to use this data:**
                        - Identify trending topics in your industry
                        - Understand seasonal patterns in search behavior
                        - Discover related keywords for content planning
                        - Target content to specific regions with high interest
                        """)
                    trends_results = do_google_pytrends_analysis(search_keywords)
                    if trends_results:
                        update_progress("Google Trends analysis completed successfully", progress=90)
                        # Store trends results in the research_results
                        if metaphor_results:
                            metaphor_results['trends_data'] = trends_results
                        else:
                            # If metaphor_results is None, create a new container for results
                            metaphor_results = {'trends_data': trends_results}
                        # Display Google Trends data using the new UI module
                        display_google_trends_data(trends_results, search_keywords)
                    else:
                        update_progress("Google Trends analysis returned no results", level="warning")
                except Exception as trends_err:
                    logger.error(f"Google Trends analysis failed: {trends_err}")
                    update_progress("Google Trends analysis failed", level="warning")
                    st.error(f"Error in Google Trends analysis: {str(trends_err)}")
                # Return the combined results
                update_progress("Research completed!", progress=100, level="success")
                return metaphor_results or t_results
            except Exception as ai_err:
                error_msg = f"AI research pipeline failed: {str(ai_err)}"
                logger.error(error_msg, exc_info=True)
                update_progress(error_msg, level="error")
                raise
-                
+
        else:
            error_msg = f"Unsupported search mode: {search_mode}"
            logger.error(error_msg)
@@ -316,13 +387,355 @@ def do_metaphor_ai_research(search_keywords):
        return None, None
-def do_google_pytrends_analysis(search_keywords):
+def do_google_pytrends_analysis(keywords):
-    """ """
+    """
    Perform Google Trends analysis for the given keywords.
    Args:
        keywords (str): The search keywords to analyze
    Returns:
        dict: A dictionary containing formatted Google Trends data with the following keys:
            - related_keywords: List of related keywords
            - interest_over_time: DataFrame with date and interest columns
            - regional_interest: DataFrame with country_code, country, and interest columns
            - related_queries: DataFrame with query and value columns
            - related_topics: DataFrame with topic and value columns
    """
    logger.info(f"Performing Google Trends analysis for keywords: {keywords}")
    # Create a progress container for Streamlit
    progress_container = st.empty()
    progress_bar = st.progress(0)
    def update_progress(message, progress=None, level="info"):
        """Helper function to update progress in Streamlit UI"""
        if progress is not None:
            progress_bar.progress(progress)
        if level == "error":
            progress_container.error(f"🚫 {message}")
        elif level == "warning":
            progress_container.warning(f"⚠️ {message}")
        else:
            progress_container.info(f"🔄 {message}")
        logger.debug(f"Progress update [{level}]: {message}")
    try:
-        logger.info(f"Do Google Trends analysis for given keywords: {search_keywords}")
+        # Initialize the formatted data dictionary
-        return(do_google_trends_analysis(search_keywords))
+        formatted_data = {
-    except Exception as err:
+            'related_keywords': [],
-        logger.error(f"Failed to do google trends analysis: {err}")
+            'interest_over_time': pd.DataFrame(),
            'regional_interest': pd.DataFrame(),
            'related_queries': pd.DataFrame(),
            'related_topics': pd.DataFrame()
        }
        # Get raw trends data from google_trends_researcher
        update_progress("Fetching Google Trends data...", progress=10)
        raw_trends_data = do_google_trends_analysis(keywords)
        if not raw_trends_data:
            logger.warning("No Google Trends data returned")
            update_progress("No Google Trends data returned", level="warning", progress=20)
            return formatted_data
        # Process related keywords from the raw data
        update_progress("Processing related keywords...", progress=30)
        if isinstance(raw_trends_data, list):
            formatted_data['related_keywords'] = raw_trends_data
        elif isinstance(raw_trends_data, dict):
            if 'keywords' in raw_trends_data:
                formatted_data['related_keywords'] = raw_trends_data['keywords']
            if 'interest_over_time' in raw_trends_data:
                formatted_data['interest_over_time'] = raw_trends_data['interest_over_time']
            if 'regional_interest' in raw_trends_data:
                formatted_data['regional_interest'] = raw_trends_data['regional_interest']
            if 'related_queries' in raw_trends_data:
                formatted_data['related_queries'] = raw_trends_data['related_queries']
            if 'related_topics' in raw_trends_data:
                formatted_data['related_topics'] = raw_trends_data['related_topics']
        # If we have keywords but missing other data, try to fetch them using pytrends directly
        if formatted_data['related_keywords'] and (
            formatted_data['interest_over_time'].empty or 
            formatted_data['regional_interest'].empty or 
            formatted_data['related_queries'].empty or 
            formatted_data['related_topics'].empty
        ):
            try:
                update_progress("Fetching additional data from Google Trends API...", progress=40)
                from pytrends.request import TrendReq
                pytrends = TrendReq(hl='en-US', tz=360)
                # Build payload with the main keyword
                update_progress("Building search payload...", progress=45)
                pytrends.build_payload([keywords], timeframe='today 12-m', geo='')
                # Get interest over time if missing
                if formatted_data['interest_over_time'].empty:
                    try:
                        update_progress("Fetching interest over time data...", progress=50)
                        interest_df = pytrends.interest_over_time()
                        if not interest_df.empty:
                            formatted_data['interest_over_time'] = interest_df.reset_index()
                            update_progress(f"Successfully fetched interest over time data with {len(formatted_data['interest_over_time'])} data points", progress=55)
                        else:
                            update_progress("No interest over time data available", level="warning", progress=55)
                    except Exception as e:
                        logger.error(f"Error fetching interest over time: {e}")
                        update_progress(f"Error fetching interest over time: {str(e)}", level="warning", progress=55)
                # Get regional interest if missing
                if formatted_data['regional_interest'].empty:
                    try:
                        update_progress("Fetching regional interest data...", progress=60)
                        regional_df = pytrends.interest_by_region()
                        if not regional_df.empty:
                            formatted_data['regional_interest'] = regional_df.reset_index()
                            update_progress(f"Successfully fetched regional interest data for {len(formatted_data['regional_interest'])} regions", progress=65)
                        else:
                            update_progress("No regional interest data available", level="warning", progress=65)
                    except Exception as e:
                        logger.error(f"Error fetching regional interest: {e}")
                        update_progress(f"Error fetching regional interest: {str(e)}", level="warning", progress=65)
                # Get related queries if missing
                if formatted_data['related_queries'].empty:
                    try:
                        update_progress("Fetching related queries data...", progress=70)
                        # Get related queries data
                        related_queries = pytrends.related_queries()
                        # Create empty DataFrame as fallback
                        formatted_data['related_queries'] = pd.DataFrame(columns=['query', 'value'])
                        # Simple direct approach to avoid list index errors
                        if related_queries and isinstance(related_queries, dict):
                            # Check if our keyword exists in the results
                            if keywords in related_queries:
                                keyword_data = related_queries[keywords]
                                # Process top queries if available
                                if 'top' in keyword_data and keyword_data['top'] is not None:
                                    try:
                                        update_progress("Processing top related queries...", progress=75)
                                        # Convert to DataFrame if it's not already
                                        if isinstance(keyword_data['top'], pd.DataFrame):
                                            top_df = keyword_data['top']
                                        else:
                                            # Try to convert to DataFrame
                                            top_df = pd.DataFrame(keyword_data['top'])
                                        # Ensure it has the right columns
                                        if not top_df.empty:
                                            # Rename columns if needed
                                            if 'query' in top_df.columns:
                                                # Already has the right column name
                                                pass
                                            elif len(top_df.columns) > 0:
                                                # Use first column as query
                                                top_df = top_df.rename(columns={top_df.columns[0]: 'query'})
                                            # Add to our results
                                            formatted_data['related_queries'] = top_df
                                            update_progress(f"Successfully processed {len(top_df)} top related queries", progress=80)
                                    except Exception as e:
                                        logger.warning(f"Error processing top queries: {e}")
                                        update_progress(f"Error processing top queries: {str(e)}", level="warning", progress=80)
                                # Process rising queries if available
                                if 'rising' in keyword_data and keyword_data['rising'] is not None:
                                    try:
                                        update_progress("Processing rising related queries...", progress=85)
                                        # Convert to DataFrame if it's not already
                                        if isinstance(keyword_data['rising'], pd.DataFrame):
                                            rising_df = keyword_data['rising']
                                        else:
                                            # Try to convert to DataFrame
                                            rising_df = pd.DataFrame(keyword_data['rising'])
                                        # Ensure it has the right columns
                                        if not rising_df.empty:
                                            # Rename columns if needed
                                            if 'query' in rising_df.columns:
                                                # Already has the right column name
                                                pass
                                            elif len(rising_df.columns) > 0:
                                                # Use first column as query
                                                rising_df = rising_df.rename(columns={rising_df.columns[0]: 'query'})
                                            # Combine with existing data if we have any
                                            if not formatted_data['related_queries'].empty:
                                                formatted_data['related_queries'] = pd.concat([formatted_data['related_queries'], rising_df])
                                                update_progress(f"Successfully processed {len(rising_df)} rising related queries", progress=90)
                                            else:
                                                formatted_data['related_queries'] = rising_df
                                                update_progress(f"Successfully processed {len(rising_df)} rising related queries", progress=90)
                                    except Exception as e:
                                        logger.warning(f"Error processing rising queries: {e}")
                                        update_progress(f"Error processing rising queries: {str(e)}", level="warning", progress=90)
                    except Exception as e:
                        logger.error(f"Error fetching related queries: {e}")
                        update_progress(f"Error fetching related queries: {str(e)}", level="warning", progress=90)
                        # Ensure we have an empty DataFrame with the right columns
                        formatted_data['related_queries'] = pd.DataFrame(columns=['query', 'value'])
                # Get related topics if missing
                if formatted_data['related_topics'].empty:
                    try:
                        update_progress("Fetching related topics data...", progress=95)
                        # Get related topics data
                        related_topics = pytrends.related_topics()
                        # Create empty DataFrame as fallback
                        formatted_data['related_topics'] = pd.DataFrame(columns=['topic', 'value'])
                        # Simple direct approach to avoid list index errors
                        if related_topics and isinstance(related_topics, dict):
                            # Check if our keyword exists in the results
                            if keywords in related_topics:
                                keyword_data = related_topics[keywords]
                                # Process top topics if available
                                if 'top' in keyword_data and keyword_data['top'] is not None:
                                    try:
                                        update_progress("Processing top related topics...", progress=97)
                                        # Convert to DataFrame if it's not already
                                        if isinstance(keyword_data['top'], pd.DataFrame):
                                            top_df = keyword_data['top']
                                        else:
                                            # Try to convert to DataFrame
                                            top_df = pd.DataFrame(keyword_data['top'])
                                        # Ensure it has the right columns
                                        if not top_df.empty:
                                            # Rename columns if needed
                                            if 'topic_title' in top_df.columns:
                                                top_df = top_df.rename(columns={'topic_title': 'topic'})
                                            elif len(top_df.columns) > 0 and 'topic' not in top_df.columns:
                                                # Use first column as topic
                                                top_df = top_df.rename(columns={top_df.columns[0]: 'topic'})
                                            # Add to our results
                                            formatted_data['related_topics'] = top_df
                                            update_progress(f"Successfully processed {len(top_df)} top related topics", progress=98)
                                    except Exception as e:
                                        logger.warning(f"Error processing top topics: {e}")
                                        update_progress(f"Error processing top topics: {str(e)}", level="warning", progress=98)
                                # Process rising topics if available
                                if 'rising' in keyword_data and keyword_data['rising'] is not None:
                                    try:
                                        update_progress("Processing rising related topics...", progress=99)
                                        # Convert to DataFrame if it's not already
                                        if isinstance(keyword_data['rising'], pd.DataFrame):
                                            rising_df = keyword_data['rising']
                                        else:
                                            # Try to convert to DataFrame
                                            rising_df = pd.DataFrame(keyword_data['rising'])
                                        # Ensure it has the right columns
                                        if not rising_df.empty:
                                            # Rename columns if needed
                                            if 'topic_title' in rising_df.columns:
                                                rising_df = rising_df.rename(columns={'topic_title': 'topic'})
                                            elif len(rising_df.columns) > 0 and 'topic' not in rising_df.columns:
                                                # Use first column as topic
                                                rising_df = rising_df.rename(columns={rising_df.columns[0]: 'topic'})
                                            # Combine with existing data if we have any
                                            if not formatted_data['related_topics'].empty:
                                                formatted_data['related_topics'] = pd.concat([formatted_data['related_topics'], rising_df])
                                                update_progress(f"Successfully processed {len(rising_df)} rising related topics", progress=100)
                                            else:
                                                formatted_data['related_topics'] = rising_df
                                                update_progress(f"Successfully processed {len(rising_df)} rising related topics", progress=100)
                                    except Exception as e:
                                        logger.warning(f"Error processing rising topics: {e}")
                                        update_progress(f"Error processing rising topics: {str(e)}", level="warning", progress=100)
                    except Exception as e:
                        logger.error(f"Error fetching related topics: {e}")
                        update_progress(f"Error fetching related topics: {str(e)}", level="warning", progress=100)
                        # Ensure we have an empty DataFrame with the right columns
                        formatted_data['related_topics'] = pd.DataFrame(columns=['topic', 'value'])
            except Exception as e:
                logger.error(f"Error fetching additional trends data: {e}")
                update_progress(f"Error fetching additional trends data: {str(e)}", level="warning", progress=100)
        # Ensure all DataFrames have the correct column names for the UI
        update_progress("Finalizing data formatting...", progress=100)
        if not formatted_data['interest_over_time'].empty:
            if 'date' not in formatted_data['interest_over_time'].columns:
                formatted_data['interest_over_time'] = formatted_data['interest_over_time'].reset_index()
            if 'interest' not in formatted_data['interest_over_time'].columns and keywords in formatted_data['interest_over_time'].columns:
                formatted_data['interest_over_time'] = formatted_data['interest_over_time'].rename(columns={keywords: 'interest'})
        if not formatted_data['regional_interest'].empty:
            if 'country_code' not in formatted_data['regional_interest'].columns and 'geoName' in formatted_data['regional_interest'].columns:
                formatted_data['regional_interest'] = formatted_data['regional_interest'].rename(columns={'geoName': 'country_code'})
            if 'interest' not in formatted_data['regional_interest'].columns and keywords in formatted_data['regional_interest'].columns:
                formatted_data['regional_interest'] = formatted_data['regional_interest'].rename(columns={keywords: 'interest'})
        if not formatted_data['related_queries'].empty:
            # Handle different column names that might be present in the related queries DataFrame
            if 'query' not in formatted_data['related_queries'].columns:
                if 'Top query' in formatted_data['related_queries'].columns:
                    formatted_data['related_queries'] = formatted_data['related_queries'].rename(columns={'Top query': 'query'})
                elif 'Rising query' in formatted_data['related_queries'].columns:
                    formatted_data['related_queries'] = formatted_data['related_queries'].rename(columns={'Rising query': 'query'})
                elif 'query' not in formatted_data['related_queries'].columns and len(formatted_data['related_queries'].columns) > 0:
                    # If we have a DataFrame but no 'query' column, use the first column as 'query'
                    first_col = formatted_data['related_queries'].columns[0]
                    formatted_data['related_queries'] = formatted_data['related_queries'].rename(columns={first_col: 'query'})
            if 'value' not in formatted_data['related_queries'].columns and len(formatted_data['related_queries'].columns) > 1:
                # If we have a second column, use it as 'value'
                second_col = formatted_data['related_queries'].columns[1]
                formatted_data['related_queries'] = formatted_data['related_queries'].rename(columns={second_col: 'value'})
            elif 'value' not in formatted_data['related_queries'].columns:
                # If no 'value' column exists, add one with default values
                formatted_data['related_queries']['value'] = 0
        if not formatted_data['related_topics'].empty:
            # Handle different column names that might be present in the related topics DataFrame
            if 'topic' not in formatted_data['related_topics'].columns:
                if 'topic_title' in formatted_data['related_topics'].columns:
                    formatted_data['related_topics'] = formatted_data['related_topics'].rename(columns={'topic_title': 'topic'})
                elif 'topic' not in formatted_data['related_topics'].columns and len(formatted_data['related_topics'].columns) > 0:
                    # If we have a DataFrame but no 'topic' column, use the first column as 'topic'
                    first_col = formatted_data['related_topics'].columns[0]
                    formatted_data['related_topics'] = formatted_data['related_topics'].rename(columns={first_col: 'topic'})
            if 'value' not in formatted_data['related_topics'].columns and len(formatted_data['related_topics'].columns) > 1:
                # If we have a second column, use it as 'value'
                second_col = formatted_data['related_topics'].columns[1]
                formatted_data['related_topics'] = formatted_data['related_topics'].rename(columns={second_col: 'value'})
            elif 'value' not in formatted_data['related_topics'].columns:
                # If no 'value' column exists, add one with default values
                formatted_data['related_topics']['value'] = 0
        # Clear the progress container after completion
        progress_container.empty()
        progress_bar.empty()
        return formatted_data
    except Exception as e:
        logger.error(f"Error in Google Trends analysis: {e}")
        update_progress(f"Error in Google Trends analysis: {str(e)}", level="error", progress=100)
        # Clear the progress container after error
        progress_container.empty()
        progress_bar.empty()
        return {
            'related_keywords': [],
            'interest_over_time': pd.DataFrame(),
            'regional_interest': pd.DataFrame(),
            'related_queries': pd.DataFrame(),
            'related_topics': pd.DataFrame()
        }
 def metaphor_extract_titles_or_text(json_data, return_titles=True):
--- a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py
+++ b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py
@@ -262,285 +262,185 @@ def metaphor_search_articles(query, search_options: dict = None):
        except Exception as tavily_err:
            logger.warning(f"Error getting Tavily answer: {tavily_err}")
-        # Display results in Streamlit
+        # Return the formatted response without displaying it
-        streamlit_display_metaphor_results(formatted_response)
+        # The display will be handled by gpt_web_researcher
        return formatted_response
    except Exception as e:
        logger.error(f"Error in Exa searching articles: {e}")
        return None
-def streamlit_display_metaphor_results(metaphor_response: dict):
+def streamlit_display_metaphor_results(metaphor_response, search_keywords=None):
-    """
+    """Display Metaphor search results in Streamlit."""
    Display Metaphor search results in Streamlit with enhanced metrics and popovers
-    Args:
+    if not metaphor_response:
-        metaphor_response (dict): Response from Metaphor search
+        st.error("No search results found.")
    """
    if not metaphor_response or 'data' not in metaphor_response:
        st.error("No valid Metaphor search results to display")
        return
-
+    
    # Add debug logging
    logger.debug(f"Displaying Metaphor results. Type: {type(metaphor_response)}")
    if isinstance(metaphor_response, dict):
        logger.debug(f"Metaphor response keys: {metaphor_response.keys()}")
    # Initialize session state variables if they don't exist
    if 'search_insights' not in st.session_state:
        st.session_state.search_insights = None
    if 'metaphor_response' not in st.session_state:
-        st.session_state.metaphor_response = metaphor_response
+        st.session_state.metaphor_response = None
    if 'insights_generated' not in st.session_state:
        st.session_state.insights_generated = False
-    # Update the stored metaphor_response with the latest data
+    # Store the current response in session state
    st.session_state.metaphor_response = metaphor_response
    # Display metrics in columns
    col1, col2, col3 = st.columns(3)
-    # Calculate metrics
+    # Display search results
-    results = metaphor_response['data']['results']
+    st.subheader("🔍 Search Results")
    # Calculate metrics - handle different data structures
    results = []
    if isinstance(metaphor_response, dict):
        if 'data' in metaphor_response and 'results' in metaphor_response['data']:
            results = metaphor_response['data']['results']
        elif 'results' in metaphor_response:
            results = metaphor_response['results']
    total_results = len(results)
-    avg_score = sum(r['score'] for r in results if r['score']) / total_results if total_results > 0 else 0
+    avg_relevance = sum(r.get('score', 0) for r in results) / total_results if total_results > 0 else 0
    # Display metrics
    col1, col2 = st.columns(2)
    with col1:
-        st.metric(
+        st.metric("Total Results", total_results)
            label="Total Results",
            value=total_results
        )
    with col2:
-        if metaphor_response['data'].get('costDollars'):
+        st.metric("Average Relevance Score", f"{avg_relevance:.2f}")
            cost = metaphor_response['data']['costDollars']
            st.metric(
                label="Search Cost",
                value=f"${cost['total']:.3f}"
            )
    with col3:
        st.metric(
            label="Average Relevance Score",
            value=f"{avg_score:.2f}"
        )
    # Display AI-generated answers side by side
    if 'answer' in metaphor_response or 'tavily_answer' in metaphor_response:
        st.markdown("### 🤖 AI-Generated Research Answers")
        # Create two columns for side-by-side display
        tavily_col, metaphor_col = st.columns(2)
        # Display Tavily answer if available
        with tavily_col:
            if 'tavily_answer' in metaphor_response:
                st.markdown("#### 🔍 Tavily AI Answer")
                st.markdown(f"""
                <div style="background-color: #f0f2f6; padding: 20px; border-radius: 10px; border-left: 5px solid #FF4B4B;">
                    {metaphor_response['tavily_answer']}
                </div>
                """, unsafe_allow_html=True)
                if metaphor_response.get('tavily_cost_dollars'):
                    st.caption(f"Tavily Answer Cost: ${metaphor_response['tavily_cost_dollars']['total']:.3f}")
                if metaphor_response.get('tavily_citations'):
                    with st.expander("📚 Tavily Sources"):
                        for idx, citation in enumerate(metaphor_response['tavily_citations'], 1):
                            st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})")
            else:
                st.markdown("#### 🔍 Tavily AI Answer")
                st.info("No Tavily answer available for this query.")
        # Display Metaphor answer if available
        with metaphor_col:
            if 'answer' in metaphor_response:
                st.markdown("#### 🔍 Metaphor AI Answer")
                st.markdown(f"""
                <div style="background-color: #f0f2f6; padding: 20px; border-radius: 10px; border-left: 5px solid #4CAF50;">
                    {metaphor_response['answer']}
                </div>
                """, unsafe_allow_html=True)
                if metaphor_response.get('answerCostDollars'):
                    st.caption(f"Metaphor Answer Cost: ${metaphor_response['answerCostDollars']['total']:.3f}")
                if metaphor_response.get('citations'):
                    with st.expander("📚 Metaphor Sources"):
                        for idx, citation in enumerate(metaphor_response['citations'], 1):
                            st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})")
            else:
                st.markdown("#### 🔍 Metaphor AI Answer")
                st.info("No Metaphor answer available for this query.")
-    # Add "Get Search Insights" button - moved outside the AI answers conditional
+    # Display AI-generated answers if available
-    st.markdown("### 🔍 Search Insights")
+    if 'tavily_answer' in metaphor_response or 'metaphor_answer' in metaphor_response:
        st.subheader("🤖 AI-Generated Answers")
        if 'tavily_answer' in metaphor_response:
            st.markdown("**Tavily AI Answer:**")
            st.write(metaphor_response['tavily_answer'])
        if 'metaphor_answer' in metaphor_response:
            st.markdown("**Metaphor AI Answer:**")
            st.write(metaphor_response['metaphor_answer'])
-    # Create a container for the insights
+    # Get Search Insights button
-    insights_container = st.container()
+    if st.button("Generate Search Insights", key="metaphor_generate_insights_button"):
    # Use a button; the click is handled inline below rather than through a callback
    if st.button("Generate Search Insights", type="primary"):
        # Set a flag in session state to indicate that insights should be generated
        st.session_state.insights_generated = True
-        
+        st.rerun()
        # Store the current metaphor_response in session state
        st.session_state.metaphor_response = metaphor_response
        # Rerun the script; the insights_generated flag set above triggers insights generation
        st.experimental_rerun()
    # If insights should be generated, do it in a separate container
    if st.session_state.insights_generated:
        with insights_container:
            with st.spinner("Analyzing search results to generate insights..."):
                # Get the stored metaphor_response from session state
                stored_response = st.session_state.metaphor_response
                stored_results = stored_response['data']['results']
                # Prepare data for analysis
                analysis_data = {
                    "metaphor_results": stored_results,
                    "metaphor_answer": stored_response.get("answer", ""),
                    "tavily_answer": stored_response.get("tavily_answer", ""),
                    "metaphor_citations": stored_response.get("citations", []),
                    "tavily_citations": stored_response.get("tavily_citations", [])
                }
                # Create the analysis prompt
                analysis_prompt = f"""
                **Search Intent & User Needs Analysis**
                I have conducted research using both Tavily and Metaphor AI search engines. 
                Below is the data from both sources:
                **Metaphor AI Answer:**
                {analysis_data["metaphor_answer"]}
                **Tavily AI Answer:**
                {analysis_data["tavily_answer"]}
                **Search Results:**
                {[f"{i+1}. {r['title']} - {r['summary']}" for i, r in enumerate(analysis_data["metaphor_results"])]}
                **Citations:**
                {[f"{i+1}. {c.get('title', 'Untitled')} - {c.get('url', 'No URL')}" for i, c in enumerate(analysis_data["metaphor_citations"] + analysis_data["tavily_citations"])]}
                Based on this research data, please provide the following insights:
                **Search Intent & User Needs**
                ```
                Review the research data and identify:
                1. The distribution of search intent (categorize as Informational/Commercial/Navigational/Transactional)
                2. Most common user questions and their patterns
                3. Frequently mentioned pain points or challenges
                4. Recurring solutions or approaches to addressing these challenges
                5. Gaps between user questions and available answers
                Present findings in a structured format with percentages and specific examples.
                ```
                Format your response as a comprehensive analysis with clear sections, bullet points, and examples from the research data.
                """
                try:
                    # Import the llm_text_gen function
                    import importlib
                    text_gen_module = importlib.import_module('lib.gpt_providers.text_generation.main_text_generation')
                    if hasattr(text_gen_module, 'llm_text_gen'):
                        # Generate insights using llm_text_gen
                        insights = text_gen_module.llm_text_gen(analysis_prompt)
                        # Store insights in session state
                        st.session_state.search_insights = insights
                        # Reset the flag to prevent regeneration on next rerun
                        st.session_state.insights_generated = False
                    else:
                        st.error("Could not find llm_text_gen function in the text generation module.")
                except Exception as e:
                    st.error(f"Error generating insights: {str(e)}")
                    logger.error(f"Error generating insights: {e}")
    # Display insights if they exist in session state
    if st.session_state.search_insights:
-        with insights_container:
+        st.subheader("🔍 Search Insights")
-            st.markdown("### 🔍 Search Intent & User Needs Analysis")
+        st.write(st.session_state.search_insights)
-            st.markdown(st.session_state.search_insights)
+    
-
+    # Display search results in a table
-    # Create DataFrame from results
+    st.subheader("📊 Detailed Results")
    df = pd.DataFrame(results)
    # Prepare data for display
-    display_df = df.copy()
+    results_data = []
-    display_df['Visit Site'] = display_df['url']
+    for result in results:
        result_data = {
            'Title': result.get('title', ''),
            'URL': result.get('url', ''),
            'Snippet': result.get('summary', ''),
            'Relevance Score': result.get('score', 0),
            'Published Date': result.get('publishedDate', '')
        }
        results_data.append(result_data)
-    # Format publishedDate as string if it exists
+    # Create DataFrame
-    if 'publishedDate' in display_df.columns:
+    df = pd.DataFrame(results_data)
-        display_df['publishedDate'] = display_df['publishedDate'].apply(
+    
-            lambda x: x[:10] if isinstance(x, str) else 'N/A'
+    # Display the DataFrame if it's not empty
    if not df.empty:
        # Configure columns
        st.dataframe(
            df,
            column_config={
                "Title": st.column_config.TextColumn(
                    "Title",
                    help="Title of the search result",
                    width="large",
                ),
                "URL": st.column_config.LinkColumn(
                    "URL",
                    help="Link to the search result",
                    width="medium",
                    display_text="Visit Article",
                ),
                "Snippet": st.column_config.TextColumn(
                    "Snippet",
                    help="Summary of the search result",
                    width="large",
                ),
                "Relevance Score": st.column_config.NumberColumn(
                    "Relevance Score",
                    help="Relevance score of the search result",
                    format="%.2f",
                    width="small",
                ),
                "Published Date": st.column_config.DateColumn(
                    "Published Date",
                    help="Publication date of the search result",
                    width="medium",
                ),
            },
            hide_index=True,
        )
-
+        
-    # Configure columns for data editor
+        # Add popover for snippets
-    columns = {
+        st.markdown("""
-        'title': st.column_config.TextColumn(
+        <style>
-            'Title',
+        .snippet-popover {
-            width='large',
+            position: relative;
-            required=True,
+            display: inline-block;
-        ),
+        }
-        'author': st.column_config.TextColumn(
+        .snippet-popover .snippet-content {
-            'Author',
+            visibility: hidden;
-            width='medium',
+            width: 300px;
-        ),
+            background-color: #f9f9f9;
-        'publishedDate': st.column_config.TextColumn(
+            color: #333;
-            'Published Date',
+            text-align: left;
-            width='medium',
+            border-radius: 6px;
-        ),
+            padding: 10px;
-        'score': st.column_config.NumberColumn(
+            position: absolute;
-            'Relevance Score',
+            z-index: 1;
-            width='small',
+            bottom: 125%;
-            format="%.2f"
+            left: 50%;
-        ),
+            margin-left: -150px;
-        'Visit Site': st.column_config.LinkColumn(
+            opacity: 0;
-            'Link',
+            transition: opacity 0.3s;
-            width='small',
+            box-shadow: 0 2px 5px rgba(0,0,0,0.2);
-            display_text='Visit Site',
+        }
-        ),
+        .snippet-popover:hover .snippet-content {
-        'summary': st.column_config.TextColumn(
+            visibility: visible;
-            'Summary',
+            opacity: 1;
-            width='large',
+        }
-            required=True,
+        </style>
-        )
+        """, unsafe_allow_html=True)
-    }
+        
-
+        # Display snippets with popover
-    # Display results in data editor
+        st.subheader("📝 Snippets")
-    st.data_editor(
+        for i, result in enumerate(results):
-        display_df,
+            snippet = result.get('summary', '')
-        column_config=columns,
+            if snippet:
-        hide_index=True,
+                st.markdown(f"""
-        num_rows='dynamic',
+                <div class="snippet-popover">
-        disabled=True,
+                    <strong>{result.get('title', '')}</strong>
-        column_order=['title', 'author', 'publishedDate', 'score', 'summary', 'Visit Site']
+                    <div class="snippet-content">
-    )
+                        {snippet}
-
+                    </div>
-    # Display detailed summaries with popovers
+                </div>
-    st.write("### Detailed Summaries")
+                """, unsafe_allow_html=True)
-    for idx, result in enumerate(results, 1):
+    else:
-        with st.expander(f"📄 {result['title']}", expanded=False):
+        st.info("No detailed results available.")
-            col1, col2 = st.columns([3, 1])
+    
-            with col1:
+    # Add a collapsible section for the raw JSON data
-                st.markdown(f"**Summary**")
+    with st.expander("Research Results (JSON)", expanded=False):
-                st.markdown(result['summary'])
+        st.json(metaphor_response)
            with col2:
                st.markdown("**Details**")
                st.markdown(f"**Author:** {result['author'] if result['author'] else 'N/A'}")
                st.markdown(f"**Published:** {result['publishedDate'][:10] if result['publishedDate'] else 'N/A'}")
                st.markdown(f"**Score:** {result['score']:.2f}")
                st.markdown(f"[Visit Site]({result['url']})")
    # Display search metadata
    st.divider()
    col1, col2 = st.columns(2)
    with col1:
        st.caption(f"Search Type: {metaphor_response['data']['resolvedSearchType']}")
    with col2:
        st.caption(f"Request ID: {metaphor_response['data']['requestId']}")
 def metaphor_news_summarizer(news_keywords):
--- a/lib/alwrity_ui/google_trends_ui.py
+++ b/lib/alwrity_ui/google_trends_ui.py
@@ -0,0 +1,425 @@
 """
 Module for displaying Google Trends data in the Streamlit UI.
 This module provides functions for visualizing Google Trends data, including:
 - Interest over time
 - Regional interest
 - Related queries
 - Related topics
 """
 import streamlit as st
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 import logging
 # Set up logging
 logger = logging.getLogger(__name__)
 def display_google_trends_data(trends_data, search_keyword):
    """
    Render the complete Google Trends report as a group of Streamlit tabs.

    A warning is shown and nothing else is rendered when ``trends_data`` is
    falsy. Otherwise the page gets a heading, a collapsed explainer, one tab
    per data section, and a footer caption stamped with the current time.

    Args:
        trends_data (dict): Mapping read for the keys 'related_keywords',
            'interest_over_time', 'regional_interest', 'related_queries'
            and 'related_topics'; missing keys fall back to an empty value.
        search_keyword (str): Keyword interpolated into the report heading.
    """
    if not trends_data:
        st.warning("No Google Trends data available for this search.")
        return
    st.subheader(f"Google Trends Analysis for '{search_keyword}'")
    # Primer text shown inside the collapsed "About" expander
    about_text = """
        **What is Google Trends?**
        Google Trends is a public web facility that shows how often a particular search-term is entered relative to the total search-volume across various regions of the world, and in various languages.
        **What data is shown here?**
        - **Related Keywords**: Terms that are frequently searched together with your keyword
        - **Interest Over Time**: How interest in your keyword has changed over the past 12 months
        - **Regional Interest**: Where in the world your keyword is most popular
        - **Related Queries**: What people search for before and after searching for your keyword
        - **Related Topics**: Topics that are closely related to your keyword
        **How to interpret the data:**
        - Interest values range from 0 to 100, where 100 is the peak popularity for the term
        - A value of 50 means the term is half as popular as the peak
        - A value of 0 means there was not enough data for this term
        """
    with st.expander("ℹ️ About Google Trends Data", expanded=False):
        st.markdown(about_text)
    # One row per tab: (label, renderer, trends_data key, fallback factory)
    sections = (
        ("Related Keywords", display_keywords_section, 'related_keywords', list),
        ("Interest Over Time", display_interest_over_time, 'interest_over_time', pd.DataFrame),
        ("Regional Interest", display_regional_interest, 'regional_interest', pd.DataFrame),
        ("Related Queries", display_related_queries, 'related_queries', pd.DataFrame),
        ("Related Topics", display_related_topics, 'related_topics', pd.DataFrame),
    )
    tabs = st.tabs([label for label, _, _, _ in sections])
    for tab, (_, render, data_key, make_fallback) in zip(tabs, sections):
        with tab:
            render(trends_data.get(data_key, make_fallback()))
    # Footer naming the data source and the render time
    st.markdown("---")
    rendered_at = pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")
    st.caption(f"Data source: Google Trends | Last updated: {rendered_at}")
 def display_keywords_section(keywords):
    """
    Show the related keywords as a single-column table.

    Renders an info notice and returns early when ``keywords`` is falsy;
    otherwise shows a heading, a collapsed explainer, the table, and a
    caption with the number of keywords.
    """
    if not keywords:
        st.info("No related keywords data available.")
        return
    st.subheader("Related Keywords")
    st.write("Keywords related to your search:")
    # Explainer text for the collapsed "About" expander
    help_text = """
        **What are Related Keywords?**
        Related keywords are terms that are frequently searched together with your main keyword. 
        These keywords can help you understand what topics are associated with your search term 
        and can be valuable for content planning and SEO strategies.
        **How to use this data:**
        - Use these keywords to expand your content strategy
        - Identify gaps in your content that you could fill
        - Understand what your audience is interested in
        - Improve your SEO by incorporating these terms naturally in your content
        """
    with st.expander("ℹ️ About Related Keywords", expanded=False):
        st.markdown(help_text)
    # Wrap the keywords in a one-column frame so Streamlit renders a table
    keyword_table = pd.DataFrame(keywords, columns=['Keyword'])
    st.dataframe(keyword_table, use_container_width=True)
    keyword_count = len(keywords)
    st.caption(f"Found {keyword_count} related keywords")
 def display_interest_over_time(interest_df):
    """
    Display a line chart and summary metrics of interest over time.

    Args:
        interest_df (pd.DataFrame | None): Frame expected to carry a 'date'
            column and an 'interest' column whose values can be formatted as
            numbers. ``None`` or an empty frame yields an info notice instead
            of a chart.

    Any exception raised while building the chart is reported in the UI and
    logged with its traceback rather than propagated.
    """
    # None can arrive when the caller's dict holds the key with a None value,
    # which bypasses a ``.get(key, pd.DataFrame())`` default.
    if interest_df is None or interest_df.empty:
        st.info("No interest over time data available.")
        return
    st.subheader("Interest Over Time")
    # Add explanation about interest over time
    with st.expander("ℹ️ About Interest Over Time", expanded=False):
        st.markdown("""
        **What is Interest Over Time?**
        Interest Over Time shows how interest in your search term has changed over the past 12 months.
        The data is normalized and presented on a scale from 0 to 100, where 100 is the peak popularity 
        for the term, 50 means the term is half as popular, and 0 means there was not enough data.
        **How to interpret this chart:**
        - Look for peaks and valleys to identify trends
        - Compare with seasonal patterns or events
        - Identify if interest is growing, declining, or stable
        - Use this data to time your content releases for maximum impact
        """)
    try:
        # Ensure we have the required columns before plotting
        for required in ('date', 'interest'):
            if required not in interest_df.columns:
                st.error(f"Interest over time data is missing the '{required}' column.")
                return
        # Create the chart
        fig = px.line(
            interest_df,
            x='date',
            y='interest',
            title='Interest Over Time',
            labels={'date': 'Date', 'interest': 'Interest'},
            line_shape='spline'
        )
        fig.update_layout(
            xaxis_title="Date",
            yaxis_title="Interest",
            hovermode='x unified'
        )
        st.plotly_chart(fig, use_container_width=True)
        # Summary statistics; emptiness was already ruled out above, so no
        # second guard is needed here.
        interest = interest_df['interest']
        summary = (
            ("Average Interest", interest.mean()),
            ("Peak Interest", interest.max()),
            ("Lowest Interest", interest.min()),
        )
        for column, (label, value) in zip(st.columns(3), summary):
            with column:
                st.metric(label, f"{value:.1f}")
    except Exception as e:
        # UI boundary: surface the problem to the user and keep the traceback
        # in the log instead of crashing the page.
        st.error(f"Error displaying interest over time chart: {str(e)}")
        logger.exception("Error in display_interest_over_time: %s", e)
 def display_regional_interest(regional_df):
    """
    Display a world choropleth map and a top-five bar chart of interest by region.

    Args:
        regional_df (pd.DataFrame | None): Frame expected to carry a
            'country_code' column and an 'interest' column. ``None`` or an
            empty frame yields an info notice instead of charts.

    Any exception raised while building the charts is reported in the UI and
    logged with its traceback rather than propagated.
    """
    # None can arrive when the caller's dict holds the key with a None value,
    # which bypasses a ``.get(key, pd.DataFrame())`` default.
    if regional_df is None or regional_df.empty:
        st.info("No regional interest data available.")
        return
    st.subheader("Regional Interest")
    # Add explanation about regional interest. The colour guidance matches the
    # Viridis scale used below, which runs from dark purple (low) to bright
    # yellow (high).
    with st.expander("ℹ️ About Regional Interest", expanded=False):
        st.markdown("""
        **What is Regional Interest?**
        Regional Interest shows how interest in your search term varies across different countries.
        The data is normalized and presented on a scale from 0 to 100, where 100 is the peak popularity 
        for the term in that region, 50 means the term is half as popular, and 0 means there was not enough data.
        **How to interpret this map:**
        - Brighter (yellow) colors indicate higher interest in that region
        - Darker (purple) colors indicate lower interest
        - Hover over a country to see the exact interest value
        - Use this data to target your content to specific regions
        """)
    try:
        # Ensure we have the required columns before plotting
        for required in ('country_code', 'interest'):
            if required not in regional_df.columns:
                st.error(f"Regional interest data is missing the '{required}' column.")
                return
        # Create the choropleth map.
        # NOTE(review): Choropleth's default locationmode expects ISO-3 codes —
        # confirm that 'country_code' holds ISO-3 values.
        fig = go.Figure(data=go.Choropleth(
            locations=regional_df['country_code'],
            z=regional_df['interest'],
            text=regional_df['country_code'],  # This will show in the hover text
            colorscale='Viridis',
            zmin=0,
            zmax=100,
            marker_line_color='darkgray',
            marker_line_width=0.5,
            showscale=True,
            # The colorbar title is set once here; a separate colorbar_title
            # keyword would only repeat it.
            colorbar=dict(
                title="Interest Level",
                tickformat=".0f",
                tickmode="linear",
                tick0=0,
                dtick=20
            )
        ))
        # Update the layout for better visualization
        fig.update_layout(
            title=dict(
                text='Regional Interest Distribution',
                x=0.5,
                xanchor='center'
            ),
            geo=dict(
                showframe=False,
                showcoastlines=True,
                projection_type='equirectangular',
                showland=True,
                landcolor='lightgray',
                showocean=True,
                oceancolor='aliceblue',
                showcountries=True,
                countrycolor='darkgray'
            ),
            width=800,
            height=500,
            margin=dict(l=0, r=0, t=30, b=0)
        )
        # Display the map
        st.plotly_chart(fig, use_container_width=True)
        # Top 5 regions by interest; emptiness was already ruled out above,
        # so no second guard is needed here.
        st.subheader("Top Regions by Interest")
        top_regions = regional_df.sort_values('interest', ascending=False).head(5)
        fig_bar = go.Figure(data=[
            go.Bar(
                x=top_regions['country_code'],
                y=top_regions['interest'],
                text=top_regions['interest'].round(1),
                textposition='auto',
                marker_color='rgb(55, 83, 109)'
            )
        ])
        fig_bar.update_layout(
            title='Top 5 Regions by Interest Level',
            xaxis_title='Region',
            yaxis_title='Interest Level',
            yaxis_range=[0, 100],
            showlegend=False
        )
        st.plotly_chart(fig_bar, use_container_width=True)
    except Exception as e:
        # UI boundary: surface the problem to the user and keep the traceback
        # in the log instead of crashing the page.
        st.error(f"Error displaying regional interest chart: {str(e)}")
        logger.exception("Error in display_regional_interest: %s", e)
 def display_related_queries(queries_df):
    """
    Display related queries as a table sorted by descending 'value'.

    Args:
        queries_df (pd.DataFrame | None): Frame expected to carry 'query' and
            'value' columns. ``None`` or an empty frame yields an info notice
            instead of a table.

    Any exception raised while sorting or rendering is reported in the UI and
    logged with its traceback rather than propagated.
    """
    # None can arrive when the caller's dict holds the key with a None value,
    # which bypasses a ``.get(key, pd.DataFrame())`` default.
    if queries_df is None or queries_df.empty:
        st.info("No related queries data available.")
        return
    st.subheader("Related Queries")
    # Add explanation about related queries
    with st.expander("ℹ️ About Related Queries", expanded=False):
        st.markdown("""
        **What are Related Queries?**
        Related Queries show what people search for before and after searching for your keyword.
        These queries can help you understand the search intent and context around your keyword.
        **How to interpret this data:**
        - The 'value' column shows the relative interest compared to your main keyword
        - Higher values indicate stronger association with your keyword
        - Use these queries to expand your content strategy
        - Identify what questions your audience is trying to answer
        """)
    try:
        # Ensure we have the required columns before sorting
        for required in ('query', 'value'):
            if required not in queries_df.columns:
                st.error(f"Related queries data is missing the '{required}' column.")
                return
        # sort_values returns a new frame, so the caller's data is not reordered
        queries_df = queries_df.sort_values('value', ascending=False)
        # Display as a table
        st.dataframe(queries_df, use_container_width=True)
        # Add a note about the number of queries
        st.caption(f"Found {len(queries_df)} related queries")
    except Exception as e:
        # UI boundary: surface the problem to the user and keep the traceback
        # in the log instead of crashing the page.
        st.error(f"Error displaying related queries: {str(e)}")
        logger.exception("Error in display_related_queries: %s", e)
 def display_related_topics(topics_df):
    """
    Display related topics as a table sorted by descending 'value'.

    Args:
        topics_df (pd.DataFrame | None): Frame expected to carry 'topic' and
            'value' columns. ``None`` or an empty frame yields an info notice
            instead of a table.

    Any exception raised while sorting or rendering is reported in the UI and
    logged with its traceback rather than propagated.
    """
    # None can arrive when the caller's dict holds the key with a None value,
    # which bypasses a ``.get(key, pd.DataFrame())`` default.
    if topics_df is None or topics_df.empty:
        st.info("No related topics data available.")
        return
    st.subheader("Related Topics")
    # Add explanation about related topics
    with st.expander("ℹ️ About Related Topics", expanded=False):
        st.markdown("""
        **What are Related Topics?**
        Related Topics show broader topics that are associated with your search term.
        These topics can help you understand the broader context and themes related to your keyword.
        **How to interpret this data:**
        - The 'value' column shows the relative interest compared to your main keyword
        - Higher values indicate stronger association with your keyword
        - Use these topics to understand the broader context of your keyword
        - Identify themes that might be relevant to your content strategy
        """)
    try:
        # Ensure we have the required columns before sorting
        for required in ('topic', 'value'):
            if required not in topics_df.columns:
                st.error(f"Related topics data is missing the '{required}' column.")
                return
        # sort_values returns a new frame, so the caller's data is not reordered
        topics_df = topics_df.sort_values('value', ascending=False)
        # Display as a table
        st.dataframe(topics_df, use_container_width=True)
        # Add a note about the number of topics
        st.caption(f"Found {len(topics_df)} related topics")
    except Exception as e:
        # UI boundary: surface the problem to the user and keep the traceback
        # in the log instead of crashing the page.
        st.error(f"Error displaying related topics: {str(e)}")
        logger.exception("Error in display_related_topics: %s", e)
 def process_trends_data(trends_data):
    """
    Filter raw Google Trends data down to the sections that have content.

    Args:
        trends_data (dict): Raw Google Trends data. 'related_keywords' is
            copied through whenever the key is present; the DataFrame
            sections are copied only when they are neither ``None`` nor empty.

    Returns:
        dict: The retained sections, keyed exactly as in the input. Empty
        when ``trends_data`` is falsy.
    """
    if not trends_data:
        return {}
    processed_data = {}
    # Related keywords are passed through untouched, even when empty
    if 'related_keywords' in trends_data:
        processed_data['related_keywords'] = trends_data['related_keywords']
    # DataFrame-valued sections, in the order they are added to the result
    frame_keys = (
        'interest_over_time',
        'regional_interest',
        'related_queries',
        'related_topics',
    )
    for key in frame_keys:
        frame = trends_data.get(key)
        # A missing key and a key holding None are treated alike, so a None
        # value no longer raises AttributeError on ``.empty``.
        if frame is None or frame.empty:
            continue
        processed_data[key] = frame
    return processed_data
--- a/lib/alwrity_ui/keyword_web_researcher.py
+++ b/lib/alwrity_ui/keyword_web_researcher.py
@@ -102,7 +102,12 @@ def validate_api_keys():
    return api_keys
 def do_web_research():
-    """Input keywords and do web research with advanced options."""
+    """Main function to perform web research based on user input."""
    # Reset session state variables for this research operation
    if 'metaphor_results_displayed' in st.session_state:
        del st.session_state.metaphor_results_displayed
    logger.info("Starting do_web_research function")
    try:
@@ -509,7 +514,7 @@ def do_web_research():
                    status_display.success("✨ Research completed!")
                    # Display results in an organized way
-                    with st.expander("📊 Research Results", expanded=True):
+                    with st.expander("📊 Research Results", expanded=False):
                        st.write(web_research_result)
                else:
                    st.warning("No results found for your search")