diff --git a/lib/ai_web_researcher/google_trends_researcher.py b/lib/ai_web_researcher/google_trends_researcher.py index 6e2dde5d..2088d85f 100644 --- a/lib/ai_web_researcher/google_trends_researcher.py +++ b/lib/ai_web_researcher/google_trends_researcher.py @@ -105,124 +105,55 @@ def plot_interest_by_region(kw_list): -def get_related_queries_and_save_csv(keywords, hl='en-US', tz=360, cat=0, timeframe='today 12-m'): - """ - Get related queries for the given search keywords and save the result to a CSV file. - - Args: - search_keywords (list): List of search keywords. - hl (str): Language parameter, default is 'en-US'. - tz (int): Timezone parameter, default is 360. - cat (int): Category parameter, default is 0. - timeframe (str): Timeframe parameter, default is 'today 12-m'. - - Returns: - pd.DataFrame: DataFrame containing related queries. - """ - try: - # Build model - pytrends = TrendReq(hl=hl, tz=tz) - pytrends.build_payload(kw_list=keywords, cat=cat, timeframe=timeframe) - - # Get related queries - data = pytrends.related_queries() - - # Extract data from the result - top_queries = list(data.values())[0]['top'] - rising_queries = list(data.values())[0]['rising'] - top_rising_queries = top_queries + rising_queries - - # Convert lists to DataFrames - df_top_queries = pd.DataFrame(top_queries) - df_rising_queries = pd.DataFrame(rising_queries) # Added this line - - # Rename columns to avoid duplicates - df_top_queries.columns = ['Top query', 'value'] - df_rising_queries.columns = ['Rising query', 'value'] - - # Save to CSV - all_queries_df = pd.concat([df_top_queries, df_rising_queries], axis=1) - #all_queries_df.to_csv('related_queries.csv', index=False) - - # Display additional information - console = Console() - # Display additional information with emojis and bold formatting - print("\nš¢āšØ ") - print("\n\033[1mš Top\033[0m: The most popular search queries. Scoring is on a relative scale where a value of 100 is the most commonly searched query, 50 is a query searched half as often, and a value of 0 is a query searched for less than 1% as often as the most popular query.\n") - print("\n\033[1mš Rising\033[0m: Queries with the biggest increase in search frequency since the last time period. Results marked 'Breakout' had a tremendous increase, probably because these queries are new and had few (if any) prior searches.\n") - # Display the DataFrame using tabulate - table = tabulate(all_queries_df, headers='keys', tablefmt='fancy_grid') - print(table) - # Save the combined table to a file - try: - save_in_file(table) - except Exception as save_results_err: - logger.error(f"Failed to save search results: {save_results_err}") - return top_rising_queries - - except Exception as e: - print(f"get_related_queries_and_save_csv: ERROR: An error occurred: {e}") - - def get_related_topics_and_save_csv(search_keywords): - """ - Get related topics for the given search keywords and save the result to a CSV file. - - Args: - search_keywords (list): List of search keywords. - - Returns: - pd.DataFrame: DataFrame containing related topics. - """ + search_keywords = [f"{search_keywords}"] try: - # Build model pytrends = TrendReq(hl='en-US', tz=360) + pytrends.build_payload(kw_list=search_keywords, timeframe='today 12-m') - # Build payload - # FIXME: Remove hardcoding. - pytrends.build_payload(search_keywords, cat=0, timeframe='today 12-m') - - # Get related topics - try: - data = pytrends.related_topics() - except Exception as err: - logger.error(f"Failed to get pytrends realted topics: {err}") - return None - - # Extract data from the result - top_topics = list(data.values())[0]['top'] - rising_topics = list(data.values())[0]['rising'] + # Get related topics - this returns a dictionary + topics_data = pytrends.related_topics() - # Convert lists to DataFrames - df_top_topics = pd.DataFrame(top_topics) - df_rising_topics = pd.DataFrame(rising_topics) - - # FIXME:Exclude specified columns - columns_to_exclude = ['hasData', 'value', 'topic_mid', 'link'] - df_top_topics = df_top_topics.drop(columns=columns_to_exclude, errors='ignore') - df_rising_topics = df_rising_topics.drop(columns=columns_to_exclude, errors='ignore') - - # Rename columns to avoid duplicates and provide meaningful names - df_top_topics.columns = ['Top- ' + col if col != 'topic_title' else col for col in df_top_topics.columns] - df_rising_topics.columns = ['Rising- ' + col if col != 'topic_title' else col for col in df_rising_topics.columns] - all_topics_df = pd.concat([df_top_topics, df_rising_topics], axis=1) - - print(f"\n\n š¢āšØ Rising and Trending Keywords for {search_keywords}\n") - print("\033[1mš Top\033[0m: The most popular search topics.") - print("\033[1mš Rising\033[0m: Topics experiencing a significant increase in search frequency since the last time period. Topics marked :pile_of_poop:'Breakout' had a tremendous surge, likely because they are new and had few prior searches.") - # Display the DataFrame using tabulate - pd.set_option('display.max_rows', all_topics_df.shape[0]+1) - print(all_topics_df.head(10)) - table = tabulate(all_topics_df, headers='keys', tablefmt='fancy_grid') - try: - save_in_file(table) - except Exception as save_results_err: - logger.error(f"Failed to save search results: {save_results_err}") - return all_topics_df - + # Extract data for the first keyword + if topics_data and search_keywords[0] in topics_data: + keyword_data = topics_data[search_keywords[0]] + + # Create two separate dataframes for top and rising + top_df = keyword_data.get('top', pd.DataFrame()) + rising_df = keyword_data.get('rising', pd.DataFrame()) + + return { + 'top': top_df[['topic_title', 'value']] if not top_df.empty else pd.DataFrame(), + 'rising': rising_df[['topic_title', 'value']] if not rising_df.empty else pd.DataFrame() + } except Exception as e: - logger.error(f"ERROR: An error occurred in related topics: {e}") - return pd.DataFrame() + logger.error(f"Error in related topics: {e}") + return {'top': pd.DataFrame(), 'rising': pd.DataFrame()} + +def get_related_queries_and_save_csv(search_keywords): + search_keywords = [f"{search_keywords}"] + try: + pytrends = TrendReq(hl='en-US', tz=360) + pytrends.build_payload(kw_list=search_keywords, timeframe='today 12-m') + + # Get related queries - this returns a dictionary + queries_data = pytrends.related_queries() + + # Extract data for the first keyword + if queries_data and search_keywords[0] in queries_data: + keyword_data = queries_data[search_keywords[0]] + + # Create two separate dataframes for top and rising + top_df = keyword_data.get('top', pd.DataFrame()) + rising_df = keyword_data.get('rising', pd.DataFrame()) + + return { + 'top': top_df if not top_df.empty else pd.DataFrame(), + 'rising': rising_df if not rising_df.empty else pd.DataFrame() + } + except Exception as e: + logger.error(f"Error in related queries: {e}") + return {'top': pd.DataFrame(), 'rising': pd.DataFrame()} def get_source(url): @@ -507,22 +438,17 @@ def do_google_trends_analysis(search_term): else: all_the_keywords.append(suggestions_df['Keywords'].tolist()) all_the_keywords = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in all_the_keywords]) + # Generate a random sleep time between 2 and 3 seconds time.sleep(random.uniform(2, 3)) - -# -# # FIXME: Get result from vision GPT. Fetch and visualize Google Trends data -# #trends_data = fetch_google_trends_interest_overtime("llamaindex") -# -# # FIXME: Plot Interest Over time. -# result_df = plot_interest_by_region(search_term) -# + # Display additional information try: result_df = get_related_topics_and_save_csv(search_term) + logger.info(f"Related topics:: result_df: {result_df}") # Extract 'Top' topic_title if result_df: - top_topic_title = result_df['topic_title'].values.tolist() + top_topic_title = result_df['top']['topic_title'].values.tolist() # Join each sublist into one string separated by comma #top_topic_title = [','.join(filter(None, map(str, sublist))) for sublist in top_topic_title] top_topic_title = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in top_topic_title]) @@ -551,3 +477,77 @@ def do_google_trends_analysis(search_term): return(all_the_keywords) except Exception as e: logger.error(f"Error in Google Trends Analysis: {e}") + + +def get_trending_searches(country='united_states'): + """Get trending searches for a specific country.""" + try: + pytrends = TrendReq(hl='en-US', tz=360) + trending_searches = pytrends.trending_searches(pn=country) + return trending_searches + except Exception as e: + logger.error(f"Error getting trending searches: {e}") + return pd.DataFrame() + +def display_trending_searches(trending_df): + """Display trending searches in the UI.""" + if trending_df.empty: + st.info("No trending searches data available.") + return + + st.subheader("š Trending Searches") + + # Display as numbered list with emojis + for idx, search in enumerate(trending_df[0].head(10), 1): + st.write(f"{idx}. š {search}") + +def get_realtime_trends(country='US'): + """Get realtime trending searches for a specific country.""" + try: + pytrends = TrendReq(hl='en-US', tz=360) + realtime_trends = pytrends.realtime_trending_searches(pn=country) + return realtime_trends + except Exception as e: + logger.error(f"Error getting realtime trends: {e}") + return pd.DataFrame() + +def display_realtime_trends(trends_df): + """Display realtime trending searches in the UI.""" + if trends_df.empty: + st.info("No realtime trends data available.") + return + + st.subheader("ā” Realtime Trends") + + # Create tabs for different categories + if not trends_df.empty: + # Display top 5 trends with their titles and articles + for _, row in trends_df.head(5).iterrows(): + with st.expander(f"š„ {row.get('title', 'Trending Topic')}"): + st.write(f"**Traffic:** {row.get('traffic', 'N/A')}") + if 'articles' in row: + st.write("š° Related Articles:") + for article in row['articles'][:3]: # Show top 3 articles + st.write(f"- {article['title']}") + +def display_google_trends_data(trends_data, search_keyword): + # ... existing code ... + + # Create tabs for different sections + tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([ + "Related Keywords", + "Interest Over Time", + "Regional Interest", + "Related Queries", + "Related Topics", + "Trending Now" + ]) + + # ... existing tab code ... + + with tab6: + col1, col2 = st.columns(2) + with col1: + display_trending_searches(trends_data.get('trending_searches', pd.DataFrame())) + with col2: + display_realtime_trends(trends_data.get('realtime_trends', pd.DataFrame())) diff --git a/lib/ai_web_researcher/gpt_online_researcher.py b/lib/ai_web_researcher/gpt_online_researcher.py index d2238143..0719680f 100644 --- a/lib/ai_web_researcher/gpt_online_researcher.py +++ b/lib/ai_web_researcher/gpt_online_researcher.py @@ -27,15 +27,19 @@ from pathlib import Path import sys from datetime import datetime import streamlit as st +import pandas as pd +import random +import numpy as np from lib.alwrity_ui.display_google_serp_results import ( process_research_results, process_search_results, display_research_results ) +from lib.alwrity_ui.google_trends_ui import display_google_trends_data, process_trends_data from .tavily_ai_search import get_tavilyai_results -from .metaphor_basic_neural_web_search import metaphor_search_articles +from .metaphor_basic_neural_web_search import metaphor_search_articles, streamlit_display_metaphor_results from .google_serp_search import google_search from .google_trends_researcher import do_google_trends_analysis #from .google_gemini_web_researcher import do_gemini_web_research @@ -56,6 +60,10 @@ def gpt_web_researcher(search_keywords, search_mode, **kwargs): logger.debug(f"Additional parameters: {kwargs}") try: + # Reset session state variables for this research operation + if 'metaphor_results_displayed' in st.session_state: + del st.session_state.metaphor_results_displayed + # Initialize result container research_results = None @@ -157,13 +165,76 @@ def gpt_web_researcher(search_keywords, search_mode, **kwargs): update_progress("Metaphor AI search failed, continuing with Tavily results only...", level="warning") else: update_progress("Metaphor AI search completed successfully", progress=75) + # Add debug logging to check the structure of metaphor_results + logger.debug(f"Metaphor results structure: {type(metaphor_results)}") + if isinstance(metaphor_results, dict): + logger.debug(f"Metaphor results keys: {metaphor_results.keys()}") + if 'data' in metaphor_results: + logger.debug(f"Metaphor data keys: {metaphor_results['data'].keys()}") + if 'results' in metaphor_results['data']: + logger.debug(f"Number of results: {len(metaphor_results['data']['results'])}") + + # Display Metaphor results only if not already displayed + if 'metaphor_results_displayed' not in st.session_state: + st.session_state.metaphor_results_displayed = True + # Make sure to pass the correct parameters to streamlit_display_metaphor_results + streamlit_display_metaphor_results(metaphor_results, search_keywords) + + # Add Google Trends Analysis + update_progress("Initiating Google Trends analysis...", progress=80) + try: + # Add an informative message about Google Trends + with st.expander("ā¹ļø About Google Trends Analysis", expanded=False): + st.markdown(""" + **What is Google Trends Analysis?** + + Google Trends Analysis provides insights into how often a particular search-term is entered relative to the total search-volume across various regions of the world, and in various languages. + + **What data will be shown?** + + - **Related Keywords**: Terms that are frequently searched together with your keyword + - **Interest Over Time**: How interest in your keyword has changed over the past 12 months + - **Regional Interest**: Where in the world your keyword is most popular + - **Related Queries**: What people search for before and after searching for your keyword + - **Related Topics**: Topics that are closely related to your keyword + + **How to use this data:** + + - Identify trending topics in your industry + - Understand seasonal patterns in search behavior + - Discover related keywords for content planning + - Target content to specific regions with high interest + """) + + trends_results = do_google_pytrends_analysis(search_keywords) + if trends_results: + update_progress("Google Trends analysis completed successfully", progress=90) + # Store trends results in the research_results + if metaphor_results: + metaphor_results['trends_data'] = trends_results + else: + # If metaphor_results is None, create a new container for results + metaphor_results = {'trends_data': trends_results} + + # Display Google Trends data using the new UI module + display_google_trends_data(trends_results, search_keywords) + else: + update_progress("Google Trends analysis returned no results", level="warning") + except Exception as trends_err: + logger.error(f"Google Trends analysis failed: {trends_err}") + update_progress("Google Trends analysis failed", level="warning") + st.error(f"Error in Google Trends analysis: {str(trends_err)}") + + # Return the combined results + update_progress("Research completed!", progress=100, level="success") + return metaphor_results or t_results except Exception as ai_err: error_msg = f"AI research pipeline failed: {str(ai_err)}" logger.error(error_msg, exc_info=True) update_progress(error_msg, level="error") raise - + else: error_msg = f"Unsupported search mode: {search_mode}" logger.error(error_msg) @@ -316,13 +387,355 @@ def do_metaphor_ai_research(search_keywords): return None, None -def do_google_pytrends_analysis(search_keywords): - """ """ +def do_google_pytrends_analysis(keywords): + """ + Perform Google Trends analysis for the given keywords. + + Args: + keywords (str): The search keywords to analyze + + Returns: + dict: A dictionary containing formatted Google Trends data with the following keys: + - related_keywords: List of related keywords + - interest_over_time: DataFrame with date and interest columns + - regional_interest: DataFrame with country_code, country, and interest columns + - related_queries: DataFrame with query and value columns + - related_topics: DataFrame with topic and value columns + """ + logger.info(f"Performing Google Trends analysis for keywords: {keywords}") + + # Create a progress container for Streamlit + progress_container = st.empty() + progress_bar = st.progress(0) + + def update_progress(message, progress=None, level="info"): + """Helper function to update progress in Streamlit UI""" + if progress is not None: + progress_bar.progress(progress) + + if level == "error": + progress_container.error(f"š« {message}") + elif level == "warning": + progress_container.warning(f"ā ļø {message}") + else: + progress_container.info(f"š {message}") + logger.debug(f"Progress update [{level}]: {message}") + try: - logger.info(f"Do Google Trends analysis for given keywords: {search_keywords}") - return(do_google_trends_analysis(search_keywords)) - except Exception as err: - logger.error(f"Failed to do google trends analysis: {err}") + # Initialize the formatted data dictionary + formatted_data = { + 'related_keywords': [], + 'interest_over_time': pd.DataFrame(), + 'regional_interest': pd.DataFrame(), + 'related_queries': pd.DataFrame(), + 'related_topics': pd.DataFrame() + } + + # Get raw trends data from google_trends_researcher + update_progress("Fetching Google Trends data...", progress=10) + raw_trends_data = do_google_trends_analysis(keywords) + + if not raw_trends_data: + logger.warning("No Google Trends data returned") + update_progress("No Google Trends data returned", level="warning", progress=20) + return formatted_data + + # Process related keywords from the raw data + update_progress("Processing related keywords...", progress=30) + if isinstance(raw_trends_data, list): + formatted_data['related_keywords'] = raw_trends_data + elif isinstance(raw_trends_data, dict): + if 'keywords' in raw_trends_data: + formatted_data['related_keywords'] = raw_trends_data['keywords'] + if 'interest_over_time' in raw_trends_data: + formatted_data['interest_over_time'] = raw_trends_data['interest_over_time'] + if 'regional_interest' in raw_trends_data: + formatted_data['regional_interest'] = raw_trends_data['regional_interest'] + if 'related_queries' in raw_trends_data: + formatted_data['related_queries'] = raw_trends_data['related_queries'] + if 'related_topics' in raw_trends_data: + formatted_data['related_topics'] = raw_trends_data['related_topics'] + + # If we have keywords but missing other data, try to fetch them using pytrends directly + if formatted_data['related_keywords'] and ( + formatted_data['interest_over_time'].empty or + formatted_data['regional_interest'].empty or + formatted_data['related_queries'].empty or + formatted_data['related_topics'].empty + ): + try: + update_progress("Fetching additional data from Google Trends API...", progress=40) + from pytrends.request import TrendReq + pytrends = TrendReq(hl='en-US', tz=360) + + # Build payload with the main keyword + update_progress("Building search payload...", progress=45) + pytrends.build_payload([keywords], timeframe='today 12-m', geo='') + + # Get interest over time if missing + if formatted_data['interest_over_time'].empty: + try: + update_progress("Fetching interest over time data...", progress=50) + interest_df = pytrends.interest_over_time() + if not interest_df.empty: + formatted_data['interest_over_time'] = interest_df.reset_index() + update_progress(f"Successfully fetched interest over time data with {len(formatted_data['interest_over_time'])} data points", progress=55) + else: + update_progress("No interest over time data available", level="warning", progress=55) + except Exception as e: + logger.error(f"Error fetching interest over time: {e}") + update_progress(f"Error fetching interest over time: {str(e)}", level="warning", progress=55) + + # Get regional interest if missing + if formatted_data['regional_interest'].empty: + try: + update_progress("Fetching regional interest data...", progress=60) + regional_df = pytrends.interest_by_region() + if not regional_df.empty: + formatted_data['regional_interest'] = regional_df.reset_index() + update_progress(f"Successfully fetched regional interest data for {len(formatted_data['regional_interest'])} regions", progress=65) + else: + update_progress("No regional interest data available", level="warning", progress=65) + except Exception as e: + logger.error(f"Error fetching regional interest: {e}") + update_progress(f"Error fetching regional interest: {str(e)}", level="warning", progress=65) + + # Get related queries if missing + if formatted_data['related_queries'].empty: + try: + update_progress("Fetching related queries data...", progress=70) + # Get related queries data + related_queries = pytrends.related_queries() + + # Create empty DataFrame as fallback + formatted_data['related_queries'] = pd.DataFrame(columns=['query', 'value']) + + # Simple direct approach to avoid list index errors + if related_queries and isinstance(related_queries, dict): + # Check if our keyword exists in the results + if keywords in related_queries: + keyword_data = related_queries[keywords] + + # Process top queries if available + if 'top' in keyword_data and keyword_data['top'] is not None: + try: + update_progress("Processing top related queries...", progress=75) + # Convert to DataFrame if it's not already + if isinstance(keyword_data['top'], pd.DataFrame): + top_df = keyword_data['top'] + else: + # Try to convert to DataFrame + top_df = pd.DataFrame(keyword_data['top']) + + # Ensure it has the right columns + if not top_df.empty: + # Rename columns if needed + if 'query' in top_df.columns: + # Already has the right column name + pass + elif len(top_df.columns) > 0: + # Use first column as query + top_df = top_df.rename(columns={top_df.columns[0]: 'query'}) + + # Add to our results + formatted_data['related_queries'] = top_df + update_progress(f"Successfully processed {len(top_df)} top related queries", progress=80) + except Exception as e: + logger.warning(f"Error processing top queries: {e}") + update_progress(f"Error processing top queries: {str(e)}", level="warning", progress=80) + + # Process rising queries if available + if 'rising' in keyword_data and keyword_data['rising'] is not None: + try: + update_progress("Processing rising related queries...", progress=85) + # Convert to DataFrame if it's not already + if isinstance(keyword_data['rising'], pd.DataFrame): + rising_df = keyword_data['rising'] + else: + # Try to convert to DataFrame + rising_df = pd.DataFrame(keyword_data['rising']) + + # Ensure it has the right columns + if not rising_df.empty: + # Rename columns if needed + if 'query' in rising_df.columns: + # Already has the right column name + pass + elif len(rising_df.columns) > 0: + # Use first column as query + rising_df = rising_df.rename(columns={rising_df.columns[0]: 'query'}) + + # Combine with existing data if we have any + if not formatted_data['related_queries'].empty: + formatted_data['related_queries'] = pd.concat([formatted_data['related_queries'], rising_df]) + update_progress(f"Successfully processed {len(rising_df)} rising related queries", progress=90) + else: + formatted_data['related_queries'] = rising_df + update_progress(f"Successfully processed {len(rising_df)} rising related queries", progress=90) + except Exception as e: + logger.warning(f"Error processing rising queries: {e}") + update_progress(f"Error processing rising queries: {str(e)}", level="warning", progress=90) + except Exception as e: + logger.error(f"Error fetching related queries: {e}") + update_progress(f"Error fetching related queries: {str(e)}", level="warning", progress=90) + # Ensure we have an empty DataFrame with the right columns + formatted_data['related_queries'] = pd.DataFrame(columns=['query', 'value']) + + # Get related topics if missing + if formatted_data['related_topics'].empty: + try: + update_progress("Fetching related topics data...", progress=95) + # Get related topics data + related_topics = pytrends.related_topics() + + # Create empty DataFrame as fallback + formatted_data['related_topics'] = pd.DataFrame(columns=['topic', 'value']) + + # Simple direct approach to avoid list index errors + if related_topics and isinstance(related_topics, dict): + # Check if our keyword exists in the results + if keywords in related_topics: + keyword_data = related_topics[keywords] + + # Process top topics if available + if 'top' in keyword_data and keyword_data['top'] is not None: + try: + update_progress("Processing top related topics...", progress=97) + # Convert to DataFrame if it's not already + if isinstance(keyword_data['top'], pd.DataFrame): + top_df = keyword_data['top'] + else: + # Try to convert to DataFrame + top_df = pd.DataFrame(keyword_data['top']) + + # Ensure it has the right columns + if not top_df.empty: + # Rename columns if needed + if 'topic_title' in top_df.columns: + top_df = top_df.rename(columns={'topic_title': 'topic'}) + elif len(top_df.columns) > 0 and 'topic' not in top_df.columns: + # Use first column as topic + top_df = top_df.rename(columns={top_df.columns[0]: 'topic'}) + + # Add to our results + formatted_data['related_topics'] = top_df + update_progress(f"Successfully processed {len(top_df)} top related topics", progress=98) + except Exception as e: + logger.warning(f"Error processing top topics: {e}") + update_progress(f"Error processing top topics: {str(e)}", level="warning", progress=98) + + # Process rising topics if available + if 'rising' in keyword_data and keyword_data['rising'] is not None: + try: + update_progress("Processing rising related topics...", progress=99) + # Convert to DataFrame if it's not already + if isinstance(keyword_data['rising'], pd.DataFrame): + rising_df = keyword_data['rising'] + else: + # Try to convert to DataFrame + rising_df = pd.DataFrame(keyword_data['rising']) + + # Ensure it has the right columns + if not rising_df.empty: + # Rename columns if needed + if 'topic_title' in rising_df.columns: + rising_df = rising_df.rename(columns={'topic_title': 'topic'}) + elif len(rising_df.columns) > 0 and 'topic' not in rising_df.columns: + # Use first column as topic + rising_df = rising_df.rename(columns={rising_df.columns[0]: 'topic'}) + + # Combine with existing data if we have any + if not formatted_data['related_topics'].empty: + formatted_data['related_topics'] = pd.concat([formatted_data['related_topics'], rising_df]) + update_progress(f"Successfully processed {len(rising_df)} rising related topics", progress=100) + else: + formatted_data['related_topics'] = rising_df + update_progress(f"Successfully processed {len(rising_df)} rising related topics", progress=100) + except Exception as e: + logger.warning(f"Error processing rising topics: {e}") + update_progress(f"Error processing rising topics: {str(e)}", level="warning", progress=100) + except Exception as e: + logger.error(f"Error fetching related topics: {e}") + update_progress(f"Error fetching related topics: {str(e)}", level="warning", progress=100) + # Ensure we have an empty DataFrame with the right columns + formatted_data['related_topics'] = pd.DataFrame(columns=['topic', 'value']) + + except Exception as e: + logger.error(f"Error fetching additional trends data: {e}") + update_progress(f"Error fetching additional trends data: {str(e)}", level="warning", progress=100) + + # Ensure all DataFrames have the correct column names for the UI + update_progress("Finalizing data formatting...", progress=100) + + if not formatted_data['interest_over_time'].empty: + if 'date' not in formatted_data['interest_over_time'].columns: + formatted_data['interest_over_time'] = formatted_data['interest_over_time'].reset_index() + if 'interest' not in formatted_data['interest_over_time'].columns and keywords in formatted_data['interest_over_time'].columns: + formatted_data['interest_over_time'] = formatted_data['interest_over_time'].rename(columns={keywords: 'interest'}) + + if not formatted_data['regional_interest'].empty: + if 'country_code' not in formatted_data['regional_interest'].columns and 'geoName' in formatted_data['regional_interest'].columns: + formatted_data['regional_interest'] = formatted_data['regional_interest'].rename(columns={'geoName': 'country_code'}) + if 'interest' not in formatted_data['regional_interest'].columns and keywords in formatted_data['regional_interest'].columns: + formatted_data['regional_interest'] = formatted_data['regional_interest'].rename(columns={keywords: 'interest'}) + + if not formatted_data['related_queries'].empty: + # Handle different column names that might be present in the related queries DataFrame + if 'query' not in formatted_data['related_queries'].columns: + if 'Top query' in formatted_data['related_queries'].columns: + formatted_data['related_queries'] = formatted_data['related_queries'].rename(columns={'Top query': 'query'}) + elif 'Rising query' in formatted_data['related_queries'].columns: + formatted_data['related_queries'] = formatted_data['related_queries'].rename(columns={'Rising query': 'query'}) + elif 'query' not in formatted_data['related_queries'].columns and len(formatted_data['related_queries'].columns) > 0: + # If we have a DataFrame but no 'query' column, use the first column as 'query' + first_col = formatted_data['related_queries'].columns[0] + formatted_data['related_queries'] = formatted_data['related_queries'].rename(columns={first_col: 'query'}) + + if 'value' not in formatted_data['related_queries'].columns and len(formatted_data['related_queries'].columns) > 1: + # If we have a second column, use it as 'value' + second_col = formatted_data['related_queries'].columns[1] + formatted_data['related_queries'] = formatted_data['related_queries'].rename(columns={second_col: 'value'}) + elif 'value' not in formatted_data['related_queries'].columns: + # If no 'value' column exists, add one with default values + formatted_data['related_queries']['value'] = 0 + + if not formatted_data['related_topics'].empty: + # Handle different column names that might be present in the related topics DataFrame + if 'topic' not in formatted_data['related_topics'].columns: + if 'topic_title' in formatted_data['related_topics'].columns: + formatted_data['related_topics'] = formatted_data['related_topics'].rename(columns={'topic_title': 'topic'}) + elif 'topic' not in formatted_data['related_topics'].columns and len(formatted_data['related_topics'].columns) > 0: + # If we have a DataFrame but no 'topic' column, use the first column as 'topic' + first_col = formatted_data['related_topics'].columns[0] + formatted_data['related_topics'] = formatted_data['related_topics'].rename(columns={first_col: 'topic'}) + + if 'value' not in formatted_data['related_topics'].columns and len(formatted_data['related_topics'].columns) > 1: + # If we have a second column, use it as 'value' + second_col = formatted_data['related_topics'].columns[1] + formatted_data['related_topics'] = formatted_data['related_topics'].rename(columns={second_col: 'value'}) + elif 'value' not in formatted_data['related_topics'].columns: + # If no 'value' column exists, add one with default values + formatted_data['related_topics']['value'] = 0 + + # Clear the progress container after completion + progress_container.empty() + progress_bar.empty() + + return formatted_data + + except Exception as e: + logger.error(f"Error in Google Trends analysis: {e}") + update_progress(f"Error in Google Trends analysis: {str(e)}", level="error", progress=100) + # Clear the progress container after error + progress_container.empty() + progress_bar.empty() + return { + 'related_keywords': [], + 'interest_over_time': pd.DataFrame(), + 'regional_interest': pd.DataFrame(), + 'related_queries': pd.DataFrame(), + 'related_topics': pd.DataFrame() + } def metaphor_extract_titles_or_text(json_data, return_titles=True): diff --git a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py index 209bab1e..8ae72836 100644 --- a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py +++ b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py @@ -262,285 +262,185 @@ def metaphor_search_articles(query, search_options: dict = None): except Exception as tavily_err: logger.warning(f"Error getting Tavily answer: {tavily_err}") - # Display results in Streamlit - streamlit_display_metaphor_results(formatted_response) + # Return the formatted response without displaying it + # The display will be handled by gpt_web_researcher return formatted_response except Exception as e: logger.error(f"Error in Exa searching articles: {e}") return None -def streamlit_display_metaphor_results(metaphor_response: dict): - """ - Display Metaphor search results in Streamlit with enhanced metrics and popovers +def streamlit_display_metaphor_results(metaphor_response, search_keywords=None): + """Display Metaphor search results in Streamlit.""" - Args: - metaphor_response (dict): Response from Metaphor search - """ - if not metaphor_response or 'data' not in metaphor_response: - st.error("No valid Metaphor search results to display") + if not metaphor_response: + st.error("No search results found.") return - + + # Add debug logging + logger.debug(f"Displaying Metaphor results. Type: {type(metaphor_response)}") + if isinstance(metaphor_response, dict): + logger.debug(f"Metaphor response keys: {metaphor_response.keys()}") + # Initialize session state variables if they don't exist if 'search_insights' not in st.session_state: st.session_state.search_insights = None - if 'metaphor_response' not in st.session_state: - st.session_state.metaphor_response = metaphor_response - + st.session_state.metaphor_response = None if 'insights_generated' not in st.session_state: st.session_state.insights_generated = False - # Update the stored metaphor_response with the latest data + # Store the current response in session state st.session_state.metaphor_response = metaphor_response - - # Display metrics in columns - col1, col2, col3 = st.columns(3) - # Calculate metrics - results = metaphor_response['data']['results'] + # Display search results + st.subheader("š Search Results") + + # Calculate metrics - handle different data structures + results = [] + if isinstance(metaphor_response, dict): + if 'data' in metaphor_response and 'results' in metaphor_response['data']: + results = metaphor_response['data']['results'] + elif 'results' in metaphor_response: + results = metaphor_response['results'] + total_results = len(results) - avg_score = sum(r['score'] for r in results if r['score']) / total_results if total_results > 0 else 0 + avg_relevance = sum(r.get('score', 0) for r in results) / total_results if total_results > 0 else 0 + # Display metrics + col1, col2 = st.columns(2) with col1: - st.metric( - label="Total Results", - value=total_results - ) + st.metric("Total Results", total_results) with col2: - if metaphor_response['data'].get('costDollars'): - cost = metaphor_response['data']['costDollars'] - st.metric( - label="Search Cost", - value=f"${cost['total']:.3f}" - ) - with col3: - st.metric( - label="Average Relevance Score", - value=f"{avg_score:.2f}" - ) - - # Display AI-generated answers side by side - if 'answer' in metaphor_response or 'tavily_answer' in metaphor_response: - st.markdown("### š¤ AI-Generated Research Answers") - - # Create two columns for side-by-side display - tavily_col, metaphor_col = st.columns(2) - - # Display Tavily answer if available - with tavily_col: - if 'tavily_answer' in metaphor_response: - st.markdown("#### š Tavily AI Answer") - st.markdown(f""" -