diff --git a/lib/ai_web_researcher/google_trends_researcher.py b/lib/ai_web_researcher/google_trends_researcher.py index 6e2dde5d..2088d85f 100644 --- a/lib/ai_web_researcher/google_trends_researcher.py +++ b/lib/ai_web_researcher/google_trends_researcher.py @@ -105,124 +105,55 @@ def plot_interest_by_region(kw_list): -def get_related_queries_and_save_csv(keywords, hl='en-US', tz=360, cat=0, timeframe='today 12-m'): - """ - Get related queries for the given search keywords and save the result to a CSV file. - - Args: - search_keywords (list): List of search keywords. - hl (str): Language parameter, default is 'en-US'. - tz (int): Timezone parameter, default is 360. - cat (int): Category parameter, default is 0. - timeframe (str): Timeframe parameter, default is 'today 12-m'. - - Returns: - pd.DataFrame: DataFrame containing related queries. - """ - try: - # Build model - pytrends = TrendReq(hl=hl, tz=tz) - pytrends.build_payload(kw_list=keywords, cat=cat, timeframe=timeframe) - - # Get related queries - data = pytrends.related_queries() - - # Extract data from the result - top_queries = list(data.values())[0]['top'] - rising_queries = list(data.values())[0]['rising'] - top_rising_queries = top_queries + rising_queries - - # Convert lists to DataFrames - df_top_queries = pd.DataFrame(top_queries) - df_rising_queries = pd.DataFrame(rising_queries) # Added this line - - # Rename columns to avoid duplicates - df_top_queries.columns = ['Top query', 'value'] - df_rising_queries.columns = ['Rising query', 'value'] - - # Save to CSV - all_queries_df = pd.concat([df_top_queries, df_rising_queries], axis=1) - #all_queries_df.to_csv('related_queries.csv', index=False) - - # Display additional information - console = Console() - # Display additional information with emojis and bold formatting - print("\nšŸ“¢ā—šŸšØ ") - print("\n\033[1mšŸ” Top\033[0m: The most popular search queries. Scoring is on a relative scale where a value of 100 is the most commonly searched query, 50 is a query searched half as often, and a value of 0 is a query searched for less than 1% as often as the most popular query.\n") - print("\n\033[1mšŸš€ Rising\033[0m: Queries with the biggest increase in search frequency since the last time period. Results marked 'Breakout' had a tremendous increase, probably because these queries are new and had few (if any) prior searches.\n") - # Display the DataFrame using tabulate - table = tabulate(all_queries_df, headers='keys', tablefmt='fancy_grid') - print(table) - # Save the combined table to a file - try: - save_in_file(table) - except Exception as save_results_err: - logger.error(f"Failed to save search results: {save_results_err}") - return top_rising_queries - - except Exception as e: - print(f"get_related_queries_and_save_csv: ERROR: An error occurred: {e}") - - def get_related_topics_and_save_csv(search_keywords): - """ - Get related topics for the given search keywords and save the result to a CSV file. - - Args: - search_keywords (list): List of search keywords. - - Returns: - pd.DataFrame: DataFrame containing related topics. - """ + search_keywords = [f"{search_keywords}"] try: - # Build model pytrends = TrendReq(hl='en-US', tz=360) + pytrends.build_payload(kw_list=search_keywords, timeframe='today 12-m') - # Build payload - # FIXME: Remove hardcoding. - pytrends.build_payload(search_keywords, cat=0, timeframe='today 12-m') - - # Get related topics - try: - data = pytrends.related_topics() - except Exception as err: - logger.error(f"Failed to get pytrends realted topics: {err}") - return None - - # Extract data from the result - top_topics = list(data.values())[0]['top'] - rising_topics = list(data.values())[0]['rising'] + # Get related topics - this returns a dictionary + topics_data = pytrends.related_topics() - # Convert lists to DataFrames - df_top_topics = pd.DataFrame(top_topics) - df_rising_topics = pd.DataFrame(rising_topics) - - # FIXME:Exclude specified columns - columns_to_exclude = ['hasData', 'value', 'topic_mid', 'link'] - df_top_topics = df_top_topics.drop(columns=columns_to_exclude, errors='ignore') - df_rising_topics = df_rising_topics.drop(columns=columns_to_exclude, errors='ignore') - - # Rename columns to avoid duplicates and provide meaningful names - df_top_topics.columns = ['Top- ' + col if col != 'topic_title' else col for col in df_top_topics.columns] - df_rising_topics.columns = ['Rising- ' + col if col != 'topic_title' else col for col in df_rising_topics.columns] - all_topics_df = pd.concat([df_top_topics, df_rising_topics], axis=1) - - print(f"\n\n šŸ“¢ā—šŸšØ Rising and Trending Keywords for {search_keywords}\n") - print("\033[1mšŸ” Top\033[0m: The most popular search topics.") - print("\033[1mšŸš€ Rising\033[0m: Topics experiencing a significant increase in search frequency since the last time period. Topics marked :pile_of_poop:'Breakout' had a tremendous surge, likely because they are new and had few prior searches.") - # Display the DataFrame using tabulate - pd.set_option('display.max_rows', all_topics_df.shape[0]+1) - print(all_topics_df.head(10)) - table = tabulate(all_topics_df, headers='keys', tablefmt='fancy_grid') - try: - save_in_file(table) - except Exception as save_results_err: - logger.error(f"Failed to save search results: {save_results_err}") - return all_topics_df - + # Extract data for the first keyword + if topics_data and search_keywords[0] in topics_data: + keyword_data = topics_data[search_keywords[0]] + + # Create two separate dataframes for top and rising + top_df = keyword_data.get('top', pd.DataFrame()) + rising_df = keyword_data.get('rising', pd.DataFrame()) + + return { + 'top': top_df[['topic_title', 'value']] if not top_df.empty else pd.DataFrame(), + 'rising': rising_df[['topic_title', 'value']] if not rising_df.empty else pd.DataFrame() + } except Exception as e: - logger.error(f"ERROR: An error occurred in related topics: {e}") - return pd.DataFrame() + logger.error(f"Error in related topics: {e}") + return {'top': pd.DataFrame(), 'rising': pd.DataFrame()} + +def get_related_queries_and_save_csv(search_keywords): + search_keywords = [f"{search_keywords}"] + try: + pytrends = TrendReq(hl='en-US', tz=360) + pytrends.build_payload(kw_list=search_keywords, timeframe='today 12-m') + + # Get related queries - this returns a dictionary + queries_data = pytrends.related_queries() + + # Extract data for the first keyword + if queries_data and search_keywords[0] in queries_data: + keyword_data = queries_data[search_keywords[0]] + + # Create two separate dataframes for top and rising + top_df = keyword_data.get('top', pd.DataFrame()) + rising_df = keyword_data.get('rising', pd.DataFrame()) + + return { + 'top': top_df if not top_df.empty else pd.DataFrame(), + 'rising': rising_df if not rising_df.empty else pd.DataFrame() + } + except Exception as e: + logger.error(f"Error in related queries: {e}") + return {'top': pd.DataFrame(), 'rising': pd.DataFrame()} def get_source(url): @@ -507,22 +438,17 @@ def do_google_trends_analysis(search_term): else: all_the_keywords.append(suggestions_df['Keywords'].tolist()) all_the_keywords = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in all_the_keywords]) + # Generate a random sleep time between 2 and 3 seconds time.sleep(random.uniform(2, 3)) - -# -# # FIXME: Get result from vision GPT. Fetch and visualize Google Trends data -# #trends_data = fetch_google_trends_interest_overtime("llamaindex") -# -# # FIXME: Plot Interest Over time. -# result_df = plot_interest_by_region(search_term) -# + # Display additional information try: result_df = get_related_topics_and_save_csv(search_term) + logger.info(f"Related topics:: result_df: {result_df}") # Extract 'Top' topic_title if result_df: - top_topic_title = result_df['topic_title'].values.tolist() + top_topic_title = result_df['top']['topic_title'].values.tolist() # Join each sublist into one string separated by comma #top_topic_title = [','.join(filter(None, map(str, sublist))) for sublist in top_topic_title] top_topic_title = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in top_topic_title]) @@ -551,3 +477,77 @@ def do_google_trends_analysis(search_term): return(all_the_keywords) except Exception as e: logger.error(f"Error in Google Trends Analysis: {e}") + + +def get_trending_searches(country='united_states'): + """Get trending searches for a specific country.""" + try: + pytrends = TrendReq(hl='en-US', tz=360) + trending_searches = pytrends.trending_searches(pn=country) + return trending_searches + except Exception as e: + logger.error(f"Error getting trending searches: {e}") + return pd.DataFrame() + +def display_trending_searches(trending_df): + """Display trending searches in the UI.""" + if trending_df.empty: + st.info("No trending searches data available.") + return + + st.subheader("šŸ“Š Trending Searches") + + # Display as numbered list with emojis + for idx, search in enumerate(trending_df[0].head(10), 1): + st.write(f"{idx}. šŸ” {search}") + +def get_realtime_trends(country='US'): + """Get realtime trending searches for a specific country.""" + try: + pytrends = TrendReq(hl='en-US', tz=360) + realtime_trends = pytrends.realtime_trending_searches(pn=country) + return realtime_trends + except Exception as e: + logger.error(f"Error getting realtime trends: {e}") + return pd.DataFrame() + +def display_realtime_trends(trends_df): + """Display realtime trending searches in the UI.""" + if trends_df.empty: + st.info("No realtime trends data available.") + return + + st.subheader("⚔ Realtime Trends") + + # Create tabs for different categories + if not trends_df.empty: + # Display top 5 trends with their titles and articles + for _, row in trends_df.head(5).iterrows(): + with st.expander(f"šŸ”„ {row.get('title', 'Trending Topic')}"): + st.write(f"**Traffic:** {row.get('traffic', 'N/A')}") + if 'articles' in row: + st.write("šŸ“° Related Articles:") + for article in row['articles'][:3]: # Show top 3 articles + st.write(f"- {article['title']}") + +def display_google_trends_data(trends_data, search_keyword): + # ... existing code ... + + # Create tabs for different sections + tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([ + "Related Keywords", + "Interest Over Time", + "Regional Interest", + "Related Queries", + "Related Topics", + "Trending Now" + ]) + + # ... existing tab code ... + + with tab6: + col1, col2 = st.columns(2) + with col1: + display_trending_searches(trends_data.get('trending_searches', pd.DataFrame())) + with col2: + display_realtime_trends(trends_data.get('realtime_trends', pd.DataFrame())) diff --git a/lib/ai_web_researcher/gpt_online_researcher.py b/lib/ai_web_researcher/gpt_online_researcher.py index d2238143..0719680f 100644 --- a/lib/ai_web_researcher/gpt_online_researcher.py +++ b/lib/ai_web_researcher/gpt_online_researcher.py @@ -27,15 +27,19 @@ from pathlib import Path import sys from datetime import datetime import streamlit as st +import pandas as pd +import random +import numpy as np from lib.alwrity_ui.display_google_serp_results import ( process_research_results, process_search_results, display_research_results ) +from lib.alwrity_ui.google_trends_ui import display_google_trends_data, process_trends_data from .tavily_ai_search import get_tavilyai_results -from .metaphor_basic_neural_web_search import metaphor_search_articles +from .metaphor_basic_neural_web_search import metaphor_search_articles, streamlit_display_metaphor_results from .google_serp_search import google_search from .google_trends_researcher import do_google_trends_analysis #from .google_gemini_web_researcher import do_gemini_web_research @@ -56,6 +60,10 @@ def gpt_web_researcher(search_keywords, search_mode, **kwargs): logger.debug(f"Additional parameters: {kwargs}") try: + # Reset session state variables for this research operation + if 'metaphor_results_displayed' in st.session_state: + del st.session_state.metaphor_results_displayed + # Initialize result container research_results = None @@ -157,13 +165,76 @@ def gpt_web_researcher(search_keywords, search_mode, **kwargs): update_progress("Metaphor AI search failed, continuing with Tavily results only...", level="warning") else: update_progress("Metaphor AI search completed successfully", progress=75) + # Add debug logging to check the structure of metaphor_results + logger.debug(f"Metaphor results structure: {type(metaphor_results)}") + if isinstance(metaphor_results, dict): + logger.debug(f"Metaphor results keys: {metaphor_results.keys()}") + if 'data' in metaphor_results: + logger.debug(f"Metaphor data keys: {metaphor_results['data'].keys()}") + if 'results' in metaphor_results['data']: + logger.debug(f"Number of results: {len(metaphor_results['data']['results'])}") + + # Display Metaphor results only if not already displayed + if 'metaphor_results_displayed' not in st.session_state: + st.session_state.metaphor_results_displayed = True + # Make sure to pass the correct parameters to streamlit_display_metaphor_results + streamlit_display_metaphor_results(metaphor_results, search_keywords) + + # Add Google Trends Analysis + update_progress("Initiating Google Trends analysis...", progress=80) + try: + # Add an informative message about Google Trends + with st.expander("ā„¹ļø About Google Trends Analysis", expanded=False): + st.markdown(""" + **What is Google Trends Analysis?** + + Google Trends Analysis provides insights into how often a particular search-term is entered relative to the total search-volume across various regions of the world, and in various languages. + + **What data will be shown?** + + - **Related Keywords**: Terms that are frequently searched together with your keyword + - **Interest Over Time**: How interest in your keyword has changed over the past 12 months + - **Regional Interest**: Where in the world your keyword is most popular + - **Related Queries**: What people search for before and after searching for your keyword + - **Related Topics**: Topics that are closely related to your keyword + + **How to use this data:** + + - Identify trending topics in your industry + - Understand seasonal patterns in search behavior + - Discover related keywords for content planning + - Target content to specific regions with high interest + """) + + trends_results = do_google_pytrends_analysis(search_keywords) + if trends_results: + update_progress("Google Trends analysis completed successfully", progress=90) + # Store trends results in the research_results + if metaphor_results: + metaphor_results['trends_data'] = trends_results + else: + # If metaphor_results is None, create a new container for results + metaphor_results = {'trends_data': trends_results} + + # Display Google Trends data using the new UI module + display_google_trends_data(trends_results, search_keywords) + else: + update_progress("Google Trends analysis returned no results", level="warning") + except Exception as trends_err: + logger.error(f"Google Trends analysis failed: {trends_err}") + update_progress("Google Trends analysis failed", level="warning") + st.error(f"Error in Google Trends analysis: {str(trends_err)}") + + # Return the combined results + update_progress("Research completed!", progress=100, level="success") + return metaphor_results or t_results except Exception as ai_err: error_msg = f"AI research pipeline failed: {str(ai_err)}" logger.error(error_msg, exc_info=True) update_progress(error_msg, level="error") raise - + else: error_msg = f"Unsupported search mode: {search_mode}" logger.error(error_msg) @@ -316,13 +387,355 @@ def do_metaphor_ai_research(search_keywords): return None, None -def do_google_pytrends_analysis(search_keywords): - """ """ +def do_google_pytrends_analysis(keywords): + """ + Perform Google Trends analysis for the given keywords. + + Args: + keywords (str): The search keywords to analyze + + Returns: + dict: A dictionary containing formatted Google Trends data with the following keys: + - related_keywords: List of related keywords + - interest_over_time: DataFrame with date and interest columns + - regional_interest: DataFrame with country_code, country, and interest columns + - related_queries: DataFrame with query and value columns + - related_topics: DataFrame with topic and value columns + """ + logger.info(f"Performing Google Trends analysis for keywords: {keywords}") + + # Create a progress container for Streamlit + progress_container = st.empty() + progress_bar = st.progress(0) + + def update_progress(message, progress=None, level="info"): + """Helper function to update progress in Streamlit UI""" + if progress is not None: + progress_bar.progress(progress) + + if level == "error": + progress_container.error(f"🚫 {message}") + elif level == "warning": + progress_container.warning(f"āš ļø {message}") + else: + progress_container.info(f"šŸ”„ {message}") + logger.debug(f"Progress update [{level}]: {message}") + try: - logger.info(f"Do Google Trends analysis for given keywords: {search_keywords}") - return(do_google_trends_analysis(search_keywords)) - except Exception as err: - logger.error(f"Failed to do google trends analysis: {err}") + # Initialize the formatted data dictionary + formatted_data = { + 'related_keywords': [], + 'interest_over_time': pd.DataFrame(), + 'regional_interest': pd.DataFrame(), + 'related_queries': pd.DataFrame(), + 'related_topics': pd.DataFrame() + } + + # Get raw trends data from google_trends_researcher + update_progress("Fetching Google Trends data...", progress=10) + raw_trends_data = do_google_trends_analysis(keywords) + + if not raw_trends_data: + logger.warning("No Google Trends data returned") + update_progress("No Google Trends data returned", level="warning", progress=20) + return formatted_data + + # Process related keywords from the raw data + update_progress("Processing related keywords...", progress=30) + if isinstance(raw_trends_data, list): + formatted_data['related_keywords'] = raw_trends_data + elif isinstance(raw_trends_data, dict): + if 'keywords' in raw_trends_data: + formatted_data['related_keywords'] = raw_trends_data['keywords'] + if 'interest_over_time' in raw_trends_data: + formatted_data['interest_over_time'] = raw_trends_data['interest_over_time'] + if 'regional_interest' in raw_trends_data: + formatted_data['regional_interest'] = raw_trends_data['regional_interest'] + if 'related_queries' in raw_trends_data: + formatted_data['related_queries'] = raw_trends_data['related_queries'] + if 'related_topics' in raw_trends_data: + formatted_data['related_topics'] = raw_trends_data['related_topics'] + + # If we have keywords but missing other data, try to fetch them using pytrends directly + if formatted_data['related_keywords'] and ( + formatted_data['interest_over_time'].empty or + formatted_data['regional_interest'].empty or + formatted_data['related_queries'].empty or + formatted_data['related_topics'].empty + ): + try: + update_progress("Fetching additional data from Google Trends API...", progress=40) + from pytrends.request import TrendReq + pytrends = TrendReq(hl='en-US', tz=360) + + # Build payload with the main keyword + update_progress("Building search payload...", progress=45) + pytrends.build_payload([keywords], timeframe='today 12-m', geo='') + + # Get interest over time if missing + if formatted_data['interest_over_time'].empty: + try: + update_progress("Fetching interest over time data...", progress=50) + interest_df = pytrends.interest_over_time() + if not interest_df.empty: + formatted_data['interest_over_time'] = interest_df.reset_index() + update_progress(f"Successfully fetched interest over time data with {len(formatted_data['interest_over_time'])} data points", progress=55) + else: + update_progress("No interest over time data available", level="warning", progress=55) + except Exception as e: + logger.error(f"Error fetching interest over time: {e}") + update_progress(f"Error fetching interest over time: {str(e)}", level="warning", progress=55) + + # Get regional interest if missing + if formatted_data['regional_interest'].empty: + try: + update_progress("Fetching regional interest data...", progress=60) + regional_df = pytrends.interest_by_region() + if not regional_df.empty: + formatted_data['regional_interest'] = regional_df.reset_index() + update_progress(f"Successfully fetched regional interest data for {len(formatted_data['regional_interest'])} regions", progress=65) + else: + update_progress("No regional interest data available", level="warning", progress=65) + except Exception as e: + logger.error(f"Error fetching regional interest: {e}") + update_progress(f"Error fetching regional interest: {str(e)}", level="warning", progress=65) + + # Get related queries if missing + if formatted_data['related_queries'].empty: + try: + update_progress("Fetching related queries data...", progress=70) + # Get related queries data + related_queries = pytrends.related_queries() + + # Create empty DataFrame as fallback + formatted_data['related_queries'] = pd.DataFrame(columns=['query', 'value']) + + # Simple direct approach to avoid list index errors + if related_queries and isinstance(related_queries, dict): + # Check if our keyword exists in the results + if keywords in related_queries: + keyword_data = related_queries[keywords] + + # Process top queries if available + if 'top' in keyword_data and keyword_data['top'] is not None: + try: + update_progress("Processing top related queries...", progress=75) + # Convert to DataFrame if it's not already + if isinstance(keyword_data['top'], pd.DataFrame): + top_df = keyword_data['top'] + else: + # Try to convert to DataFrame + top_df = pd.DataFrame(keyword_data['top']) + + # Ensure it has the right columns + if not top_df.empty: + # Rename columns if needed + if 'query' in top_df.columns: + # Already has the right column name + pass + elif len(top_df.columns) > 0: + # Use first column as query + top_df = top_df.rename(columns={top_df.columns[0]: 'query'}) + + # Add to our results + formatted_data['related_queries'] = top_df + update_progress(f"Successfully processed {len(top_df)} top related queries", progress=80) + except Exception as e: + logger.warning(f"Error processing top queries: {e}") + update_progress(f"Error processing top queries: {str(e)}", level="warning", progress=80) + + # Process rising queries if available + if 'rising' in keyword_data and keyword_data['rising'] is not None: + try: + update_progress("Processing rising related queries...", progress=85) + # Convert to DataFrame if it's not already + if isinstance(keyword_data['rising'], pd.DataFrame): + rising_df = keyword_data['rising'] + else: + # Try to convert to DataFrame + rising_df = pd.DataFrame(keyword_data['rising']) + + # Ensure it has the right columns + if not rising_df.empty: + # Rename columns if needed + if 'query' in rising_df.columns: + # Already has the right column name + pass + elif len(rising_df.columns) > 0: + # Use first column as query + rising_df = rising_df.rename(columns={rising_df.columns[0]: 'query'}) + + # Combine with existing data if we have any + if not formatted_data['related_queries'].empty: + formatted_data['related_queries'] = pd.concat([formatted_data['related_queries'], rising_df]) + update_progress(f"Successfully processed {len(rising_df)} rising related queries", progress=90) + else: + formatted_data['related_queries'] = rising_df + update_progress(f"Successfully processed {len(rising_df)} rising related queries", progress=90) + except Exception as e: + logger.warning(f"Error processing rising queries: {e}") + update_progress(f"Error processing rising queries: {str(e)}", level="warning", progress=90) + except Exception as e: + logger.error(f"Error fetching related queries: {e}") + update_progress(f"Error fetching related queries: {str(e)}", level="warning", progress=90) + # Ensure we have an empty DataFrame with the right columns + formatted_data['related_queries'] = pd.DataFrame(columns=['query', 'value']) + + # Get related topics if missing + if formatted_data['related_topics'].empty: + try: + update_progress("Fetching related topics data...", progress=95) + # Get related topics data + related_topics = pytrends.related_topics() + + # Create empty DataFrame as fallback + formatted_data['related_topics'] = pd.DataFrame(columns=['topic', 'value']) + + # Simple direct approach to avoid list index errors + if related_topics and isinstance(related_topics, dict): + # Check if our keyword exists in the results + if keywords in related_topics: + keyword_data = related_topics[keywords] + + # Process top topics if available + if 'top' in keyword_data and keyword_data['top'] is not None: + try: + update_progress("Processing top related topics...", progress=97) + # Convert to DataFrame if it's not already + if isinstance(keyword_data['top'], pd.DataFrame): + top_df = keyword_data['top'] + else: + # Try to convert to DataFrame + top_df = pd.DataFrame(keyword_data['top']) + + # Ensure it has the right columns + if not top_df.empty: + # Rename columns if needed + if 'topic_title' in top_df.columns: + top_df = top_df.rename(columns={'topic_title': 'topic'}) + elif len(top_df.columns) > 0 and 'topic' not in top_df.columns: + # Use first column as topic + top_df = top_df.rename(columns={top_df.columns[0]: 'topic'}) + + # Add to our results + formatted_data['related_topics'] = top_df + update_progress(f"Successfully processed {len(top_df)} top related topics", progress=98) + except Exception as e: + logger.warning(f"Error processing top topics: {e}") + update_progress(f"Error processing top topics: {str(e)}", level="warning", progress=98) + + # Process rising topics if available + if 'rising' in keyword_data and keyword_data['rising'] is not None: + try: + update_progress("Processing rising related topics...", progress=99) + # Convert to DataFrame if it's not already + if isinstance(keyword_data['rising'], pd.DataFrame): + rising_df = keyword_data['rising'] + else: + # Try to convert to DataFrame + rising_df = pd.DataFrame(keyword_data['rising']) + + # Ensure it has the right columns + if not rising_df.empty: + # Rename columns if needed + if 'topic_title' in rising_df.columns: + rising_df = rising_df.rename(columns={'topic_title': 'topic'}) + elif len(rising_df.columns) > 0 and 'topic' not in rising_df.columns: + # Use first column as topic + rising_df = rising_df.rename(columns={rising_df.columns[0]: 'topic'}) + + # Combine with existing data if we have any + if not formatted_data['related_topics'].empty: + formatted_data['related_topics'] = pd.concat([formatted_data['related_topics'], rising_df]) + update_progress(f"Successfully processed {len(rising_df)} rising related topics", progress=100) + else: + formatted_data['related_topics'] = rising_df + update_progress(f"Successfully processed {len(rising_df)} rising related topics", progress=100) + except Exception as e: + logger.warning(f"Error processing rising topics: {e}") + update_progress(f"Error processing rising topics: {str(e)}", level="warning", progress=100) + except Exception as e: + logger.error(f"Error fetching related topics: {e}") + update_progress(f"Error fetching related topics: {str(e)}", level="warning", progress=100) + # Ensure we have an empty DataFrame with the right columns + formatted_data['related_topics'] = pd.DataFrame(columns=['topic', 'value']) + + except Exception as e: + logger.error(f"Error fetching additional trends data: {e}") + update_progress(f"Error fetching additional trends data: {str(e)}", level="warning", progress=100) + + # Ensure all DataFrames have the correct column names for the UI + update_progress("Finalizing data formatting...", progress=100) + + if not formatted_data['interest_over_time'].empty: + if 'date' not in formatted_data['interest_over_time'].columns: + formatted_data['interest_over_time'] = formatted_data['interest_over_time'].reset_index() + if 'interest' not in formatted_data['interest_over_time'].columns and keywords in formatted_data['interest_over_time'].columns: + formatted_data['interest_over_time'] = formatted_data['interest_over_time'].rename(columns={keywords: 'interest'}) + + if not formatted_data['regional_interest'].empty: + if 'country_code' not in formatted_data['regional_interest'].columns and 'geoName' in formatted_data['regional_interest'].columns: + formatted_data['regional_interest'] = formatted_data['regional_interest'].rename(columns={'geoName': 'country_code'}) + if 'interest' not in formatted_data['regional_interest'].columns and keywords in formatted_data['regional_interest'].columns: + formatted_data['regional_interest'] = formatted_data['regional_interest'].rename(columns={keywords: 'interest'}) + + if not formatted_data['related_queries'].empty: + # Handle different column names that might be present in the related queries DataFrame + if 'query' not in formatted_data['related_queries'].columns: + if 'Top query' in formatted_data['related_queries'].columns: + formatted_data['related_queries'] = formatted_data['related_queries'].rename(columns={'Top query': 'query'}) + elif 'Rising query' in formatted_data['related_queries'].columns: + formatted_data['related_queries'] = formatted_data['related_queries'].rename(columns={'Rising query': 'query'}) + elif 'query' not in formatted_data['related_queries'].columns and len(formatted_data['related_queries'].columns) > 0: + # If we have a DataFrame but no 'query' column, use the first column as 'query' + first_col = formatted_data['related_queries'].columns[0] + formatted_data['related_queries'] = formatted_data['related_queries'].rename(columns={first_col: 'query'}) + + if 'value' not in formatted_data['related_queries'].columns and len(formatted_data['related_queries'].columns) > 1: + # If we have a second column, use it as 'value' + second_col = formatted_data['related_queries'].columns[1] + formatted_data['related_queries'] = formatted_data['related_queries'].rename(columns={second_col: 'value'}) + elif 'value' not in formatted_data['related_queries'].columns: + # If no 'value' column exists, add one with default values + formatted_data['related_queries']['value'] = 0 + + if not formatted_data['related_topics'].empty: + # Handle different column names that might be present in the related topics DataFrame + if 'topic' not in formatted_data['related_topics'].columns: + if 'topic_title' in formatted_data['related_topics'].columns: + formatted_data['related_topics'] = formatted_data['related_topics'].rename(columns={'topic_title': 'topic'}) + elif 'topic' not in formatted_data['related_topics'].columns and len(formatted_data['related_topics'].columns) > 0: + # If we have a DataFrame but no 'topic' column, use the first column as 'topic' + first_col = formatted_data['related_topics'].columns[0] + formatted_data['related_topics'] = formatted_data['related_topics'].rename(columns={first_col: 'topic'}) + + if 'value' not in formatted_data['related_topics'].columns and len(formatted_data['related_topics'].columns) > 1: + # If we have a second column, use it as 'value' + second_col = formatted_data['related_topics'].columns[1] + formatted_data['related_topics'] = formatted_data['related_topics'].rename(columns={second_col: 'value'}) + elif 'value' not in formatted_data['related_topics'].columns: + # If no 'value' column exists, add one with default values + formatted_data['related_topics']['value'] = 0 + + # Clear the progress container after completion + progress_container.empty() + progress_bar.empty() + + return formatted_data + + except Exception as e: + logger.error(f"Error in Google Trends analysis: {e}") + update_progress(f"Error in Google Trends analysis: {str(e)}", level="error", progress=100) + # Clear the progress container after error + progress_container.empty() + progress_bar.empty() + return { + 'related_keywords': [], + 'interest_over_time': pd.DataFrame(), + 'regional_interest': pd.DataFrame(), + 'related_queries': pd.DataFrame(), + 'related_topics': pd.DataFrame() + } def metaphor_extract_titles_or_text(json_data, return_titles=True): diff --git a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py index 209bab1e..8ae72836 100644 --- a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py +++ b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py @@ -262,285 +262,185 @@ def metaphor_search_articles(query, search_options: dict = None): except Exception as tavily_err: logger.warning(f"Error getting Tavily answer: {tavily_err}") - # Display results in Streamlit - streamlit_display_metaphor_results(formatted_response) + # Return the formatted response without displaying it + # The display will be handled by gpt_web_researcher return formatted_response except Exception as e: logger.error(f"Error in Exa searching articles: {e}") return None -def streamlit_display_metaphor_results(metaphor_response: dict): - """ - Display Metaphor search results in Streamlit with enhanced metrics and popovers +def streamlit_display_metaphor_results(metaphor_response, search_keywords=None): + """Display Metaphor search results in Streamlit.""" - Args: - metaphor_response (dict): Response from Metaphor search - """ - if not metaphor_response or 'data' not in metaphor_response: - st.error("No valid Metaphor search results to display") + if not metaphor_response: + st.error("No search results found.") return - + + # Add debug logging + logger.debug(f"Displaying Metaphor results. Type: {type(metaphor_response)}") + if isinstance(metaphor_response, dict): + logger.debug(f"Metaphor response keys: {metaphor_response.keys()}") + # Initialize session state variables if they don't exist if 'search_insights' not in st.session_state: st.session_state.search_insights = None - if 'metaphor_response' not in st.session_state: - st.session_state.metaphor_response = metaphor_response - + st.session_state.metaphor_response = None if 'insights_generated' not in st.session_state: st.session_state.insights_generated = False - # Update the stored metaphor_response with the latest data + # Store the current response in session state st.session_state.metaphor_response = metaphor_response - - # Display metrics in columns - col1, col2, col3 = st.columns(3) - # Calculate metrics - results = metaphor_response['data']['results'] + # Display search results + st.subheader("šŸ” Search Results") + + # Calculate metrics - handle different data structures + results = [] + if isinstance(metaphor_response, dict): + if 'data' in metaphor_response and 'results' in metaphor_response['data']: + results = metaphor_response['data']['results'] + elif 'results' in metaphor_response: + results = metaphor_response['results'] + total_results = len(results) - avg_score = sum(r['score'] for r in results if r['score']) / total_results if total_results > 0 else 0 + avg_relevance = sum(r.get('score', 0) for r in results) / total_results if total_results > 0 else 0 + # Display metrics + col1, col2 = st.columns(2) with col1: - st.metric( - label="Total Results", - value=total_results - ) + st.metric("Total Results", total_results) with col2: - if metaphor_response['data'].get('costDollars'): - cost = metaphor_response['data']['costDollars'] - st.metric( - label="Search Cost", - value=f"${cost['total']:.3f}" - ) - with col3: - st.metric( - label="Average Relevance Score", - value=f"{avg_score:.2f}" - ) - - # Display AI-generated answers side by side - if 'answer' in metaphor_response or 'tavily_answer' in metaphor_response: - st.markdown("### šŸ¤– AI-Generated Research Answers") - - # Create two columns for side-by-side display - tavily_col, metaphor_col = st.columns(2) - - # Display Tavily answer if available - with tavily_col: - if 'tavily_answer' in metaphor_response: - st.markdown("#### šŸ” Tavily AI Answer") - st.markdown(f""" -
- {metaphor_response['tavily_answer']} -
- """, unsafe_allow_html=True) - - if metaphor_response.get('tavily_cost_dollars'): - st.caption(f"Tavily Answer Cost: ${metaphor_response['tavily_cost_dollars']['total']:.3f}") - - if metaphor_response.get('tavily_citations'): - with st.expander("šŸ“š Tavily Sources"): - for idx, citation in enumerate(metaphor_response['tavily_citations'], 1): - st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})") - else: - st.markdown("#### šŸ” Tavily AI Answer") - st.info("No Tavily answer available for this query.") - - # Display Metaphor answer if available - with metaphor_col: - if 'answer' in metaphor_response: - st.markdown("#### šŸ” Metaphor AI Answer") - st.markdown(f""" -
- {metaphor_response['answer']} -
- """, unsafe_allow_html=True) - - if metaphor_response.get('answerCostDollars'): - st.caption(f"Metaphor Answer Cost: ${metaphor_response['answerCostDollars']['total']:.3f}") - - if metaphor_response.get('citations'): - with st.expander("šŸ“š Metaphor Sources"): - for idx, citation in enumerate(metaphor_response['citations'], 1): - st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})") - else: - st.markdown("#### šŸ” Metaphor AI Answer") - st.info("No Metaphor answer available for this query.") + st.metric("Average Relevance Score", f"{avg_relevance:.2f}") - # Add "Get Search Insights" button - moved outside the AI answers conditional - st.markdown("### šŸ” Search Insights") + # Display AI-generated answers if available + if 'tavily_answer' in metaphor_response or 'metaphor_answer' in metaphor_response: + st.subheader("šŸ¤– AI-Generated Answers") + + if 'tavily_answer' in metaphor_response: + st.markdown("**Tavily AI Answer:**") + st.write(metaphor_response['tavily_answer']) + + if 'metaphor_answer' in metaphor_response: + st.markdown("**Metaphor AI Answer:**") + st.write(metaphor_response['metaphor_answer']) - # Create a container for the insights - insights_container = st.container() - - # Use a button with a callback function - if st.button("Generate Search Insights", type="primary"): - # Set a flag in session state to indicate that insights should be generated + # Get Search Insights button + if st.button("Generate Search Insights", key="metaphor_generate_insights_button"): st.session_state.insights_generated = True - - # Store the current metaphor_response in session state - st.session_state.metaphor_response = metaphor_response - - # Redirect to the same page with a query parameter to trigger insights generation - st.experimental_rerun() - - # If insights should be generated, do it in a separate container - if st.session_state.insights_generated: - with insights_container: - with st.spinner("Analyzing search results to generate insights..."): - # Get the stored metaphor_response from session state - stored_response = st.session_state.metaphor_response - stored_results = stored_response['data']['results'] - - # Prepare data for analysis - analysis_data = { - "metaphor_results": stored_results, - "metaphor_answer": stored_response.get("answer", ""), - "tavily_answer": stored_response.get("tavily_answer", ""), - "metaphor_citations": stored_response.get("citations", []), - "tavily_citations": stored_response.get("tavily_citations", []) - } - - # Create the analysis prompt - analysis_prompt = f""" - **Search Intent & User Needs Analysis** - - I have conducted research using both Tavily and Metaphor AI search engines. - Below is the data from both sources: - - **Metaphor AI Answer:** - {analysis_data["metaphor_answer"]} - - **Tavily AI Answer:** - {analysis_data["tavily_answer"]} - - **Search Results:** - {[f"{i+1}. {r['title']} - {r['summary']}" for i, r in enumerate(analysis_data["metaphor_results"])]} - - **Citations:** - {[f"{i+1}. {c.get('title', 'Untitled')} - {c.get('url', 'No URL')}" for i, c in enumerate(analysis_data["metaphor_citations"] + analysis_data["tavily_citations"])]} - - Based on this research data, please provide the following insights: - - **Search Intent & User Needs** - ``` - Review the research data and identify: - 1. The distribution of search intent (categorize as Informational/Commercial/Navigational/Transactional) - 2. Most common user questions and their patterns - 3. Frequently mentioned pain points or challenges - 4. Recurring solutions or approaches to addressing these challenges - 5. Gaps between user questions and available answers - - Present findings in a structured format with percentages and specific examples. - ``` - - Format your response as a comprehensive analysis with clear sections, bullet points, and examples from the research data. - """ - - try: - # Import the llm_text_gen function - import importlib - text_gen_module = importlib.import_module('lib.gpt_providers.text_generation.main_text_generation') - if hasattr(text_gen_module, 'llm_text_gen'): - # Generate insights using llm_text_gen - insights = text_gen_module.llm_text_gen(analysis_prompt) - - # Store insights in session state - st.session_state.search_insights = insights - - # Reset the flag to prevent regeneration on next rerun - st.session_state.insights_generated = False - else: - st.error("Could not find llm_text_gen function in the text generation module.") - except Exception as e: - st.error(f"Error generating insights: {str(e)}") - logger.error(f"Error generating insights: {e}") + st.rerun() # Display insights if they exist in session state if st.session_state.search_insights: - with insights_container: - st.markdown("### šŸ” Search Intent & User Needs Analysis") - st.markdown(st.session_state.search_insights) - - # Create DataFrame from results - df = pd.DataFrame(results) + st.subheader("šŸ” Search Insights") + st.write(st.session_state.search_insights) + + # Display search results in a data editor + st.subheader("šŸ“Š Detailed Results") # Prepare data for display - display_df = df.copy() - display_df['Visit Site'] = display_df['url'] + results_data = [] + for result in results: + result_data = { + 'Title': result.get('title', ''), + 'URL': result.get('url', ''), + 'Snippet': result.get('summary', ''), + 'Relevance Score': result.get('score', 0), + 'Published Date': result.get('publishedDate', '') + } + results_data.append(result_data) - # Format publishedDate as string if it exists - if 'publishedDate' in display_df.columns: - display_df['publishedDate'] = display_df['publishedDate'].apply( - lambda x: x[:10] if isinstance(x, str) else 'N/A' + # Create DataFrame + df = pd.DataFrame(results_data) + + # Display the DataFrame if it's not empty + if not df.empty: + # Configure columns + st.dataframe( + df, + column_config={ + "Title": st.column_config.TextColumn( + "Title", + help="Title of the search result", + width="large", + ), + "URL": st.column_config.LinkColumn( + "URL", + help="Link to the search result", + width="medium", + display_text="Visit Article", + ), + "Snippet": st.column_config.TextColumn( + "Snippet", + help="Summary of the search result", + width="large", + ), + "Relevance Score": st.column_config.NumberColumn( + "Relevance Score", + help="Relevance score of the search result", + format="%.2f", + width="small", + ), + "Published Date": st.column_config.DateColumn( + "Published Date", + help="Publication date of the search result", + width="medium", + ), + }, + hide_index=True, ) - - # Configure columns for data editor - columns = { - 'title': st.column_config.TextColumn( - 'Title', - width='large', - required=True, - ), - 'author': st.column_config.TextColumn( - 'Author', - width='medium', - ), - 'publishedDate': st.column_config.TextColumn( - 'Published Date', - width='medium', - ), - 'score': st.column_config.NumberColumn( - 'Relevance Score', - width='small', - format="%.2f" - ), - 'Visit Site': st.column_config.LinkColumn( - 'Link', - width='small', - display_text='Visit Site', - ), - 'summary': st.column_config.TextColumn( - 'Summary', - width='large', - required=True, - ) - } - - # Display results in data editor - st.data_editor( - display_df, - column_config=columns, - hide_index=True, - num_rows='dynamic', - disabled=True, - column_order=['title', 'author', 'publishedDate', 'score', 'summary', 'Visit Site'] - ) - - # Display detailed summaries with popovers - st.write("### Detailed Summaries") - for idx, result in enumerate(results, 1): - with st.expander(f"šŸ“„ {result['title']}", expanded=False): - col1, col2 = st.columns([3, 1]) - with col1: - st.markdown(f"**Summary**") - st.markdown(result['summary']) - with col2: - st.markdown("**Details**") - st.markdown(f"**Author:** {result['author'] if result['author'] else 'N/A'}") - st.markdown(f"**Published:** {result['publishedDate'][:10] if result['publishedDate'] else 'N/A'}") - st.markdown(f"**Score:** {result['score']:.2f}") - st.markdown(f"[Visit Site]({result['url']})") - - # Display search metadata - st.divider() - col1, col2 = st.columns(2) - with col1: - st.caption(f"Search Type: {metaphor_response['data']['resolvedSearchType']}") - with col2: - st.caption(f"Request ID: {metaphor_response['data']['requestId']}") + + # Add popover for snippets + st.markdown(""" + + """, unsafe_allow_html=True) + + # Display snippets with popover + st.subheader("šŸ“ Snippets") + for i, result in enumerate(results): + snippet = result.get('summary', '') + if snippet: + st.markdown(f""" +
+ {result.get('title', '')} +
+ {snippet} +
+
+ """, unsafe_allow_html=True) + else: + st.info("No detailed results available.") + + # Add a collapsible section for the raw JSON data + with st.expander("Research Results (JSON)", expanded=False): + st.json(metaphor_response) def metaphor_news_summarizer(news_keywords): diff --git a/lib/alwrity_ui/google_trends_ui.py b/lib/alwrity_ui/google_trends_ui.py new file mode 100644 index 00000000..b57e1833 --- /dev/null +++ b/lib/alwrity_ui/google_trends_ui.py @@ -0,0 +1,425 @@ +""" +Module for displaying Google Trends data in the Streamlit UI. + +This module provides functions for visualizing Google Trends data, including: +- Interest over time +- Regional interest +- Related queries +- Related topics +""" + +import streamlit as st +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import logging + +# Set up logging +logger = logging.getLogger(__name__) + +def display_google_trends_data(trends_data, search_keyword): + """ + Display Google Trends data in a structured format with tabs for different sections. + + Args: + trends_data (dict): Dictionary containing Google Trends data + search_keyword (str): The search keyword used for the analysis + """ + if not trends_data: + st.warning("No Google Trends data available for this search.") + return + + st.subheader(f"Google Trends Analysis for '{search_keyword}'") + + # Add an informative message about Google Trends + with st.expander("ā„¹ļø About Google Trends Data", expanded=False): + st.markdown(""" + **What is Google Trends?** + + Google Trends is a public web facility that shows how often a particular search-term is entered relative to the total search-volume across various regions of the world, and in various languages. + + **What data is shown here?** + + - **Related Keywords**: Terms that are frequently searched together with your keyword + - **Interest Over Time**: How interest in your keyword has changed over the past 12 months + - **Regional Interest**: Where in the world your keyword is most popular + - **Related Queries**: What people search for before and after searching for your keyword + - **Related Topics**: Topics that are closely related to your keyword + + **How to interpret the data:** + + - Interest values range from 0 to 100, where 100 is the peak popularity for the term + - A value of 50 means the term is half as popular as the peak + - A value of 0 means there was not enough data for this term + """) + + # Create tabs for different sections + tab1, tab2, tab3, tab4, tab5 = st.tabs([ + "Related Keywords", + "Interest Over Time", + "Regional Interest", + "Related Queries", + "Related Topics" + ]) + + with tab1: + display_keywords_section(trends_data.get('related_keywords', [])) + + with tab2: + display_interest_over_time(trends_data.get('interest_over_time', pd.DataFrame())) + + with tab3: + display_regional_interest(trends_data.get('regional_interest', pd.DataFrame())) + + with tab4: + display_related_queries(trends_data.get('related_queries', pd.DataFrame())) + + with tab5: + display_related_topics(trends_data.get('related_topics', pd.DataFrame())) + + # Add a footer with data source information + st.markdown("---") + st.caption("Data source: Google Trends | Last updated: " + pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")) + +def display_keywords_section(keywords): + """Display related keywords from Google Trends in a table format.""" + if not keywords: + st.info("No related keywords data available.") + return + + st.subheader("Related Keywords") + st.write("Keywords related to your search:") + + # Add explanation about related keywords + with st.expander("ā„¹ļø About Related Keywords", expanded=False): + st.markdown(""" + **What are Related Keywords?** + + Related keywords are terms that are frequently searched together with your main keyword. + These keywords can help you understand what topics are associated with your search term + and can be valuable for content planning and SEO strategies. + + **How to use this data:** + + - Use these keywords to expand your content strategy + - Identify gaps in your content that you could fill + - Understand what your audience is interested in + - Improve your SEO by incorporating these terms naturally in your content + """) + + # Create a DataFrame for better display + df = pd.DataFrame(keywords, columns=['Keyword']) + st.dataframe(df, use_container_width=True) + + # Add a note about the number of keywords + st.caption(f"Found {len(keywords)} related keywords") + +def display_interest_over_time(interest_df): + """Display a chart showing interest over time for a given search keyword.""" + if interest_df.empty: + st.info("No interest over time data available.") + return + + st.subheader("Interest Over Time") + + # Add explanation about interest over time + with st.expander("ā„¹ļø About Interest Over Time", expanded=False): + st.markdown(""" + **What is Interest Over Time?** + + Interest Over Time shows how interest in your search term has changed over the past 12 months. + The data is normalized and presented on a scale from 0 to 100, where 100 is the peak popularity + for the term, 50 means the term is half as popular, and 0 means there was not enough data. + + **How to interpret this chart:** + + - Look for peaks and valleys to identify trends + - Compare with seasonal patterns or events + - Identify if interest is growing, declining, or stable + - Use this data to time your content releases for maximum impact + """) + + try: + # Ensure we have the required columns + if 'date' not in interest_df.columns: + st.error("Interest over time data is missing the 'date' column.") + return + + if 'interest' not in interest_df.columns: + st.error("Interest over time data is missing the 'interest' column.") + return + + # Create the chart + fig = px.line( + interest_df, + x='date', + y='interest', + title='Interest Over Time', + labels={'date': 'Date', 'interest': 'Interest'}, + line_shape='spline' + ) + + fig.update_layout( + xaxis_title="Date", + yaxis_title="Interest", + hovermode='x unified' + ) + + st.plotly_chart(fig, use_container_width=True) + + # Add summary statistics + if not interest_df.empty: + col1, col2, col3 = st.columns(3) + with col1: + st.metric("Average Interest", f"{interest_df['interest'].mean():.1f}") + with col2: + st.metric("Peak Interest", f"{interest_df['interest'].max():.1f}") + with col3: + st.metric("Lowest Interest", f"{interest_df['interest'].min():.1f}") + + except Exception as e: + st.error(f"Error displaying interest over time chart: {str(e)}") + logger.error(f"Error in display_interest_over_time: {e}") + +def display_regional_interest(regional_df): + """Display a chart showing interest by region for the search keyword.""" + if regional_df.empty: + st.info("No regional interest data available.") + return + + st.subheader("Regional Interest") + + # Add explanation about regional interest + with st.expander("ā„¹ļø About Regional Interest", expanded=False): + st.markdown(""" + **What is Regional Interest?** + + Regional Interest shows how interest in your search term varies across different countries. + The data is normalized and presented on a scale from 0 to 100, where 100 is the peak popularity + for the term in that region, 50 means the term is half as popular, and 0 means there was not enough data. + + **How to interpret this map:** + + - Darker colors indicate higher interest in that region + - Lighter colors indicate lower interest + - Hover over a country to see the exact interest value + - Use this data to target your content to specific regions + """) + + try: + # Ensure we have the required columns + if 'country_code' not in regional_df.columns: + st.error("Regional interest data is missing the 'country_code' column.") + return + + if 'interest' not in regional_df.columns: + st.error("Regional interest data is missing the 'interest' column.") + return + + # Create the choropleth map + fig = go.Figure(data=go.Choropleth( + locations=regional_df['country_code'], + z=regional_df['interest'], + text=regional_df['country_code'], # This will show in the hover text + colorscale='Viridis', + colorbar_title="Interest Level", + zmin=0, + zmax=100, + marker_line_color='darkgray', + marker_line_width=0.5, + showscale=True, + colorbar=dict( + title="Interest Level", + tickformat=".0f", + tickmode="linear", + tick0=0, + dtick=20 + ) + )) + + # Update the layout for better visualization + fig.update_layout( + title=dict( + text='Regional Interest Distribution', + x=0.5, + xanchor='center' + ), + geo=dict( + showframe=False, + showcoastlines=True, + projection_type='equirectangular', + showland=True, + landcolor='lightgray', + showocean=True, + oceancolor='aliceblue', + showcountries=True, + countrycolor='darkgray' + ), + width=800, + height=500, + margin=dict(l=0, r=0, t=30, b=0) + ) + + # Display the map + st.plotly_chart(fig, use_container_width=True) + + # Display top 5 countries with highest interest + if not regional_df.empty: + st.subheader("Top Regions by Interest") + top_regions = regional_df.sort_values('interest', ascending=False).head(5) + + # Create a more visually appealing bar chart for top regions + fig_bar = go.Figure(data=[ + go.Bar( + x=top_regions['country_code'], + y=top_regions['interest'], + text=top_regions['interest'].round(1), + textposition='auto', + marker_color='rgb(55, 83, 109)' + ) + ]) + + fig_bar.update_layout( + title='Top 5 Regions by Interest Level', + xaxis_title='Region', + yaxis_title='Interest Level', + yaxis_range=[0, 100], + showlegend=False + ) + + st.plotly_chart(fig_bar, use_container_width=True) + + except Exception as e: + st.error(f"Error displaying regional interest chart: {str(e)}") + logger.error(f"Error in display_regional_interest: {e}") + +def display_related_queries(queries_df): + """Display related queries in a structured format.""" + if queries_df.empty: + st.info("No related queries data available.") + return + + st.subheader("Related Queries") + + # Add explanation about related queries + with st.expander("ā„¹ļø About Related Queries", expanded=False): + st.markdown(""" + **What are Related Queries?** + + Related Queries show what people search for before and after searching for your keyword. + These queries can help you understand the search intent and context around your keyword. + + **How to interpret this data:** + + - The 'value' column shows the relative interest compared to your main keyword + - Higher values indicate stronger association with your keyword + - Use these queries to expand your content strategy + - Identify what questions your audience is trying to answer + """) + + try: + # Ensure we have the required columns + if 'query' not in queries_df.columns: + st.error("Related queries data is missing the 'query' column.") + return + + if 'value' not in queries_df.columns: + st.error("Related queries data is missing the 'value' column.") + return + + # Sort by value in descending order + queries_df = queries_df.sort_values('value', ascending=False) + + # Display as a table + st.dataframe(queries_df, use_container_width=True) + + # Add a note about the number of queries + st.caption(f"Found {len(queries_df)} related queries") + + except Exception as e: + st.error(f"Error displaying related queries: {str(e)}") + logger.error(f"Error in display_related_queries: {e}") + +def display_related_topics(topics_df): + """Display related topics in a structured format.""" + if topics_df.empty: + st.info("No related topics data available.") + return + + st.subheader("Related Topics") + + # Add explanation about related topics + with st.expander("ā„¹ļø About Related Topics", expanded=False): + st.markdown(""" + **What are Related Topics?** + + Related Topics show broader topics that are associated with your search term. + These topics can help you understand the broader context and themes related to your keyword. + + **How to interpret this data:** + + - The 'value' column shows the relative interest compared to your main keyword + - Higher values indicate stronger association with your keyword + - Use these topics to understand the broader context of your keyword + - Identify themes that might be relevant to your content strategy + """) + + try: + # Ensure we have the required columns + if 'topic' not in topics_df.columns: + st.error("Related topics data is missing the 'topic' column.") + return + + if 'value' not in topics_df.columns: + st.error("Related topics data is missing the 'value' column.") + return + + # Sort by value in descending order + topics_df = topics_df.sort_values('value', ascending=False) + + # Display as a table + st.dataframe(topics_df, use_container_width=True) + + # Add a note about the number of topics + st.caption(f"Found {len(topics_df)} related topics") + + except Exception as e: + st.error(f"Error displaying related topics: {str(e)}") + logger.error(f"Error in display_related_topics: {e}") + +def process_trends_data(trends_data): + """ + Process and format Google Trends data for display. + + Args: + trends_data (dict): Raw Google Trends data + + Returns: + dict: Formatted data ready for display + """ + if not trends_data: + return {} + + processed_data = {} + + # Process related keywords + if 'related_keywords' in trends_data: + processed_data['related_keywords'] = trends_data['related_keywords'] + + # Process interest over time + if 'interest_over_time' in trends_data and not trends_data['interest_over_time'].empty: + processed_data['interest_over_time'] = trends_data['interest_over_time'] + + # Process regional interest + if 'regional_interest' in trends_data and not trends_data['regional_interest'].empty: + processed_data['regional_interest'] = trends_data['regional_interest'] + + # Process related queries + if 'related_queries' in trends_data and not trends_data['related_queries'].empty: + processed_data['related_queries'] = trends_data['related_queries'] + + # Process related topics + if 'related_topics' in trends_data and not trends_data['related_topics'].empty: + processed_data['related_topics'] = trends_data['related_topics'] + + return processed_data \ No newline at end of file diff --git a/lib/alwrity_ui/keyword_web_researcher.py b/lib/alwrity_ui/keyword_web_researcher.py index 12bf1509..d5294ed8 100644 --- a/lib/alwrity_ui/keyword_web_researcher.py +++ b/lib/alwrity_ui/keyword_web_researcher.py @@ -102,7 +102,12 @@ def validate_api_keys(): return api_keys def do_web_research(): - """Input keywords and do web research with advanced options.""" + """Main function to perform web research based on user input.""" + + # Reset session state variables for this research operation + if 'metaphor_results_displayed' in st.session_state: + del st.session_state.metaphor_results_displayed + logger.info("Starting do_web_research function") try: @@ -509,7 +514,7 @@ def do_web_research(): status_display.success("✨ Research completed!") # Display results in an organized way - with st.expander("šŸ“Š Research Results", expanded=True): + with st.expander("šŸ“Š Research Results", expanded=False): st.write(web_research_result) else: st.warning("No results found for your search")