From a2fb77f70094253d0072cc746008720c9efc1a51 Mon Sep 17 00:00:00 2001 From: ajaysi Date: Sat, 5 Apr 2025 14:55:28 +0530 Subject: [PATCH] AI Web Researcher: Added Exa answer and Tavily answer to the metaphor search results Added AI insights to the metaphor search results Better display of AI search results --- .../gpt_online_researcher.py | 283 ++++++++-- .../metaphor_basic_neural_web_search.py | 503 ++++++++++++++++-- lib/ai_web_researcher/tavily_ai_search.py | 126 +++-- lib/alwrity_ui/display_google_serp_results.py | 277 ++++++++++ lib/alwrity_ui/keyword_web_researcher.py | 13 +- 5 files changed, 1087 insertions(+), 115 deletions(-) create mode 100644 lib/alwrity_ui/display_google_serp_results.py diff --git a/lib/ai_web_researcher/gpt_online_researcher.py b/lib/ai_web_researcher/gpt_online_researcher.py index c183d003..d2238143 100644 --- a/lib/ai_web_researcher/gpt_online_researcher.py +++ b/lib/ai_web_researcher/gpt_online_researcher.py @@ -22,14 +22,23 @@ import os import json +import time from pathlib import Path import sys from datetime import datetime +import streamlit as st + +from lib.alwrity_ui.display_google_serp_results import ( + process_research_results, + process_search_results, + display_research_results +) from .tavily_ai_search import get_tavilyai_results -from .metaphor_basic_neural_web_search import metaphor_find_similar, metaphor_search_articles +from .metaphor_basic_neural_web_search import metaphor_search_articles from .google_serp_search import google_search from .google_trends_researcher import do_google_trends_analysis +#from .google_gemini_web_researcher import do_gemini_web_research from loguru import logger # Configure logger @@ -40,59 +49,271 @@ logger.add(sys.stdout, ) - -def gpt_web_researcher(search_keywords): - """ Keyword based web researcher, basic, neural and Semantic search.""" +def gpt_web_researcher(search_keywords, search_mode, **kwargs): + """Keyword based web researcher with progress tracking.""" + + logger.info(f"Starting web research - Keywords: {search_keywords}, Mode: {search_mode}") + logger.debug(f"Additional parameters: {kwargs}") try: - google_search_result = do_google_serp_search(search_keywords) - tavily_search_result = do_tavily_ai_search(search_keywords) - metaphor_search_result = do_metaphor_ai_research(search_keywords) - gtrends_search_result = do_google_pytrends_analysis(search_keywords) - # get_rag_results(search_query) - print(f"\n\nReview the analysis in this file at: {os.environ.get('SEARCH_SAVE_FILE')}\n") + # Initialize result container + research_results = None + + # Create status containers + status_container = st.empty() + progress_bar = st.progress(0) + + def update_progress(message, progress=None, level="info"): + if progress is not None: + progress_bar.progress(progress) + if level == "error": + status_container.error(f"đŸšĢ {message}") + elif level == "warning": + status_container.warning(f"âš ī¸ {message}") + else: + status_container.info(f"🔄 {message}") + logger.debug(f"Progress update [{level}]: {message}") + + if search_mode == "google": + logger.info("Starting Google research pipeline") + + try: + # First try Google SERP + update_progress("Initiating SERP search...", progress=10) + serp_results = do_google_serp_search(search_keywords, **kwargs) + + if serp_results and serp_results.get('organic'): + logger.info("SERP search successful") + update_progress("SERP search completed", progress=40) + research_results = serp_results + else: + logger.warning("SERP search returned no results, falling back to Gemini") + update_progress("No SERP results, trying Gemini...", progress=45) + + # Keep it commented. Fallback to Gemini + #try: + # gemini_results = do_gemini_web_research(search_keywords) + # if gemini_results: + # logger.info("Gemini research successful") + # update_progress("Gemini research completed", progress=80) + # research_results = { + # 'source': 'gemini', + # 'results': gemini_results + # } + #except Exception as gemini_err: + # logger.error(f"Gemini research failed: {gemini_err}") + # update_progress("Gemini research failed", level="warning") + + if research_results: + update_progress("Processing final results...", progress=90) + processed_results = process_research_results(research_results) + + if processed_results: + update_progress("Research completed!", progress=100, level="success") + display_research_results(processed_results) + return processed_results + else: + error_msg = "Failed to process research results" + logger.warning(error_msg) + update_progress(error_msg, level="warning") + return None + else: + error_msg = "No results from either SERP or Gemini" + logger.warning(error_msg) + update_progress(error_msg, level="warning") + return None + + except Exception as search_err: + error_msg = f"Research pipeline failed: {str(search_err)}" + logger.error(error_msg, exc_info=True) + update_progress(error_msg, level="error") + raise + + elif search_mode == "ai": + logger.info("Starting AI research pipeline") + + try: + # Do Tavily AI Search + update_progress("Initiating Tavily AI search...", progress=10) + + # Extract relevant parameters for Tavily search + include_domains = kwargs.pop('include_domains', None) + search_depth = kwargs.pop('search_depth', 'advanced') + + # Pass the parameters to get_tavilyai_results + t_results = get_tavilyai_results( + keywords=search_keywords, + max_results=kwargs.get('num_results', 10), + include_domains=include_domains, + search_depth=search_depth, + **kwargs + ) + + # Do Metaphor AI Search + update_progress("Initiating Metaphor AI search...", progress=50) + metaphor_results, metaphor_titles = do_metaphor_ai_research(search_keywords) + + if metaphor_results is None: + update_progress("Metaphor AI search failed, continuing with Tavily results only...", level="warning") + else: + update_progress("Metaphor AI search completed successfully", progress=75) + + except Exception as ai_err: + error_msg = f"AI research pipeline failed: {str(ai_err)}" + logger.error(error_msg, exc_info=True) + update_progress(error_msg, level="error") + raise + + else: + error_msg = f"Unsupported search mode: {search_mode}" + logger.error(error_msg) + update_progress(error_msg, level="error") + raise ValueError(error_msg) + except Exception as err: - logger.error(f"Failed in gpt_web_researcher: {err}") + error_msg = f"Failed in gpt_web_researcher: {str(err)}" + logger.error(error_msg, exc_info=True) + if 'update_progress' in locals(): + update_progress(error_msg, level="error") + raise -def do_google_serp_search(search_keywords): - """ COmmon function to do google SERP analysis and return results. """ - - # FIXME: Add a return filter to either return full json, titles, PAA, relatedsearches etc. +def do_google_serp_search(search_keywords, status_container, update_progress, **kwargs): + """Perform Google SERP analysis with sidebar progress tracking.""" + + logger.info("="*50) + logger.info("Starting Google SERP Search") + logger.info("="*50) + try: - logger.info(f"Doing Google search for: {search_keywords}\n") + # Validate parameters + update_progress("Validating search parameters") + status_container.info("📝 Validating parameters...") + + if not search_keywords or not isinstance(search_keywords, str): + logger.error(f"Invalid search keywords: {search_keywords}") + raise ValueError("Search keywords must be a non-empty string") + + # Update search initiation + update_progress(f"Initiating search for: '{search_keywords}'") + status_container.info("🌐 Querying search API...") + logger.info(f"Search params: {kwargs}") + + # Execute search g_results = google_search(search_keywords) + if g_results: + # Log success + update_progress("Search completed successfully", "success") + + # Update statistics + stats = f"""Found: + - {len(g_results.get('organic', []))} organic results + - {len(g_results.get('peopleAlsoAsk', []))} related questions + - {len(g_results.get('relatedSearches', []))} related searches""" + update_progress(stats) + + # Process results + update_progress("Processing search results") + status_container.info("⚡ Processing results...") + processed_results = process_search_results(g_results) + + # Extract titles + update_progress("Extracting information") g_titles = extract_info(g_results, 'titles') - return(g_results, g_titles) + + # Final success + update_progress("Analysis completed successfully", "success") + status_container.success("✨ Research completed!") + + # Clear main status after delay + time.sleep(1) + status_container.empty() + + return { + 'results': g_results, + 'titles': g_titles, + 'summary': processed_results, + 'stats': { + 'organic_count': len(g_results.get('organic', [])), + 'questions_count': len(g_results.get('peopleAlsoAsk', [])), + 'related_count': len(g_results.get('relatedSearches', [])) + } + } + + else: + update_progress("No results found", "warning") + status_container.warning("âš ī¸ No results found") + return None + except Exception as err: - logger.error(f"Failed to do Google SERP research: {err}") - return None - # Not failing, as tavily would do same and then GPT-V to search. + error_msg = f"Search failed: {str(err)}" + update_progress(error_msg, "error") + logger.error(error_msg) + logger.debug("Stack trace:", exc_info=True) + raise + + finally: + logger.info("="*50) + logger.info("Google SERP Search function completed") + logger.info("="*50) -def do_tavily_ai_search(search_keywords, max_results=10): +def do_tavily_ai_search(search_keywords, max_results=10, **kwargs): """ Common function to do Tavily AI web research.""" try: - # FIXME: Include the follow-up questions as blog FAQs. logger.info(f"Doing Tavily AI search for: {search_keywords}") - t_results = get_tavilyai_results(search_keywords, max_results) - t_titles = tavily_extract_information(t_results, 'titles') - t_answer = tavily_extract_information(t_results, 'answer') - return(t_results, t_titles, t_answer) + + # Prepare Tavily search parameters + tavily_params = { + 'max_results': max_results, + 'search_depth': 'advanced' if kwargs.get('search_depth', 3) > 2 else 'basic', + 'time_range': kwargs.get('time_range', 'year'), + 'include_domains': kwargs.get('include_domains', [""]) if kwargs.get('include_domains') else [""] + } + + # Pass the parameters to get_tavilyai_results + t_results = get_tavilyai_results( + keywords=search_keywords, + **tavily_params + ) + + if t_results: + t_titles = tavily_extract_information(t_results, 'titles') + t_answer = tavily_extract_information(t_results, 'answer') + return(t_results, t_titles, t_answer) + else: + logger.warning("No results returned from Tavily AI search") + return None, None, None except Exception as err: logger.error(f"Failed to do Tavily AI Search: {err}") + return None, None, None def do_metaphor_ai_research(search_keywords): - """ """ + """ + Perform Metaphor AI research and return results with titles. + + Args: + search_keywords (str): Keywords to search for + + Returns: + tuple: (response_articles, titles) or (None, None) if search fails + """ try: - logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}") + logger.info(f"Start Semantic/Neural web search with Metaphor: {search_keywords}") response_articles = metaphor_search_articles(search_keywords) - m_titles = metaphor_extract_titles_or_text(response_articles, return_titles=True) - return(response_articles, m_titles) + + if response_articles and 'data' in response_articles: + m_titles = [result.get('title', '') for result in response_articles['data'].get('results', [])] + return response_articles, m_titles + else: + logger.warning("No valid results from Metaphor search") + return None, None + except Exception as err: logger.error(f"Failed to do Metaphor search: {err}") + return None, None def do_google_pytrends_analysis(search_keywords): @@ -163,4 +384,4 @@ def tavily_extract_information(json_data, keyword): elif keyword == 'follow-query': return json_data['follow_up_questions'] else: - return f"Invalid keyword: {keyword}" + return f"Invalid keyword: {keyword}" \ No newline at end of file diff --git a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py index 76b5e6a9..209bab1e 100644 --- a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py +++ b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py @@ -116,55 +116,431 @@ def metaphor_find_similar(similar_url): return search_response - -def metaphor_search_articles(query): +def calculate_date_range(time_range: str) -> tuple: """ - Search for articles using the Metaphor API. + Calculate start and end dates based on time range selection. + + Args: + time_range (str): One of 'past_day', 'past_week', 'past_month', 'past_year', 'anytime' + + Returns: + tuple: (start_date, end_date) in ISO format with milliseconds + """ + now = datetime.utcnow() + end_date = now.strftime('%Y-%m-%dT%H:%M:%S.999Z') + + if time_range == 'past_day': + start_date = (now - timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%S.000Z') + elif time_range == 'past_week': + start_date = (now - timedelta(weeks=1)).strftime('%Y-%m-%dT%H:%M:%S.000Z') + elif time_range == 'past_month': + start_date = (now - timedelta(days=30)).strftime('%Y-%m-%dT%H:%M:%S.000Z') + elif time_range == 'past_year': + start_date = (now - timedelta(days=365)).strftime('%Y-%m-%dT%H:%M:%S.000Z') + else: # anytime + start_date = None + end_date = None + + return start_date, end_date + +def metaphor_search_articles(query, search_options: dict = None): + """ + Search for articles using the Metaphor/Exa API. Args: query (str): The search query. - num_results (int): Number of results to retrieve. - use_autoprompt (bool): Whether to use autoprompt. - include_domains (list): List of domains to include. - time_range (str): Time range for published articles ("day", "week", "month", "year", "anytime"). + search_options (dict): Search configuration options including: + - num_results (int): Number of results to retrieve + - use_autoprompt (bool): Whether to use autoprompt + - include_domains (list): List of domains to include + - time_range (str): One of 'past_day', 'past_week', 'past_month', 'past_year', 'anytime' + - exclude_domains (list): List of domains to exclude Returns: - MetaphorResponse: The response from the Metaphor API. + dict: Search results and metadata """ - metaphor = get_metaphor_client() + exa = get_metaphor_client() try: - include_domains, start_published_date, num_results, similar_url = cfg_search_param('exa') - - logger.info(f"Metaphor web search with Date: {start_published_date} and Query: {query}") + # Initialize default search options + if search_options is None: + search_options = {} + + # Get config parameters or use defaults try: - search_response = metaphor.search_and_contents( - query, - include_domains=include_domains, - use_autoprompt=True, - start_published_date=start_published_date, - num_results=num_results - ) - except Exception as err: - logger.error(f"Failed in metaphor.search_and_contents: {err}") - - # From each webpage, get a summary of the web page. - contents_response = search_response.results - # FIXME: Need to summarize for smaller input context window. -# for content in tqdm(contents_response, desc="Reading Web URL content:", unit="content"): -# summarized_content = summarize_web_content(content.text, "gemini") -# content.text = summarized_content - - print_search_result(contents_response) + include_domains, _, num_results, _ = cfg_search_param('exa') + except Exception as cfg_err: + logger.warning(f"Failed to load config parameters: {cfg_err}. Using defaults.") + include_domains = None + num_results = 10 + + # Calculate date range based on time_range option + time_range = search_options.get('time_range', 'anytime') + start_published_date, end_published_date = calculate_date_range(time_range) + + # Prepare search parameters + search_params = { + 'num_results': search_options.get('num_results', num_results), + 'summary': True, # Always get summaries + 'include_domains': search_options.get('include_domains', include_domains), + 'use_autoprompt': search_options.get('use_autoprompt', True), + } + + # Add date parameters only if they are not None + if start_published_date: + search_params['start_published_date'] = start_published_date + if end_published_date: + search_params['end_published_date'] = end_published_date + + logger.info(f"Exa web search with params: {search_params} and Query: {query}") + + # Execute search + search_response = exa.search_and_contents( + query, + **search_params + ) + + if not search_response or not hasattr(search_response, 'results'): + logger.warning("No results returned from Exa search") + return None + + # Get cost information safely + try: + cost_dollars = { + 'total': float(search_response.cost_dollars['total']), + } if hasattr(search_response, 'cost_dollars') else None + except Exception as cost_err: + logger.warning(f"Error processing cost information: {cost_err}") + cost_dollars = None + + # Format response to match expected structure + formatted_response = { + "data": { + "requestId": getattr(search_response, 'request_id', None), + "resolvedSearchType": "neural", + "results": [ + { + "id": result.url, + "title": result.title, + "url": result.url, + "publishedDate": result.published_date if hasattr(result, 'published_date') else None, + "author": getattr(result, 'author', None), + "score": getattr(result, 'score', 0), + "summary": result.summary if hasattr(result, 'summary') else None, + "text": result.text if hasattr(result, 'text') else None, + "image": getattr(result, 'image', None), + "favicon": getattr(result, 'favicon', None) + } + for result in search_response.results + ], + "costDollars": cost_dollars + } + } + + # Get AI-generated answer from Metaphor + try: + exa_answer = get_exa_answer(query) + if exa_answer: + formatted_response.update(exa_answer) + except Exception as exa_err: + logger.warning(f"Error getting Exa answer: {exa_err}") + + # Get AI-generated answer from Tavily + try: + # Import the function directly from the module + import importlib + tavily_module = importlib.import_module('lib.ai_web_researcher.tavily_ai_search') + if hasattr(tavily_module, 'do_tavily_ai_search'): + tavily_response = tavily_module.do_tavily_ai_search(query) + if tavily_response and 'answer' in tavily_response: + formatted_response.update({ + "tavily_answer": tavily_response.get("answer"), + "tavily_citations": tavily_response.get("citations", []), + "tavily_cost_dollars": tavily_response.get("costDollars", {"total": 0}) + }) + else: + logger.warning("do_tavily_ai_search function not found in tavily_ai_search module") + except Exception as tavily_err: + logger.warning(f"Error getting Tavily answer: {tavily_err}") + + # Display results in Streamlit + streamlit_display_metaphor_results(formatted_response) + return formatted_response - if similar_url: - logger.info(f"Doing similar/semantic search for URL: {similar_url}") - metaphor_find_similar(similar_url) - return contents_response - except Exception as e: - logger.error(f"Error in Metaphor searching articles: {e}") - raise + logger.error(f"Error in Exa searching articles: {e}") + return None +def streamlit_display_metaphor_results(metaphor_response: dict): + """ + Display Metaphor search results in Streamlit with enhanced metrics and popovers + + Args: + metaphor_response (dict): Response from Metaphor search + """ + if not metaphor_response or 'data' not in metaphor_response: + st.error("No valid Metaphor search results to display") + return + + # Initialize session state variables if they don't exist + if 'search_insights' not in st.session_state: + st.session_state.search_insights = None + + if 'metaphor_response' not in st.session_state: + st.session_state.metaphor_response = metaphor_response + + if 'insights_generated' not in st.session_state: + st.session_state.insights_generated = False + + # Update the stored metaphor_response with the latest data + st.session_state.metaphor_response = metaphor_response + + # Display metrics in columns + col1, col2, col3 = st.columns(3) + + # Calculate metrics + results = metaphor_response['data']['results'] + total_results = len(results) + avg_score = sum(r['score'] for r in results if r['score']) / total_results if total_results > 0 else 0 + + with col1: + st.metric( + label="Total Results", + value=total_results + ) + with col2: + if metaphor_response['data'].get('costDollars'): + cost = metaphor_response['data']['costDollars'] + st.metric( + label="Search Cost", + value=f"${cost['total']:.3f}" + ) + with col3: + st.metric( + label="Average Relevance Score", + value=f"{avg_score:.2f}" + ) + + # Display AI-generated answers side by side + if 'answer' in metaphor_response or 'tavily_answer' in metaphor_response: + st.markdown("### 🤖 AI-Generated Research Answers") + + # Create two columns for side-by-side display + tavily_col, metaphor_col = st.columns(2) + + # Display Tavily answer if available + with tavily_col: + if 'tavily_answer' in metaphor_response: + st.markdown("#### 🔍 Tavily AI Answer") + st.markdown(f""" +
+ {metaphor_response['tavily_answer']} +
+ """, unsafe_allow_html=True) + + if metaphor_response.get('tavily_cost_dollars'): + st.caption(f"Tavily Answer Cost: ${metaphor_response['tavily_cost_dollars']['total']:.3f}") + + if metaphor_response.get('tavily_citations'): + with st.expander("📚 Tavily Sources"): + for idx, citation in enumerate(metaphor_response['tavily_citations'], 1): + st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})") + else: + st.markdown("#### 🔍 Tavily AI Answer") + st.info("No Tavily answer available for this query.") + + # Display Metaphor answer if available + with metaphor_col: + if 'answer' in metaphor_response: + st.markdown("#### 🔍 Metaphor AI Answer") + st.markdown(f""" +
+ {metaphor_response['answer']} +
+ """, unsafe_allow_html=True) + + if metaphor_response.get('answerCostDollars'): + st.caption(f"Metaphor Answer Cost: ${metaphor_response['answerCostDollars']['total']:.3f}") + + if metaphor_response.get('citations'): + with st.expander("📚 Metaphor Sources"): + for idx, citation in enumerate(metaphor_response['citations'], 1): + st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})") + else: + st.markdown("#### 🔍 Metaphor AI Answer") + st.info("No Metaphor answer available for this query.") + + # Add "Get Search Insights" button - moved outside the AI answers conditional + st.markdown("### 🔍 Search Insights") + + # Create a container for the insights + insights_container = st.container() + + # Use a button with a callback function + if st.button("Generate Search Insights", type="primary"): + # Set a flag in session state to indicate that insights should be generated + st.session_state.insights_generated = True + + # Store the current metaphor_response in session state + st.session_state.metaphor_response = metaphor_response + + # Redirect to the same page with a query parameter to trigger insights generation + st.experimental_rerun() + + # If insights should be generated, do it in a separate container + if st.session_state.insights_generated: + with insights_container: + with st.spinner("Analyzing search results to generate insights..."): + # Get the stored metaphor_response from session state + stored_response = st.session_state.metaphor_response + stored_results = stored_response['data']['results'] + + # Prepare data for analysis + analysis_data = { + "metaphor_results": stored_results, + "metaphor_answer": stored_response.get("answer", ""), + "tavily_answer": stored_response.get("tavily_answer", ""), + "metaphor_citations": stored_response.get("citations", []), + "tavily_citations": stored_response.get("tavily_citations", []) + } + + # Create the analysis prompt + analysis_prompt = f""" + **Search Intent & User Needs Analysis** + + I have conducted research using both Tavily and Metaphor AI search engines. + Below is the data from both sources: + + **Metaphor AI Answer:** + {analysis_data["metaphor_answer"]} + + **Tavily AI Answer:** + {analysis_data["tavily_answer"]} + + **Search Results:** + {[f"{i+1}. {r['title']} - {r['summary']}" for i, r in enumerate(analysis_data["metaphor_results"])]} + + **Citations:** + {[f"{i+1}. {c.get('title', 'Untitled')} - {c.get('url', 'No URL')}" for i, c in enumerate(analysis_data["metaphor_citations"] + analysis_data["tavily_citations"])]} + + Based on this research data, please provide the following insights: + + **Search Intent & User Needs** + ``` + Review the research data and identify: + 1. The distribution of search intent (categorize as Informational/Commercial/Navigational/Transactional) + 2. Most common user questions and their patterns + 3. Frequently mentioned pain points or challenges + 4. Recurring solutions or approaches to addressing these challenges + 5. Gaps between user questions and available answers + + Present findings in a structured format with percentages and specific examples. + ``` + + Format your response as a comprehensive analysis with clear sections, bullet points, and examples from the research data. + """ + + try: + # Import the llm_text_gen function + import importlib + text_gen_module = importlib.import_module('lib.gpt_providers.text_generation.main_text_generation') + if hasattr(text_gen_module, 'llm_text_gen'): + # Generate insights using llm_text_gen + insights = text_gen_module.llm_text_gen(analysis_prompt) + + # Store insights in session state + st.session_state.search_insights = insights + + # Reset the flag to prevent regeneration on next rerun + st.session_state.insights_generated = False + else: + st.error("Could not find llm_text_gen function in the text generation module.") + except Exception as e: + st.error(f"Error generating insights: {str(e)}") + logger.error(f"Error generating insights: {e}") + + # Display insights if they exist in session state + if st.session_state.search_insights: + with insights_container: + st.markdown("### 🔍 Search Intent & User Needs Analysis") + st.markdown(st.session_state.search_insights) + + # Create DataFrame from results + df = pd.DataFrame(results) + + # Prepare data for display + display_df = df.copy() + display_df['Visit Site'] = display_df['url'] + + # Format publishedDate as string if it exists + if 'publishedDate' in display_df.columns: + display_df['publishedDate'] = display_df['publishedDate'].apply( + lambda x: x[:10] if isinstance(x, str) else 'N/A' + ) + + # Configure columns for data editor + columns = { + 'title': st.column_config.TextColumn( + 'Title', + width='large', + required=True, + ), + 'author': st.column_config.TextColumn( + 'Author', + width='medium', + ), + 'publishedDate': st.column_config.TextColumn( + 'Published Date', + width='medium', + ), + 'score': st.column_config.NumberColumn( + 'Relevance Score', + width='small', + format="%.2f" + ), + 'Visit Site': st.column_config.LinkColumn( + 'Link', + width='small', + display_text='Visit Site', + ), + 'summary': st.column_config.TextColumn( + 'Summary', + width='large', + required=True, + ) + } + + # Display results in data editor + st.data_editor( + display_df, + column_config=columns, + hide_index=True, + num_rows='dynamic', + disabled=True, + column_order=['title', 'author', 'publishedDate', 'score', 'summary', 'Visit Site'] + ) + + # Display detailed summaries with popovers + st.write("### Detailed Summaries") + for idx, result in enumerate(results, 1): + with st.expander(f"📄 {result['title']}", expanded=False): + col1, col2 = st.columns([3, 1]) + with col1: + st.markdown(f"**Summary**") + st.markdown(result['summary']) + with col2: + st.markdown("**Details**") + st.markdown(f"**Author:** {result['author'] if result['author'] else 'N/A'}") + st.markdown(f"**Published:** {result['publishedDate'][:10] if result['publishedDate'] else 'N/A'}") + st.markdown(f"**Score:** {result['score']:.2f}") + st.markdown(f"[Visit Site]({result['url']})") + + # Display search metadata + st.divider() + col1, col2 = st.columns(2) + with col1: + st.caption(f"Search Type: {metaphor_response['data']['resolvedSearchType']}") + with col2: + st.caption(f"Request ID: {metaphor_response['data']['requestId']}") def metaphor_news_summarizer(news_keywords): @@ -240,3 +616,56 @@ def metaphor_scholar_search(query, include_domains=None, time_range="anytime"): return response except Exception as e: logger.error(f"Error in searching papers: {e}") + +def get_exa_answer(query: str, system_prompt: str = None) -> dict: + """ + Get an AI-generated answer for a query using Exa's answer endpoint. + + Args: + query (str): The search query to get an answer for + system_prompt (str, optional): Custom system prompt for the LLM. If None, uses default prompt. + + Returns: + dict: Response containing answer, citations, and cost information + { + "answer": str, + "citations": list[dict], + "costDollars": dict + } + """ + exa = get_metaphor_client() + try: + # Use default system prompt if none provided + if system_prompt is None: + system_prompt = ( + "I am doing research to write factual content. " + "Help me find answers for content generation task. " + "Provide detailed, well-structured answers with clear citations." + ) + + logger.info(f"Getting Exa answer for query: {query}") + logger.debug(f"Using system prompt: {system_prompt}") + + # Make API call to get answer with system_prompt parameter + result = exa.answer( + query, + model="exa", + text=True # Include full text in citations + ) + + if not result or not result.get('answer'): + logger.warning("No answer received from Exa") + return None + + # Format response to match expected structure + response = { + "answer": result.get('answer'), + "citations": result.get('citations', []), + "costDollars": result.get('costDollars', {"total": 0}) + } + + return response + + except Exception as e: + logger.error(f"Error getting Exa answer: {e}") + return None diff --git a/lib/ai_web_researcher/tavily_ai_search.py b/lib/ai_web_researcher/tavily_ai_search.py index 35d9f2c9..3063a7d0 100644 --- a/lib/ai_web_researcher/tavily_ai_search.py +++ b/lib/ai_web_researcher/tavily_ai_search.py @@ -49,17 +49,9 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) -def get_tavilyai_results(keywords, max_results=5): +def get_tavilyai_results(keywords, max_results=5, include_domains=None, search_depth="advanced", **kwargs): """ Get Tavily AI search results based on specified keywords and options. - - Args: - keywords (str): Keywords for Tavily AI search. - include_urls (str): Comma-separated URLs to include in the search. - search_depth (str, optional): Search depth option (default is "advanced"). - - Returns: - dict: Tavily AI search results. """ # Run Tavily search logger.info(f"Running Tavily search on: {keywords}") @@ -74,56 +66,100 @@ def get_tavilyai_results(keywords, max_results=5): client = TavilyClient(api_key=api_key) except Exception as err: logger.error(f"Failed to create Tavily client. Check TAVILY_API_KEY: {err}") - - # Read search config params from the file. - try: - include_urls = cfg_search_param('tavily') - except Exception as err: - logger.error(f"Failed to read search params from main_config: {err}") + raise try: - if include_urls: - tavily_search_result = client.search(keywords, - search_depth="advanced", - include_answer=True, - max_results=max_results, - include_domains=include_urls) - else: - tavily_search_result = client.search(keywords, - search_depth = "advanced", - include_answer=True, - max_results=max_results) + # Create search parameters exactly matching Tavily's API format + tavily_search_result = client.search( + query=keywords, + search_depth="advanced", + time_range="year", + include_answer="advanced", + include_domains=[""] if not include_domains else include_domains, + max_results=max_results + ) + + if tavily_search_result: + print_result_table(tavily_search_result) + streamlit_display_results(tavily_search_result) + return tavily_search_result + return None - print_result_table(tavily_search_result) - streamlit_display_results(tavily_search_result) - return(tavily_search_result) except Exception as err: logger.error(f"Failed to do Tavily Research: {err}") + raise def streamlit_display_results(output_data): - """Display Tavily AI search results in Streamlit UI.""" + """Display Tavily AI search results in Streamlit UI with enhanced visualization.""" - # Prepare data for display - table_data = [] + # Display the 'answer' in Streamlit with enhanced styling + answer = output_data.get("answer", "No answer available") + st.markdown("### 🤖 AI-Generated Answer") + st.markdown(f""" +
+ {answer} +
+ """, unsafe_allow_html=True) + + # Display follow-up questions if available + follow_up_questions = output_data.get("follow_up_questions", []) + if follow_up_questions: + st.markdown("### ❓ Follow-up Questions") + for i, question in enumerate(follow_up_questions, 1): + st.markdown(f"**{i}.** {question}") + + # Prepare data for display with dataeditor + st.markdown("### 📊 Search Results") + + # Create a DataFrame for the results + import pandas as pd + results_data = [] + for item in output_data.get("results", []): title = item.get("title", "") snippet = item.get("content", "") link = item.get("url", "") - table_data.append([title, snippet, link]) + results_data.append({ + "Title": title, + "Content": snippet, + "Link": link + }) + + if results_data: + df = pd.DataFrame(results_data) + + # Display the data editor + st.data_editor( + df, + column_config={ + "Title": st.column_config.TextColumn( + "Title", + help="Article title", + width="medium", + ), + "Content": st.column_config.TextColumn( + "Content", + help="Click the button below to view full content", + width="large", + ), + "Link": st.column_config.LinkColumn( + "Link", + help="Click to visit the website", + width="small", + display_text="Visit Site" + ), + }, + hide_index=True, + use_container_width=True, + ) - # Display the table in Streamlit - st.table(table_data) - - # Display the 'answer' in Streamlit - answer = output_data.get("answer", "No answer available") - st.write(f"**The answer to your search query:** {answer}") - - # Display follow-up questions if available - follow_up_questions = output_data.get("follow_up_questions", []) - if follow_up_questions: - st.write(f"**Follow-up questions for the query:** {output_data.get('query')}") - st.write(", ".join(follow_up_questions)) + # Add popovers for full content display + for item in output_data.get("results", []): + with st.popover(f"View content: {item.get('title', '')[:50]}..."): + st.markdown(item.get("content", "")) + else: + st.info("No results found for your search query.") def print_result_table(output_data): diff --git a/lib/alwrity_ui/display_google_serp_results.py b/lib/alwrity_ui/display_google_serp_results.py new file mode 100644 index 00000000..636cadf1 --- /dev/null +++ b/lib/alwrity_ui/display_google_serp_results.py @@ -0,0 +1,277 @@ +import streamlit as st +import logging +from datetime import datetime +from typing import Dict, Optional, Any + +# Configure module logger +logger = logging.getLogger(__name__) + +def display_research_results(results: Dict[str, Any]) -> None: + """ + Display research results in a structured format with tabs. + + Args: + results (dict): Processed research results containing summary and data + """ + if not results: + st.warning("No results to display") + return + + # Create tabs for different result sections + tabs = st.tabs(["📊 Summary", "🔍 Results", "📈 Statistics"]) + + with tabs[0]: + display_summary_section(results) + + with tabs[1]: + if results['source'] == 'gemini': + display_gemini_results(results) + else: + display_serp_results(results) + + with tabs[2]: + display_statistics(results) + +def process_research_results(results: Dict[str, Any]) -> Optional[Dict[str, Any]]: + """Process and format research results.""" + logger.info("Processing research results") + + try: + if not results: + return None + + processed = { + 'timestamp': str(datetime.now()), + 'source': results.get('source', 'unknown'), + 'summary': {}, + 'data': {} + } + + if results.get('source') == 'gemini': + processed.update(process_gemini_results(results)) + else: + processed.update(process_serp_results(results)) + + logger.info("Results processing completed") + return processed + + except Exception as err: + logger.error(f"Failed to process results: {err}", exc_info=True) + return None + +def process_search_results(search_results: Dict[str, Any], search_type: str = "general") -> Optional[Dict[str, Any]]: + """Process search results and prepare for display.""" + logger.info(f"Processing {search_type} search results") + + try: + if not search_results: + return None + + processed = { + 'organic': process_organic_results(search_results.get('organic', [])), + 'peopleAlsoAsk': process_paa_results(search_results.get('peopleAlsoAsk', [])), + 'relatedSearches': process_related_searches(search_results.get('relatedSearches', [])), + 'metadata': { + 'timestamp': str(datetime.now()), + 'type': search_type + } + } + + return processed + + except Exception as err: + logger.error(f"Error processing search results: {err}", exc_info=True) + return None + +# Helper functions for result processing +def process_organic_results(results): + """Process organic search results.""" + return [{ + 'title': result.get('title', 'No Title'), + 'link': result.get('link', '#'), + 'snippet': result.get('snippet', 'No snippet available'), + 'position': result.get('position', 'N/A') + } for result in results] + +def process_paa_results(results): + """Process People Also Ask results.""" + return [{ + 'question': result.get('title', ''), + 'answer': result.get('snippet', 'No answer available'), + 'link': result.get('link', '#') + } for result in results] + +def process_related_searches(results): + """Process related searches.""" + return [query.get('query', '') for query in results] + +def process_gemini_results(results: Dict[str, Any]) -> Dict[str, Any]: + """ + Process Gemini API research results. + + Args: + results (dict): Raw Gemini research results + + Returns: + dict: Processed results with summary and data + """ + gemini_data = results.get('results', {}) + return { + 'summary': { + 'main_findings': gemini_data.get('main_response', ''), + 'sources': gemini_data.get('grounding_data', []), + 'processing_time': gemini_data.get('metadata', {}).get('timestamp'), + 'total_sources': len(gemini_data.get('grounding_data', [])), + 'model': gemini_data.get('metadata', {}).get('model', 'unknown') + }, + 'data': gemini_data + } + +def process_serp_results(results: Dict[str, Any]) -> Dict[str, Any]: + """ + Process SERP search results. + + Args: + results (dict): Raw SERP results + + Returns: + dict: Processed results with summary and data + """ + organic_results = results.get('organic', []) + paa_results = results.get('peopleAlsoAsk', []) + related_searches = results.get('relatedSearches', []) + + return { + 'summary': { + 'total_results': len(organic_results), + 'sources': [result.get('link') for result in organic_results], + 'titles': [result.get('title') for result in organic_results], + 'total_questions': len(paa_results), + 'total_related': len(related_searches) + }, + 'data': { + 'organic': process_organic_results(organic_results), + 'peopleAlsoAsk': process_paa_results(paa_results), + 'relatedSearches': process_related_searches(related_searches) + } + } + +# Display helper functions +def display_summary_section(results): + """Display summary section of results.""" + st.markdown("### 📋 Research Summary") + st.markdown(f""" + - **Source**: {results['source'].title()} + - **Time**: {results['timestamp']} + - **Total Sources**: {len(results.get('summary', {}).get('sources', []))} + """) + +def display_gemini_results(results): + """Display Gemini-specific results.""" + st.markdown("### 🤖 Gemini Research Findings") + st.write(results['summary']['main_findings']) + + with st.expander("🌐 Sources and References", expanded=False): + st.write(results['data'].get('grounding_data', 'No sources available')) + +def display_serp_results(results): + """Display SERP-specific results.""" + st.markdown("### 🔍 Search Results") + + for result in results['data'].get('organic', []): + with st.expander(f"📄 {result['title']}", expanded=False): + st.markdown(f""" + **Rank:** {result['position']} + + **Link:** [{result['link']}]({result['link']}) + + **Snippet:** + {result['snippet']} + """) + +def display_statistics(results: Dict[str, Any]) -> None: + """ + Display statistical information about search results. + + Args: + results (dict): Processed research results + """ + st.markdown("### 📈 Research Statistics") + + # Source-specific metrics + if results['source'] == 'gemini': + col1, col2 = st.columns(2) + with col1: + st.metric( + "Sources Analyzed", + results.get('summary', {}).get('total_sources', 0) + ) + with col2: + st.metric( + "Model Used", + results.get('summary', {}).get('model', 'Unknown') + ) + + else: # SERP results + col1, col2, col3 = st.columns(3) + with col1: + st.metric( + "Organic Results", + results.get('summary', {}).get('total_results', 0) + ) + with col2: + st.metric( + "Related Questions", + results.get('summary', {}).get('total_questions', 0) + ) + with col3: + st.metric( + "Related Searches", + results.get('summary', {}).get('total_related', 0) + ) + + # Common metrics + st.markdown("#### 🕒 Timing Information") + st.info(f"Research completed at: {results['timestamp']}") + + # Display data quality metrics + st.markdown("#### 📊 Data Quality") + quality_metrics = calculate_quality_metrics(results) + + col1, col2 = st.columns(2) + with col1: + st.progress(quality_metrics['completeness']) + st.caption("Data Completeness") + with col2: + st.progress(quality_metrics['relevance']) + st.caption("Estimated Relevance") + +def calculate_quality_metrics(results: Dict[str, Any]) -> Dict[str, float]: + """ + Calculate quality metrics for the research results. + + Args: + results (dict): Processed research results + + Returns: + dict: Quality metrics including completeness and relevance scores + """ + try: + if results['source'] == 'gemini': + completeness = 1.0 if results['summary']['main_findings'] else 0.0 + relevance = 0.8 if results['summary']['sources'] else 0.4 + else: + organic_results = results.get('summary', {}).get('total_results', 0) + completeness = min(organic_results / 10, 1.0) # Normalize to 0-1 + has_paa = bool(results.get('summary', {}).get('total_questions', 0)) + has_related = bool(results.get('summary', {}).get('total_related', 0)) + relevance = (0.6 + (0.2 if has_paa else 0) + (0.2 if has_related else 0)) + + return { + 'completeness': completeness, + 'relevance': relevance + } + + except Exception as err: + logger.error(f"Error calculating quality metrics: {err}") + return {'completeness': 0.0, 'relevance': 0.0} \ No newline at end of file diff --git a/lib/alwrity_ui/keyword_web_researcher.py b/lib/alwrity_ui/keyword_web_researcher.py index 97a9ed5c..12bf1509 100644 --- a/lib/alwrity_ui/keyword_web_researcher.py +++ b/lib/alwrity_ui/keyword_web_researcher.py @@ -323,7 +323,8 @@ def do_web_research(): "num_results": 10, "time_range": "past month", "include_domains": "", - "similar_url": "" + "similar_url": "", + "search_mode": "google" # Default search mode } # Define the research options dialog function @@ -425,6 +426,12 @@ def do_web_research(): horizontal=True, help="Choose your preferred research method" ) + + # Map the selected option to the search_mode value + for mode, label, _, _ in search_options: + if label == selected_option: + st.session_state.research_options["search_mode"] = mode + break else: st.warning("No search methods available. Please configure API keys.") @@ -439,7 +446,7 @@ def do_web_research(): st.rerun() # Main interface - st.title("Keyword Research Assistant") + st.title("ALwrity Web Researcher") # Primary search area with help popover with st.popover("â„šī¸ Keyword Research Tips"): @@ -450,6 +457,7 @@ def do_web_research(): 3. **Search Depth**: Higher depth = more comprehensive but slower 4. **Target Audience**: Affects content recommendations 5. **Content Type**: Influences research focus + 6. **Search Mode**: Choose between traditional web research(Google), AI-powered search(Tavily and Metaphor) and Deep Researcher """) col1, col2 = st.columns([3, 1]) @@ -484,6 +492,7 @@ def do_web_research(): # Execute search with all parameters web_research_result = gpt_web_researcher( search_keywords=st.session_state.research_options["primary_keywords"], + search_mode=st.session_state.research_options["search_mode"], related_keywords=st.session_state.research_options["related_keywords"], target_audience=st.session_state.research_options["target_audience"], content_type=st.session_state.research_options["content_type"],