diff --git a/lib/ai_web_researcher/gpt_online_researcher.py b/lib/ai_web_researcher/gpt_online_researcher.py
index c183d003..d2238143 100644
--- a/lib/ai_web_researcher/gpt_online_researcher.py
+++ b/lib/ai_web_researcher/gpt_online_researcher.py
@@ -22,14 +22,23 @@
import os
import json
+import time
from pathlib import Path
import sys
from datetime import datetime
+import streamlit as st
+
+from lib.alwrity_ui.display_google_serp_results import (
+ process_research_results,
+ process_search_results,
+ display_research_results
+)
from .tavily_ai_search import get_tavilyai_results
-from .metaphor_basic_neural_web_search import metaphor_find_similar, metaphor_search_articles
+from .metaphor_basic_neural_web_search import metaphor_search_articles
from .google_serp_search import google_search
from .google_trends_researcher import do_google_trends_analysis
+#from .google_gemini_web_researcher import do_gemini_web_research
from loguru import logger
# Configure logger
@@ -40,59 +49,271 @@ logger.add(sys.stdout,
)
-
-def gpt_web_researcher(search_keywords):
- """ Keyword based web researcher, basic, neural and Semantic search."""
+def gpt_web_researcher(search_keywords, search_mode, **kwargs):
+    """Run keyword-based web research and report progress in the Streamlit UI.
+
+    Args:
+        search_keywords: Query text handed to the search back ends.
+        search_mode: "google" runs the Google SERP pipeline; "ai" runs Tavily
+            followed by Metaphor. Any other value raises ValueError.
+        **kwargs: Extra search options. In "google" mode they are forwarded to
+            do_google_serp_search. In "ai" mode `include_domains`,
+            `search_depth`, `max_results` and `num_results` are read and the
+            remainder is forwarded to get_tavilyai_results.
+
+    Returns:
+        The output of process_research_results in "google" mode, or None when
+        the search or the processing step produced nothing. "ai" mode
+        currently returns None.
+
+    Raises:
+        ValueError: If search_mode is neither "google" nor "ai".
+        Exception: Any pipeline failure is logged, shown in the status
+            container and re-raised.
+    """
+
+    logger.info(f"Starting web research - Keywords: {search_keywords}, Mode: {search_mode}")
+    logger.debug(f"Additional parameters: {kwargs}")
+    # Rebound inside the try block; stays None when the Streamlit widgets
+    # could not be created, so the outer handler knows whether it can use it.
+    update_progress = None
try:
- google_search_result = do_google_serp_search(search_keywords)
- tavily_search_result = do_tavily_ai_search(search_keywords)
- metaphor_search_result = do_metaphor_ai_research(search_keywords)
- gtrends_search_result = do_google_pytrends_analysis(search_keywords)
- # get_rag_results(search_query)
- print(f"\n\nReview the analysis in this file at: {os.environ.get('SEARCH_SAVE_FILE')}\n")
+        # Initialize result container
+        research_results = None
+
+        # Create status containers
+        status_container = st.empty()
+        progress_bar = st.progress(0)
+
+        def update_progress(message, progress=None, level="info"):
+            """Refresh the progress bar and status line, and log the message."""
+            if progress is not None:
+                progress_bar.progress(progress)
+            if level == "error":
+                status_container.error(f"đĢ {message}")
+            elif level == "warning":
+                status_container.warning(f"â ī¸ {message}")
+            elif level == "success":
+                # "success" previously fell through to the info style below.
+                status_container.success(message)
+            else:
+                status_container.info(f"đ {message}")
+            logger.debug(f"Progress update [{level}]: {message}")
+
+        def serp_progress(message, level="info"):
+            """Adapter for do_google_serp_search, which passes the level as
+            its second positional argument (that slot is `progress` above)."""
+            update_progress(message, level=level)
+
+        if search_mode == "google":
+            logger.info("Starting Google research pipeline")
+
+            try:
+                # First try Google SERP
+                update_progress("Initiating SERP search...", progress=10)
+                serp_results = do_google_serp_search(
+                    search_keywords, status_container, serp_progress, **kwargs
+                )
+
+                # do_google_serp_search nests the raw SERP payload under
+                # 'results'; a payload with 'organic' at the top level is
+                # accepted as well.
+                organic_hits = None
+                if serp_results:
+                    raw_payload = serp_results.get('results')
+                    organic_hits = serp_results.get('organic')
+                    if not organic_hits and isinstance(raw_payload, dict):
+                        organic_hits = raw_payload.get('organic')
+
+                if organic_hits:
+                    logger.info("SERP search successful")
+                    update_progress("SERP search completed", progress=40)
+                    research_results = serp_results
+                else:
+                    logger.warning("SERP search returned no results, falling back to Gemini")
+                    update_progress("No SERP results, trying Gemini...", progress=45)
+
+                    # Keep it commented. Fallback to Gemini
+                    #try:
+                    # gemini_results = do_gemini_web_research(search_keywords)
+                    # if gemini_results:
+                    # logger.info("Gemini research successful")
+                    # update_progress("Gemini research completed", progress=80)
+                    # research_results = {
+                    # 'source': 'gemini',
+                    # 'results': gemini_results
+                    # }
+                    #except Exception as gemini_err:
+                    # logger.error(f"Gemini research failed: {gemini_err}")
+                    # update_progress("Gemini research failed", level="warning")
+
+                if research_results:
+                    update_progress("Processing final results...", progress=90)
+                    processed_results = process_research_results(research_results)
+
+                    if processed_results:
+                        update_progress("Research completed!", progress=100, level="success")
+                        display_research_results(processed_results)
+                        return processed_results
+                    else:
+                        error_msg = "Failed to process research results"
+                        logger.warning(error_msg)
+                        update_progress(error_msg, level="warning")
+                        return None
+                else:
+                    error_msg = "No results from either SERP or Gemini"
+                    logger.warning(error_msg)
+                    update_progress(error_msg, level="warning")
+                    return None
+
+            except Exception as search_err:
+                error_msg = f"Research pipeline failed: {str(search_err)}"
+                # loguru does not understand exc_info=True; logger.exception
+                # is what attaches the traceback.
+                logger.exception(error_msg)
+                update_progress(error_msg, level="error")
+                raise
+
+        elif search_mode == "ai":
+            logger.info("Starting AI research pipeline")
+
+            try:
+                # Do Tavily AI Search
+                update_progress("Initiating Tavily AI search...", progress=10)
+
+                # Pull out every option that is passed by name below, so it
+                # is not sent a second time through **kwargs.
+                include_domains = kwargs.pop('include_domains', None)
+                search_depth = kwargs.pop('search_depth', 'advanced')
+                max_results = kwargs.pop('max_results', kwargs.get('num_results', 10))
+
+                # Pass the parameters to get_tavilyai_results
+                t_results = get_tavilyai_results(
+                    keywords=search_keywords,
+                    max_results=max_results,
+                    include_domains=include_domains,
+                    search_depth=search_depth,
+                    **kwargs
+                )
+
+                # Do Metaphor AI Search
+                update_progress("Initiating Metaphor AI search...", progress=50)
+                metaphor_results, metaphor_titles = do_metaphor_ai_research(search_keywords)
+
+                if metaphor_results is None:
+                    update_progress("Metaphor AI search failed, continuing with Tavily results only...", level="warning")
+                else:
+                    update_progress("Metaphor AI search completed successfully", progress=75)
+
+                # NOTE(review): t_results / metaphor_results are not returned,
+                # so this mode yields None to the caller - confirm whether
+                # callers rely on the in-place Streamlit rendering only.
+
+            except Exception as ai_err:
+                error_msg = f"AI research pipeline failed: {str(ai_err)}"
+                logger.exception(error_msg)
+                update_progress(error_msg, level="error")
+                raise
+
+        else:
+            error_msg = f"Unsupported search mode: {search_mode}"
+            logger.error(error_msg)
+            update_progress(error_msg, level="error")
+            raise ValueError(error_msg)
+
except Exception as err:
- logger.error(f"Failed in gpt_web_researcher: {err}")
+        error_msg = f"Failed in gpt_web_researcher: {str(err)}"
+        logger.exception(error_msg)
+        if update_progress is not None:
+            update_progress(error_msg, level="error")
+        raise
-def do_google_serp_search(search_keywords):
- """ COmmon function to do google SERP analysis and return results. """
-
- # FIXME: Add a return filter to either return full json, titles, PAA, relatedsearches etc.
+def do_google_serp_search(search_keywords, status_container=None, update_progress=None, **kwargs):
+    """Perform Google SERP analysis with optional progress reporting.
+
+    Args:
+        search_keywords (str): Non-empty query string.
+        status_container: Optional Streamlit placeholder used for status
+            lines (its info/success/warning/empty methods are called).
+            When None, status lines are skipped.
+        update_progress: Optional callable invoked as
+            ``update_progress(message)`` or ``update_progress(message, level)``.
+            When None, progress reporting is skipped.
+        **kwargs: Extra search options; they are only logged here.
+
+    Returns:
+        dict with 'results' (raw SERP payload), 'titles', 'summary' and
+        'stats' (result counts), or None when the search returned nothing.
+
+    Raises:
+        ValueError: If search_keywords is empty or not a string.
+        Exception: Any other failure is logged and re-raised.
+    """
+
+    logger.info("="*50)
+    logger.info("Starting Google SERP Search")
+    logger.info("="*50)
+
+    def report(*args):
+        # Forward to the caller's progress callback when one was supplied.
+        if update_progress is not None:
+            update_progress(*args)
+
+    def show(kind, text):
+        # Write to the status placeholder; kind names the Streamlit method.
+        if status_container is not None:
+            getattr(status_container, kind)(text)
+
try:
- logger.info(f"Doing Google search for: {search_keywords}\n")
+        # Validate parameters
+        report("Validating search parameters")
+        show("info", "đ Validating parameters...")
+
+        if not search_keywords or not isinstance(search_keywords, str):
+            logger.error(f"Invalid search keywords: {search_keywords}")
+            raise ValueError("Search keywords must be a non-empty string")
+
+        # Update search initiation
+        report(f"Initiating search for: '{search_keywords}'")
+        show("info", "đ Querying search API...")
+        logger.info(f"Search params: {kwargs}")
+
+        # Execute search
g_results = google_search(search_keywords)
+
if g_results:
+            # Log success
+            report("Search completed successfully", "success")
+
+            # Count each result section once; reused for the status text
+            # and for the returned 'stats' entry.
+            organic_count = len(g_results.get('organic', []))
+            questions_count = len(g_results.get('peopleAlsoAsk', []))
+            related_count = len(g_results.get('relatedSearches', []))
+            stats = f"""Found:
+            - {organic_count} organic results
+            - {questions_count} related questions
+            - {related_count} related searches"""
+            report(stats)
+
+            # Process results
+            report("Processing search results")
+            show("info", "⥠Processing results...")
+            processed_results = process_search_results(g_results)
+
+            # Extract titles
+            report("Extracting information")
g_titles = extract_info(g_results, 'titles')
- return(g_results, g_titles)
+
+            # Final success
+            report("Analysis completed successfully", "success")
+            show("success", "⨠Research completed!")
+
+            # Leave the success line visible briefly, then clear it.
+            if status_container is not None:
+                time.sleep(1)
+                status_container.empty()
+
+            return {
+                'results': g_results,
+                'titles': g_titles,
+                'summary': processed_results,
+                'stats': {
+                    'organic_count': organic_count,
+                    'questions_count': questions_count,
+                    'related_count': related_count
+                }
+            }
+
+        else:
+            report("No results found", "warning")
+            show("warning", "â ī¸ No results found")
+            return None
+
except Exception as err:
- logger.error(f"Failed to do Google SERP research: {err}")
- return None
- # Not failing, as tavily would do same and then GPT-V to search.
+        error_msg = f"Search failed: {str(err)}"
+        report(error_msg, "error")
+        # loguru ignores exc_info=True; logger.exception records the traceback.
+        logger.exception(error_msg)
+        raise
+
+    finally:
+        logger.info("="*50)
+        logger.info("Google SERP Search function completed")
+        logger.info("="*50)
-def do_tavily_ai_search(search_keywords, max_results=10):
+def do_tavily_ai_search(search_keywords, max_results=10, **kwargs):
- """ Common function to do Tavily AI web research."""
+    """Run a Tavily AI search and extract its titles and answer.
+
+    Args:
+        search_keywords: Query text for Tavily.
+        max_results: Maximum number of results to request.
+        **kwargs: Optional `search_depth` (a number, where values above 2
+            mean 'advanced', or the strings 'basic' / 'advanced'),
+            `time_range` (default 'year') and `include_domains`.
+
+    Returns:
+        tuple: (results, titles, answer), or (None, None, None) when the
+        search returns nothing or fails.
+    """
try:
- # FIXME: Include the follow-up questions as blog FAQs.
logger.info(f"Doing Tavily AI search for: {search_keywords}")
- t_results = get_tavilyai_results(search_keywords, max_results)
- t_titles = tavily_extract_information(t_results, 'titles')
- t_answer = tavily_extract_information(t_results, 'answer')
- return(t_results, t_titles, t_answer)
+
+        # Callers pass the depth either as a number or as the Tavily name;
+        # comparing a string with 2 would raise TypeError.
+        depth = kwargs.get('search_depth')
+        if depth is None:
+            depth = 3
+        if isinstance(depth, str):
+            search_depth = 'basic' if depth.strip().lower() == 'basic' else 'advanced'
+        else:
+            search_depth = 'advanced' if depth > 2 else 'basic'
+
+        # Prepare Tavily search parameters
+        tavily_params = {
+            'max_results': max_results,
+            'search_depth': search_depth,
+            'time_range': kwargs.get('time_range', 'year'),
+            # [""] is the "no domain filter" value used by get_tavilyai_results
+            'include_domains': kwargs.get('include_domains') or [""]
+        }
+
+        # Pass the parameters to get_tavilyai_results
+        t_results = get_tavilyai_results(
+            keywords=search_keywords,
+            **tavily_params
+        )
+
+        if not t_results:
+            logger.warning("No results returned from Tavily AI search")
+            return None, None, None
+
+        t_titles = tavily_extract_information(t_results, 'titles')
+        t_answer = tavily_extract_information(t_results, 'answer')
+        return t_results, t_titles, t_answer
except Exception as err:
logger.error(f"Failed to do Tavily AI Search: {err}")
+        return None, None, None
def do_metaphor_ai_research(search_keywords):
- """ """
+    """
+    Query Metaphor for the given keywords and collect the result titles.
+
+    Args:
+        search_keywords (str): Keywords to search for
+
+    Returns:
+        tuple: (response_articles, titles); (None, None) when the response
+        is empty, has no 'data' entry, or the search raises
+    """
try:
- logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}")
+        logger.info(f"Start Semantic/Neural web search with Metaphor: {search_keywords}")
response_articles = metaphor_search_articles(search_keywords)
- m_titles = metaphor_extract_titles_or_text(response_articles, return_titles=True)
- return(response_articles, m_titles)
+
+        # Bail out early when there is nothing usable in the response.
+        if not response_articles or 'data' not in response_articles:
+            logger.warning("No valid results from Metaphor search")
+            return None, None
+
+        hits = response_articles['data'].get('results', [])
+        return response_articles, [hit.get('title', '') for hit in hits]
+
except Exception as err:
logger.error(f"Failed to do Metaphor search: {err}")
+        return None, None
def do_google_pytrends_analysis(search_keywords):
@@ -163,4 +384,4 @@ def tavily_extract_information(json_data, keyword):
elif keyword == 'follow-query':
return json_data['follow_up_questions']
else:
- return f"Invalid keyword: {keyword}"
+ return f"Invalid keyword: {keyword}"
\ No newline at end of file
diff --git a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py
index 76b5e6a9..209bab1e 100644
--- a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py
+++ b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py
@@ -116,55 +116,431 @@ def metaphor_find_similar(similar_url):
return search_response
-
-def metaphor_search_articles(query):
+def calculate_date_range(time_range: str) -> tuple:
"""
- Search for articles using the Metaphor API.
+    Calculate the published-date window for a named time range.
+
+    Args:
+        time_range (str): One of 'past_day', 'past_week', 'past_month',
+            'past_year', 'anytime'. The short forms 'day', 'week', 'month'
+            and 'year' are accepted too; matching ignores case and
+            surrounding spaces. Any other value (including None) is
+            treated as 'anytime'.
+
+    Returns:
+        tuple: (start_date, end_date) as UTC ISO-8601 strings with
+        milliseconds, or (None, None) for 'anytime'
+    """
+    # Imported locally so the helper does not depend on module-level names.
+    from datetime import datetime, timedelta, timezone
+
+    lookback_by_range = {
+        'day': timedelta(days=1),
+        'week': timedelta(weeks=1),
+        'month': timedelta(days=30),
+        'year': timedelta(days=365),
+    }
+
+    range_key = str(time_range or '').strip().lower()
+    if range_key.startswith('past_'):
+        range_key = range_key[len('past_'):]
+
+    lookback = lookback_by_range.get(range_key)
+    if lookback is None:  # anytime
+        return None, None
+
+    # datetime.utcnow() is deprecated and returns a naive value.
+    now = datetime.now(timezone.utc)
+    start_date = (now - lookback).strftime('%Y-%m-%dT%H:%M:%S.000Z')
+    end_date = now.strftime('%Y-%m-%dT%H:%M:%S.999Z')
+
+    return start_date, end_date
+
+def metaphor_search_articles(query, search_options: dict = None):
+    """
+    Search for articles using the Metaphor/Exa API.
Args:
query (str): The search query.
- num_results (int): Number of results to retrieve.
- use_autoprompt (bool): Whether to use autoprompt.
- include_domains (list): List of domains to include.
- time_range (str): Time range for published articles ("day", "week", "month", "year", "anytime").
+        search_options (dict): Search configuration options including:
+            - num_results (int): Number of results to retrieve
+            - use_autoprompt (bool): Whether to use autoprompt
+            - include_domains (list): List of domains to include
+            - time_range (str): One of 'past_day', 'past_week', 'past_month', 'past_year', 'anytime'
+            - exclude_domains (list): List of domains to exclude
Returns:
- MetaphorResponse: The response from the Metaphor API.
+        dict: {"data": {...}} holding the formatted results and cost, plus
+        'answer' / 'citations' / 'costDollars' / 'answerCostDollars' when an
+        Exa answer was obtained and 'tavily_*' keys when a Tavily answer was
+        obtained. None when the search returns nothing or fails.
"""
- metaphor = get_metaphor_client()
+    exa = get_metaphor_client()
try:
- include_domains, start_published_date, num_results, similar_url = cfg_search_param('exa')
-
- logger.info(f"Metaphor web search with Date: {start_published_date} and Query: {query}")
+        # Initialize default search options
+        if search_options is None:
+            search_options = {}
+
+        # Get config parameters or use defaults
try:
- search_response = metaphor.search_and_contents(
- query,
- include_domains=include_domains,
- use_autoprompt=True,
- start_published_date=start_published_date,
- num_results=num_results
- )
- except Exception as err:
- logger.error(f"Failed in metaphor.search_and_contents: {err}")
-
- # From each webpage, get a summary of the web page.
- contents_response = search_response.results
- # FIXME: Need to summarize for smaller input context window.
-# for content in tqdm(contents_response, desc="Reading Web URL content:", unit="content"):
-# summarized_content = summarize_web_content(content.text, "gemini")
-# content.text = summarized_content
-
- print_search_result(contents_response)
+            include_domains, _, num_results, _ = cfg_search_param('exa')
+        except Exception as cfg_err:
+            logger.warning(f"Failed to load config parameters: {cfg_err}. Using defaults.")
+            include_domains = None
+            num_results = 10
+
+        # Calculate date range based on time_range option
+        time_range = search_options.get('time_range', 'anytime')
+        start_published_date, end_published_date = calculate_date_range(time_range)
+
+        # Prepare search parameters
+        search_params = {
+            'num_results': search_options.get('num_results', num_results),
+            'summary': True,  # Always get summaries
+            'include_domains': search_options.get('include_domains', include_domains),
+            'use_autoprompt': search_options.get('use_autoprompt', True),
+        }
+
+        # Optional filters are sent only when they carry a value.
+        if search_options.get('exclude_domains'):
+            search_params['exclude_domains'] = search_options['exclude_domains']
+        if start_published_date:
+            search_params['start_published_date'] = start_published_date
+        if end_published_date:
+            search_params['end_published_date'] = end_published_date
+
+        logger.info(f"Exa web search with params: {search_params} and Query: {query}")
+
+        # Execute search
+        search_response = exa.search_and_contents(
+            query,
+            **search_params
+        )
+
+        if not search_response or not hasattr(search_response, 'results'):
+            logger.warning("No results returned from Exa search")
+            return None
+
+        # Get cost information safely
+        try:
+            cost_dollars = {
+                'total': float(search_response.cost_dollars['total']),
+            } if hasattr(search_response, 'cost_dollars') else None
+        except Exception as cost_err:
+            logger.warning(f"Error processing cost information: {cost_err}")
+            cost_dollars = None
+
+        # Format response to match expected structure
+        formatted_response = {
+            "data": {
+                "requestId": getattr(search_response, 'request_id', None),
+                "resolvedSearchType": "neural",
+                "results": [
+                    {
+                        "id": result.url,
+                        "title": result.title,
+                        "url": result.url,
+                        "publishedDate": getattr(result, 'published_date', None),
+                        "author": getattr(result, 'author', None),
+                        # A missing or None score becomes 0 so that numeric
+                        # formatting downstream cannot fail.
+                        "score": getattr(result, 'score', None) or 0,
+                        "summary": getattr(result, 'summary', None),
+                        "text": getattr(result, 'text', None),
+                        "image": getattr(result, 'image', None),
+                        "favicon": getattr(result, 'favicon', None)
+                    }
+                    for result in search_response.results
+                ],
+                "costDollars": cost_dollars
+            }
+        }
+
+        # Get AI-generated answer from Metaphor
+        try:
+            exa_answer = get_exa_answer(query)
+            if exa_answer:
+                formatted_response.update(exa_answer)
+                # The display code reads the answer cost under this key.
+                formatted_response["answerCostDollars"] = exa_answer.get("costDollars")
+        except Exception as exa_err:
+            logger.warning(f"Error getting Exa answer: {exa_err}")
+
+        # Get AI-generated answer from Tavily
+        try:
+            # Resolved at call time rather than at import time.
+            import importlib
+            tavily_module = importlib.import_module('lib.ai_web_researcher.tavily_ai_search')
+            if hasattr(tavily_module, 'do_tavily_ai_search'):
+                tavily_response = tavily_module.do_tavily_ai_search(query)
+                # A (results, titles, answer) tuple is reduced to its
+                # results payload before the answer is looked up.
+                if isinstance(tavily_response, tuple):
+                    tavily_response = tavily_response[0]
+                if isinstance(tavily_response, dict) and 'answer' in tavily_response:
+                    formatted_response.update({
+                        "tavily_answer": tavily_response.get("answer"),
+                        "tavily_citations": tavily_response.get("citations", []),
+                        "tavily_cost_dollars": tavily_response.get("costDollars", {"total": 0})
+                    })
+            else:
+                logger.warning("do_tavily_ai_search function not found in tavily_ai_search module")
+        except Exception as tavily_err:
+            logger.warning(f"Error getting Tavily answer: {tavily_err}")
+
+        # Display results in Streamlit
+        streamlit_display_metaphor_results(formatted_response)
+        return formatted_response
- if similar_url:
- logger.info(f"Doing similar/semantic search for URL: {similar_url}")
- metaphor_find_similar(similar_url)
- return contents_response
-
except Exception as e:
- logger.error(f"Error in Metaphor searching articles: {e}")
- raise
+        logger.error(f"Error in Exa searching articles: {e}")
+        return None
+def streamlit_display_metaphor_results(metaphor_response: dict):
+    """
+    Display Metaphor search results in Streamlit: metrics, AI answers,
+    on-demand LLM insights, a results table and per-result summaries.
+
+    Args:
+        metaphor_response (dict): Response from Metaphor search. Must hold a
+            'data' entry with 'results'; may also hold 'answer', 'citations',
+            'answerCostDollars' and the 'tavily_*' keys.
+
+    Side effects:
+        Writes 'search_insights', 'insights_generated' and
+        'metaphor_response' into st.session_state and may trigger a rerun.
+    """
+    if not metaphor_response or 'data' not in metaphor_response:
+        st.error("No valid Metaphor search results to display")
+        return
+
+    # Initialize session state variables if they don't exist
+    if 'search_insights' not in st.session_state:
+        st.session_state.search_insights = None
+
+    if 'insights_generated' not in st.session_state:
+        st.session_state.insights_generated = False
+
+    # Always keep the latest response for the insights step
+    st.session_state.metaphor_response = metaphor_response
+
+    # Display metrics in columns
+    col1, col2, col3 = st.columns(3)
+
+    # Calculate metrics; the average only covers results that have a score
+    results = metaphor_response['data'].get('results') or []
+    total_results = len(results)
+    scores = [r['score'] for r in results if r.get('score') is not None]
+    avg_score = sum(scores) / len(scores) if scores else 0
+
+    with col1:
+        st.metric(
+            label="Total Results",
+            value=total_results
+        )
+    with col2:
+        if metaphor_response['data'].get('costDollars'):
+            cost = metaphor_response['data']['costDollars']
+            st.metric(
+                label="Search Cost",
+                value=f"${cost['total']:.3f}"
+            )
+    with col3:
+        st.metric(
+            label="Average Relevance Score",
+            value=f"{avg_score:.2f}"
+        )
+
+    # Display AI-generated answers side by side
+    if 'answer' in metaphor_response or 'tavily_answer' in metaphor_response:
+        st.markdown("### đ¤ AI-Generated Research Answers")
+
+        # Create two columns for side-by-side display
+        tavily_col, metaphor_col = st.columns(2)
+
+        # Display Tavily answer if available
+        with tavily_col:
+            if 'tavily_answer' in metaphor_response:
+                st.markdown("#### đ Tavily AI Answer")
+                st.markdown(f"""
+
+                {metaphor_response['tavily_answer']}
+
+                """, unsafe_allow_html=True)
+
+                if metaphor_response.get('tavily_cost_dollars'):
+                    st.caption(f"Tavily Answer Cost: ${metaphor_response['tavily_cost_dollars']['total']:.3f}")
+
+                if metaphor_response.get('tavily_citations'):
+                    with st.expander("đ Tavily Sources"):
+                        for idx, citation in enumerate(metaphor_response['tavily_citations'], 1):
+                            st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})")
+            else:
+                st.markdown("#### đ Tavily AI Answer")
+                st.info("No Tavily answer available for this query.")
+
+        # Display Metaphor answer if available
+        with metaphor_col:
+            if 'answer' in metaphor_response:
+                st.markdown("#### đ Metaphor AI Answer")
+                st.markdown(f"""
+
+                {metaphor_response['answer']}
+
+                """, unsafe_allow_html=True)
+
+                if metaphor_response.get('answerCostDollars'):
+                    st.caption(f"Metaphor Answer Cost: ${metaphor_response['answerCostDollars']['total']:.3f}")
+
+                if metaphor_response.get('citations'):
+                    with st.expander("đ Metaphor Sources"):
+                        for idx, citation in enumerate(metaphor_response['citations'], 1):
+                            st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})")
+            else:
+                st.markdown("#### đ Metaphor AI Answer")
+                st.info("No Metaphor answer available for this query.")
+
+    # "Get Search Insights" section - shown whether or not answers exist
+    st.markdown("### đ Search Insights")
+
+    # Create a container for the insights
+    insights_container = st.container()
+
+    if st.button("Generate Search Insights", type="primary"):
+        # Flag the request; the work happens below on the next script run
+        st.session_state.insights_generated = True
+
+        # Store the current metaphor_response in session state
+        st.session_state.metaphor_response = metaphor_response
+
+        # st.experimental_rerun was removed from newer Streamlit releases;
+        # prefer st.rerun and fall back for older installations.
+        rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
+        if rerun is not None:
+            rerun()
+
+    # If insights should be generated, do it in a separate container
+    if st.session_state.insights_generated:
+        with insights_container:
+            with st.spinner("Analyzing search results to generate insights..."):
+                # Get the stored metaphor_response from session state
+                stored_response = st.session_state.metaphor_response
+                stored_results = stored_response['data']['results']
+
+                # Prepare data for analysis
+                analysis_data = {
+                    "metaphor_results": stored_results,
+                    "metaphor_answer": stored_response.get("answer", ""),
+                    "tavily_answer": stored_response.get("tavily_answer", ""),
+                    "metaphor_citations": stored_response.get("citations", []),
+                    "tavily_citations": stored_response.get("tavily_citations", [])
+                }
+
+                # Create the analysis prompt
+                analysis_prompt = f"""
+                **Search Intent & User Needs Analysis**
+
+                I have conducted research using both Tavily and Metaphor AI search engines.
+                Below is the data from both sources:
+
+                **Metaphor AI Answer:**
+                {analysis_data["metaphor_answer"]}
+
+                **Tavily AI Answer:**
+                {analysis_data["tavily_answer"]}
+
+                **Search Results:**
+                {[f"{i+1}. {r['title']} - {r['summary']}" for i, r in enumerate(analysis_data["metaphor_results"])]}
+
+                **Citations:**
+                {[f"{i+1}. {c.get('title', 'Untitled')} - {c.get('url', 'No URL')}" for i, c in enumerate(analysis_data["metaphor_citations"] + analysis_data["tavily_citations"])]}
+
+                Based on this research data, please provide the following insights:
+
+                **Search Intent & User Needs**
+                ```
+                Review the research data and identify:
+                1. The distribution of search intent (categorize as Informational/Commercial/Navigational/Transactional)
+                2. Most common user questions and their patterns
+                3. Frequently mentioned pain points or challenges
+                4. Recurring solutions or approaches to addressing these challenges
+                5. Gaps between user questions and available answers
+
+                Present findings in a structured format with percentages and specific examples.
+                ```
+
+                Format your response as a comprehensive analysis with clear sections, bullet points, and examples from the research data.
+                """
+
+                try:
+                    # Import the llm_text_gen function
+                    import importlib
+                    text_gen_module = importlib.import_module('lib.gpt_providers.text_generation.main_text_generation')
+                    if hasattr(text_gen_module, 'llm_text_gen'):
+                        # Generate insights using llm_text_gen
+                        insights = text_gen_module.llm_text_gen(analysis_prompt)
+
+                        # Store insights in session state
+                        st.session_state.search_insights = insights
+                    else:
+                        st.error("Could not find llm_text_gen function in the text generation module.")
+                except Exception as e:
+                    st.error(f"Error generating insights: {str(e)}")
+                    logger.error(f"Error generating insights: {e}")
+                finally:
+                    # Reset on failure too; otherwise every later rerun
+                    # would repeat the failing LLM call.
+                    st.session_state.insights_generated = False
+
+    # Display insights if they exist in session state
+    if st.session_state.search_insights:
+        with insights_container:
+            st.markdown("### đ Search Intent & User Needs Analysis")
+            st.markdown(st.session_state.search_insights)
+
+    # The table and the detail view need at least one result; an empty
+    # DataFrame has no 'url' column to copy.
+    if results:
+        # Create DataFrame from results
+        df = pd.DataFrame(results)
+
+        # Prepare data for display
+        display_df = df.copy()
+        display_df['Visit Site'] = display_df['url']
+
+        # Format publishedDate as string if it exists
+        if 'publishedDate' in display_df.columns:
+            display_df['publishedDate'] = display_df['publishedDate'].apply(
+                lambda x: x[:10] if isinstance(x, str) else 'N/A'
+            )
+
+        # Configure columns for data editor
+        columns = {
+            'title': st.column_config.TextColumn(
+                'Title',
+                width='large',
+                required=True,
+            ),
+            'author': st.column_config.TextColumn(
+                'Author',
+                width='medium',
+            ),
+            'publishedDate': st.column_config.TextColumn(
+                'Published Date',
+                width='medium',
+            ),
+            'score': st.column_config.NumberColumn(
+                'Relevance Score',
+                width='small',
+                format="%.2f"
+            ),
+            'Visit Site': st.column_config.LinkColumn(
+                'Link',
+                width='small',
+                display_text='Visit Site',
+            ),
+            'summary': st.column_config.TextColumn(
+                'Summary',
+                width='large',
+                required=True,
+            )
+        }
+
+        # Display results in data editor
+        st.data_editor(
+            display_df,
+            column_config=columns,
+            hide_index=True,
+            num_rows='dynamic',
+            disabled=True,
+            column_order=['title', 'author', 'publishedDate', 'score', 'summary', 'Visit Site']
+        )
+
+        # Display detailed summaries with popovers
+        st.write("### Detailed Summaries")
+        for idx, result in enumerate(results, 1):
+            with st.expander(f"đ {result['title']}", expanded=False):
+                col1, col2 = st.columns([3, 1])
+                with col1:
+                    st.markdown(f"**Summary**")
+                    st.markdown(result.get('summary') or 'N/A')
+                with col2:
+                    score = result.get('score')
+                    st.markdown("**Details**")
+                    st.markdown(f"**Author:** {result['author'] if result['author'] else 'N/A'}")
+                    st.markdown(f"**Published:** {result['publishedDate'][:10] if result['publishedDate'] else 'N/A'}")
+                    # A None score cannot take the :.2f format
+                    st.markdown(f"**Score:** {score:.2f}" if score is not None else "**Score:** N/A")
+                    st.markdown(f"[Visit Site]({result['url']})")
+
+    # Display search metadata
+    st.divider()
+    col1, col2 = st.columns(2)
+    with col1:
+        st.caption(f"Search Type: {metaphor_response['data']['resolvedSearchType']}")
+    with col2:
+        st.caption(f"Request ID: {metaphor_response['data']['requestId']}")
def metaphor_news_summarizer(news_keywords):
@@ -240,3 +616,56 @@ def metaphor_scholar_search(query, include_domains=None, time_range="anytime"):
return response
except Exception as e:
logger.error(f"Error in searching papers: {e}")
+
+def get_exa_answer(query: str, system_prompt: str = None) -> dict:
+    """
+    Get an AI-generated answer for a query using Exa's answer endpoint.
+
+    Args:
+        query (str): The search query to get an answer for
+        system_prompt (str, optional): Custom system prompt for the LLM. If None, uses default prompt.
+
+    Returns:
+        dict: Response containing answer, citations, and cost information,
+        or None when no answer was received or the call failed
+            {
+                "answer": str,
+                "citations": list[dict],
+                "costDollars": dict
+            }
+    """
+    exa = get_metaphor_client()
+
+    def read_field(source, name, default=None):
+        # The response may be an SDK object (attributes) or a plain dict.
+        if isinstance(source, dict):
+            return source.get(name, default)
+        return getattr(source, name, default)
+
+    try:
+        # Use default system prompt if none provided
+        if system_prompt is None:
+            system_prompt = (
+                "I am doing research to write factual content. "
+                "Help me find answers for content generation task. "
+                "Provide detailed, well-structured answers with clear citations."
+            )
+
+        logger.info(f"Getting Exa answer for query: {query}")
+        logger.debug(f"Using system prompt: {system_prompt}")
+
+        request_args = {
+            "model": "exa",
+            "text": True,  # Include full text in citations
+        }
+        try:
+            # Send the system prompt, which was previously built but unused.
+            result = exa.answer(query, system_prompt=system_prompt, **request_args)
+        except TypeError:
+            # Presumably a client version without a system_prompt parameter;
+            # fall back to the original call - TODO confirm against the SDK.
+            result = exa.answer(query, **request_args)
+
+        answer_text = read_field(result, 'answer') if result else None
+        if not answer_text:
+            logger.warning("No answer received from Exa")
+            return None
+
+        # Callers read citations with .get(), so objects become dicts.
+        citations = []
+        for citation in read_field(result, 'citations') or []:
+            if isinstance(citation, dict):
+                citations.append(citation)
+                continue
+            entry = {}
+            for field_name in ('title', 'url', 'id', 'author', 'text'):
+                field_value = getattr(citation, field_name, None)
+                if field_value is not None:
+                    entry[field_name] = field_value
+            citations.append(entry)
+
+        cost = read_field(result, 'costDollars') or read_field(result, 'cost_dollars')
+
+        # Format response to match expected structure
+        response = {
+            "answer": answer_text,
+            "citations": citations,
+            "costDollars": cost if cost else {"total": 0}
+        }
+
+        return response
+
+    except Exception as e:
+        logger.error(f"Error getting Exa answer: {e}")
+        return None
diff --git a/lib/ai_web_researcher/tavily_ai_search.py b/lib/ai_web_researcher/tavily_ai_search.py
index 35d9f2c9..3063a7d0 100644
--- a/lib/ai_web_researcher/tavily_ai_search.py
+++ b/lib/ai_web_researcher/tavily_ai_search.py
@@ -49,17 +49,9 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-def get_tavilyai_results(keywords, max_results=5):
+def get_tavilyai_results(keywords, max_results=5, include_domains=None, search_depth="advanced", **kwargs):
"""
Get Tavily AI search results based on specified keywords and options.
-
- Args:
- keywords (str): Keywords for Tavily AI search.
- include_urls (str): Comma-separated URLs to include in the search.
- search_depth (str, optional): Search depth option (default is "advanced").
-
- Returns:
- dict: Tavily AI search results.
"""
# Run Tavily search
logger.info(f"Running Tavily search on: {keywords}")
@@ -74,56 +66,100 @@ def get_tavilyai_results(keywords, max_results=5):
client = TavilyClient(api_key=api_key)
except Exception as err:
logger.error(f"Failed to create Tavily client. Check TAVILY_API_KEY: {err}")
-
- # Read search config params from the file.
- try:
- include_urls = cfg_search_param('tavily')
- except Exception as err:
- logger.error(f"Failed to read search params from main_config: {err}")
+ raise
try:
- if include_urls:
- tavily_search_result = client.search(keywords,
- search_depth="advanced",
- include_answer=True,
- max_results=max_results,
- include_domains=include_urls)
- else:
- tavily_search_result = client.search(keywords,
- search_depth = "advanced",
- include_answer=True,
- max_results=max_results)
+ # Create search parameters exactly matching Tavily's API format
+ tavily_search_result = client.search(
+ query=keywords,
+ search_depth="advanced",
+ time_range="year",
+ include_answer="advanced",
+ include_domains=[""] if not include_domains else include_domains,
+ max_results=max_results
+ )
+
+ if tavily_search_result:
+ print_result_table(tavily_search_result)
+ streamlit_display_results(tavily_search_result)
+ return tavily_search_result
+ return None
- print_result_table(tavily_search_result)
- streamlit_display_results(tavily_search_result)
- return(tavily_search_result)
except Exception as err:
logger.error(f"Failed to do Tavily Research: {err}")
+ raise
def streamlit_display_results(output_data):
- """Display Tavily AI search results in Streamlit UI."""
+ """Render a Tavily response in Streamlit: the answer, any follow-up questions, a results table and one popover per result."""
- # Prepare data for display
- table_data = []
+ # Show the 'answer' field first; a placeholder text is used when the key is absent
+ answer = output_data.get("answer", "No answer available")
+ st.markdown("### đ¤ AI-Generated Answer")
+ st.markdown(f"""
+
+ {answer}
+
+ """, unsafe_allow_html=True)  # NOTE(review): answer is interpolated unescaped while HTML rendering is enabled
+
+ # List the follow-up questions, numbered from 1, when the response has any
+ follow_up_questions = output_data.get("follow_up_questions", [])
+ if follow_up_questions:
+ st.markdown("### â Follow-up Questions")
+ for i, question in enumerate(follow_up_questions, 1):
+ st.markdown(f"**{i}.** {question}")
+
+ # Results table, rendered further down with st.data_editor
+ st.markdown("### đ Search Results")
+
+ # Collect one row (Title / Content / Link) per result for the DataFrame
+ import pandas as pd
+ results_data = []
+
for item in output_data.get("results", []):
title = item.get("title", "")
snippet = item.get("content", "")
link = item.get("url", "")
- table_data.append([title, snippet, link])
+ results_data.append({
+ "Title": title,
+ "Content": snippet,
+ "Link": link
+ })
+
+ if results_data:
+ df = pd.DataFrame(results_data)
+
+ # Show the rows in a data editor: text columns for Title/Content, a link column for Link
+ st.data_editor(
+ df,
+ column_config={
+ "Title": st.column_config.TextColumn(
+ "Title",
+ help="Article title",
+ width="medium",
+ ),
+ "Content": st.column_config.TextColumn(
+ "Content",
+ help="Click the button below to view full content",
+ width="large",
+ ),
+ "Link": st.column_config.LinkColumn(
+ "Link",
+ help="Click to visit the website",
+ width="small",
+ display_text="Visit Site"
+ ),
+ },
+ hide_index=True,
+ use_container_width=True,
+ )
- # Display the table in Streamlit
- st.table(table_data)
-
- # Display the 'answer' in Streamlit
- answer = output_data.get("answer", "No answer available")
- st.write(f"**The answer to your search query:** {answer}")
-
- # Display follow-up questions if available
- follow_up_questions = output_data.get("follow_up_questions", [])
- if follow_up_questions:
- st.write(f"**Follow-up questions for the query:** {output_data.get('query')}")
- st.write(", ".join(follow_up_questions))
+ # One popover per result showing its content; the label is the title cut to 50 chars plus "..."
+ for item in output_data.get("results", []):
+ with st.popover(f"View content: {item.get('title', '')[:50]}..."):
+ st.markdown(item.get("content", ""))
+ else:
+ st.info("No results found for your search query.")
def print_result_table(output_data):
diff --git a/lib/alwrity_ui/display_google_serp_results.py b/lib/alwrity_ui/display_google_serp_results.py
new file mode 100644
index 00000000..636cadf1
--- /dev/null
+++ b/lib/alwrity_ui/display_google_serp_results.py
@@ -0,0 +1,277 @@
+import streamlit as st
+import logging
+from datetime import datetime
+from typing import Dict, Optional, Any
+
+# Configure module logger
+logger = logging.getLogger(__name__)
+
+def display_research_results(results: Dict[str, Any]) -> None:
+ """
+ Render processed research results in three Streamlit tabs: summary, results, statistics.
+
+ Args:
+ results (dict): Processed results; must carry a 'source' key. Falsy input only shows a warning.
+ """
+ if not results:
+ st.warning("No results to display")
+ return
+
+ # One tab per section; the indices used below follow the order of this list
+ tabs = st.tabs(["đ Summary", "đ Results", "đ Statistics"])
+
+ with tabs[0]:
+ display_summary_section(results)
+
+ with tabs[1]:
+ if results['source'] == 'gemini':
+ display_gemini_results(results)
+ else:  # any non-gemini source is rendered as SERP results
+ display_serp_results(results)
+
+ with tabs[2]:
+ display_statistics(results)
+
+def process_research_results(results: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+ """Build a dict with 'timestamp', 'source', 'summary' and 'data' from raw results; returns None on empty input or on any error."""
+ logger.info("Processing research results")
+
+ try:
+ if not results:
+ return None
+
+ processed = {
+ 'timestamp': str(datetime.now()),  # local time at processing, stringified
+ 'source': results.get('source', 'unknown'),
+ 'summary': {},  # placeholder, overwritten by the update() call below
+ 'data': {}  # placeholder, overwritten by the update() call below
+ }
+
+ if results.get('source') == 'gemini':
+ processed.update(process_gemini_results(results))
+ else:  # every other source goes through the SERP processor
+ processed.update(process_serp_results(results))
+
+ logger.info("Results processing completed")
+ return processed
+
+ except Exception as err:
+ logger.error(f"Failed to process results: {err}", exc_info=True)  # exc_info=True also logs the traceback
+ return None
+
+def process_search_results(search_results: Dict[str, Any], search_type: str = "general") -> Optional[Dict[str, Any]]:
+ """Build a dict with 'organic', 'peopleAlsoAsk', 'relatedSearches' and 'metadata' from raw results; returns None on empty input or on any error."""
+ logger.info(f"Processing {search_type} search results")
+
+ try:
+ if not search_results:
+ return None
+
+ processed = {
+ 'organic': process_organic_results(search_results.get('organic', [])),  # missing sections default to []
+ 'peopleAlsoAsk': process_paa_results(search_results.get('peopleAlsoAsk', [])),
+ 'relatedSearches': process_related_searches(search_results.get('relatedSearches', [])),
+ 'metadata': {
+ 'timestamp': str(datetime.now()),  # local time at processing, stringified
+ 'type': search_type  # echoed back unchanged from the argument
+ }
+ }
+
+ return processed
+
+ except Exception as err:
+ logger.error(f"Error processing search results: {err}", exc_info=True)  # exc_info=True also logs the traceback
+ return None
+
+# Helper functions for result processing
+def process_organic_results(results):
+ """Return one dict per organic hit with 'title', 'link', 'snippet' and 'position', filling a default for each missing key."""
+ return [{
+ 'title': result.get('title', 'No Title'),
+ 'link': result.get('link', '#'),  # '#' stands in for a missing URL
+ 'snippet': result.get('snippet', 'No snippet available'),
+ 'position': result.get('position', 'N/A')
+ } for result in results]
+
+def process_paa_results(results):
+ """Return one dict per People Also Ask entry with 'question', 'answer' and 'link', filling a default for each missing key."""
+ return [{
+ 'question': result.get('title', ''),  # the raw 'title' field is exposed as the question
+ 'answer': result.get('snippet', 'No answer available'),  # the raw 'snippet' field is exposed as the answer
+ 'link': result.get('link', '#')
+ } for result in results]
+
+def process_related_searches(results):
+ """Return the 'query' value of each related-search entry, or '' when an entry has no such key."""
+ return [query.get('query', '') for query in results]  # each entry must support .get()
+
+def process_gemini_results(results: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Build the 'summary' and 'data' sections from a Gemini research payload.
+
+ Args:
+ results (dict): Raw Gemini results; the payload is read from its 'results' key ({} if absent)
+
+ Returns:
+ dict: 'summary' (findings, sources, timestamp, source count, model) and 'data' (the payload unchanged)
+ """
+ gemini_data = results.get('results', {})
+ return {
+ 'summary': {
+ 'main_findings': gemini_data.get('main_response', ''),
+ 'sources': gemini_data.get('grounding_data', []),
+ 'processing_time': gemini_data.get('metadata', {}).get('timestamp'),  # NOTE: holds the metadata timestamp, not a duration
+ 'total_sources': len(gemini_data.get('grounding_data', [])),
+ 'model': gemini_data.get('metadata', {}).get('model', 'unknown')
+ },
+ 'data': gemini_data
+ }
+
+def process_serp_results(results: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Build the 'summary' and 'data' sections from raw SERP search results.
+
+ Args:
+ results (dict): Raw SERP results; reads 'organic', 'peopleAlsoAsk' and 'relatedSearches' ([] if absent)
+
+ Returns:
+ dict: 'summary' (counts plus the organic links and titles) and 'data' (the three processed sections)
+ """
+ organic_results = results.get('organic', [])
+ paa_results = results.get('peopleAlsoAsk', [])
+ related_searches = results.get('relatedSearches', [])
+
+ return {
+ 'summary': {
+ 'total_results': len(organic_results),
+ 'sources': [result.get('link') for result in organic_results],  # None for hits without a 'link'
+ 'titles': [result.get('title') for result in organic_results],  # None for hits without a 'title'
+ 'total_questions': len(paa_results),
+ 'total_related': len(related_searches)
+ },
+ 'data': {
+ 'organic': process_organic_results(organic_results),
+ 'peopleAlsoAsk': process_paa_results(paa_results),
+ 'relatedSearches': process_related_searches(related_searches)
+ }
+ }
+
+# Display helper functions
+def display_summary_section(results):
+ """Show the source, the timestamp and the number of entries in summary['sources'] as a markdown list; needs 'source' and 'timestamp' keys."""
+ st.markdown("### đ Research Summary")
+ st.markdown(f"""
+ - **Source**: {results['source'].title()}
+ - **Time**: {results['timestamp']}
+ - **Total Sources**: {len(results.get('summary', {}).get('sources', []))}
+ """)
+
+def display_gemini_results(results):
+ """Write summary['main_findings'], then the 'grounding_data' of results['data'] inside a collapsed expander."""
+ st.markdown("### đ¤ Gemini Research Findings")
+ st.write(results['summary']['main_findings'])
+
+ with st.expander("đ Sources and References", expanded=False):
+ st.write(results['data'].get('grounding_data', 'No sources available'))  # NOTE: fallback applies only when the key is absent, not when it is empty
+
+def display_serp_results(results):
+ """Render each organic result from results['data'] in a collapsed expander showing its rank, link and snippet."""
+ st.markdown("### đ Search Results")
+
+ for result in results['data'].get('organic', []):  # each entry must have 'title', 'position', 'link' and 'snippet'
+ with st.expander(f"đ {result['title']}", expanded=False):
+ st.markdown(f"""
+ **Rank:** {result['position']}
+
+ **Link:** [{result['link']}]({result['link']})
+
+ **Snippet:**
+ {result['snippet']}
+ """)
+
+def display_statistics(results: Dict[str, Any]) -> None:
+ """
+ Show source-specific metrics, the completion timestamp and two data-quality progress bars.
+
+ Args:
+ results (dict): Processed research results; must carry 'source' and 'timestamp' keys
+ """
+ st.markdown("### đ Research Statistics")
+
+ # Gemini gets two metrics (sources, model); every other source gets the three SERP counts
+ if results['source'] == 'gemini':
+ col1, col2 = st.columns(2)
+ with col1:
+ st.metric(
+ "Sources Analyzed",
+ results.get('summary', {}).get('total_sources', 0)
+ )
+ with col2:
+ st.metric(
+ "Model Used",
+ results.get('summary', {}).get('model', 'Unknown')
+ )
+
+ else: # SERP results
+ col1, col2, col3 = st.columns(3)
+ with col1:
+ st.metric(
+ "Organic Results",
+ results.get('summary', {}).get('total_results', 0)
+ )
+ with col2:
+ st.metric(
+ "Related Questions",
+ results.get('summary', {}).get('total_questions', 0)
+ )
+ with col3:
+ st.metric(
+ "Related Searches",
+ results.get('summary', {}).get('total_related', 0)
+ )
+
+ # Shown for every source: the timestamp stored in the processed results
+ st.markdown("#### đ Timing Information")
+ st.info(f"Research completed at: {results['timestamp']}")
+
+ # Two progress bars fed by the scores from calculate_quality_metrics
+ st.markdown("#### đ Data Quality")
+ quality_metrics = calculate_quality_metrics(results)
+
+ col1, col2 = st.columns(2)
+ with col1:
+ st.progress(quality_metrics['completeness'])
+ st.caption("Data Completeness")
+ with col2:
+ st.progress(quality_metrics['relevance'])
+ st.caption("Estimated Relevance")
+
+def calculate_quality_metrics(results: Dict[str, Any]) -> Dict[str, float]:
+ """
+ Derive heuristic 'completeness' and 'relevance' scores from the processed results.
+
+ Args:
+ results (dict): Processed research results; 'source' selects the Gemini or SERP scoring rule
+
+ Returns:
+ dict: 'completeness' and 'relevance' scores; both are 0.0 if any exception is raised
+ """
+ try:
+ if results['source'] == 'gemini':
+ completeness = 1.0 if results['summary']['main_findings'] else 0.0  # all-or-nothing on main findings
+ relevance = 0.8 if results['summary']['sources'] else 0.4  # fixed scores: 0.8 with sources, 0.4 without
+ else:
+ organic_results = results.get('summary', {}).get('total_results', 0)
+ completeness = min(organic_results / 10, 1.0) # 10 or more organic results count as fully complete
+ has_paa = bool(results.get('summary', {}).get('total_questions', 0))
+ has_related = bool(results.get('summary', {}).get('total_related', 0))
+ relevance = (0.6 + (0.2 if has_paa else 0) + (0.2 if has_related else 0))  # base 0.6, +0.2 each for PAA and related
+
+ return {
+ 'completeness': completeness,
+ 'relevance': relevance
+ }
+
+ except Exception as err:
+ logger.error(f"Error calculating quality metrics: {err}")
+ return {'completeness': 0.0, 'relevance': 0.0}  # errors (e.g. a missing 'source' key) yield zero scores
\ No newline at end of file
diff --git a/lib/alwrity_ui/keyword_web_researcher.py b/lib/alwrity_ui/keyword_web_researcher.py
index 97a9ed5c..12bf1509 100644
--- a/lib/alwrity_ui/keyword_web_researcher.py
+++ b/lib/alwrity_ui/keyword_web_researcher.py
@@ -323,7 +323,8 @@ def do_web_research():
"num_results": 10,
"time_range": "past month",
"include_domains": "",
- "similar_url": ""
+ "similar_url": "",
+ "search_mode": "google" # Default search mode
}
# Define the research options dialog function
@@ -425,6 +426,12 @@ def do_web_research():
horizontal=True,
help="Choose your preferred research method"
)
+
+ # Map the selected option to the search_mode value
+ for mode, label, _, _ in search_options:
+ if label == selected_option:
+ st.session_state.research_options["search_mode"] = mode
+ break
else:
st.warning("No search methods available. Please configure API keys.")
@@ -439,7 +446,7 @@ def do_web_research():
st.rerun()
# Main interface
- st.title("Keyword Research Assistant")
+ st.title("ALwrity Web Researcher")
# Primary search area with help popover
with st.popover("âšī¸ Keyword Research Tips"):
@@ -450,6 +457,7 @@ def do_web_research():
3. **Search Depth**: Higher depth = more comprehensive but slower
4. **Target Audience**: Affects content recommendations
5. **Content Type**: Influences research focus
+ 6. **Search Mode**: Choose between traditional web research(Google), AI-powered search(Tavily and Metaphor) and Deep Researcher
""")
col1, col2 = st.columns([3, 1])
@@ -484,6 +492,7 @@ def do_web_research():
# Execute search with all parameters
web_research_result = gpt_web_researcher(
search_keywords=st.session_state.research_options["primary_keywords"],
+ search_mode=st.session_state.research_options["search_mode"],
related_keywords=st.session_state.research_options["related_keywords"],
target_audience=st.session_state.research_options["target_audience"],
content_type=st.session_state.research_options["content_type"],