AI Web Researcher: Added Exa answer and Tavily answer to the metaphor search results

Added AI insights to the metaphor search results
Better display of AI search results
This commit is contained in:
ajaysi
2025-04-05 14:55:28 +05:30
parent 4fc7ba7055
commit d7cfe2dd31
5 changed files with 1087 additions and 115 deletions

View File

@@ -22,14 +22,23 @@
import os
import json
import time
from pathlib import Path
import sys
from datetime import datetime
import streamlit as st
from lib.alwrity_ui.display_google_serp_results import (
process_research_results,
process_search_results,
display_research_results
)
from .tavily_ai_search import get_tavilyai_results
from .metaphor_basic_neural_web_search import metaphor_find_similar, metaphor_search_articles
from .metaphor_basic_neural_web_search import metaphor_search_articles
from .google_serp_search import google_search
from .google_trends_researcher import do_google_trends_analysis
#from .google_gemini_web_researcher import do_gemini_web_research
from loguru import logger
# Configure logger
@@ -40,59 +49,271 @@ logger.add(sys.stdout,
)
def gpt_web_researcher(search_keywords, search_mode, **kwargs):
    """Run keyword-based web research and report progress in the Streamlit UI.

    Args:
        search_keywords (str): Keywords to research.
        search_mode (str): "google" runs the Google SERP pipeline; "ai" runs
            Tavily followed by Metaphor/Exa.
        **kwargs: Extra search options. In "ai" mode ``include_domains``,
            ``search_depth`` and ``num_results`` are read here; everything
            else is forwarded to the search helpers unchanged.

    Returns:
        dict | None: Processed SERP results in "google" mode, or None when
        nothing was found or processing failed. "ai" mode returns None.

    Raises:
        ValueError: If ``search_mode`` is neither "google" nor "ai".
        Exception: Any pipeline failure is logged, shown in the UI and
            re-raised.
    """
    logger.info(f"Starting web research - Keywords: {search_keywords}, Mode: {search_mode}")
    logger.debug(f"Additional parameters: {kwargs}")

    update_progress = None
    try:
        # Status widgets shared by both pipelines.
        status_container = st.empty()
        progress_bar = st.progress(0)

        def update_progress(message, progress=None, level="info"):
            # do_google_serp_search passes the level as the second positional
            # argument; a string here is a level, never a progress value.
            if isinstance(progress, str):
                progress, level = None, progress
            if progress is not None:
                progress_bar.progress(progress)

            if level == "error":
                status_container.error(f"🚫 {message}")
            elif level == "warning":
                status_container.warning(f"⚠️ {message}")
            elif level == "success":
                status_container.success(f"✅ {message}")
            else:
                status_container.info(f"🔄 {message}")
            logger.debug(f"Progress update [{level}]: {message}")

        if search_mode == "google":
            logger.info("Starting Google research pipeline")
            try:
                update_progress("Initiating SERP search...", progress=10)
                serp_results = do_google_serp_search(
                    search_keywords,
                    status_container=status_container,
                    update_progress=update_progress,
                    **kwargs,
                )

                # The SERP helper nests the raw payload under 'results';
                # accept a flat payload as well.
                raw_results = None
                if isinstance(serp_results, dict):
                    raw_results = serp_results.get('results') or serp_results
                has_organic = isinstance(raw_results, dict) and bool(raw_results.get('organic'))

                if not has_organic:
                    logger.warning("SERP search returned no results, falling back to Gemini")
                    update_progress("No SERP results, trying Gemini...", progress=45)
                    # NOTE: the Gemini fallback (do_gemini_web_research) is
                    # deliberately disabled, so there is nothing else to try.
                    error_msg = "No results from either SERP or Gemini"
                    logger.warning(error_msg)
                    update_progress(error_msg, level="warning")
                    return None

                logger.info("SERP search successful")
                update_progress("SERP search completed", progress=40)

                update_progress("Processing final results...", progress=90)
                processed_results = process_research_results(serp_results)
                if not processed_results:
                    error_msg = "Failed to process research results"
                    logger.warning(error_msg)
                    update_progress(error_msg, level="warning")
                    return None

                update_progress("Research completed!", progress=100, level="success")
                display_research_results(processed_results)
                return processed_results

            except Exception as search_err:
                error_msg = f"Research pipeline failed: {str(search_err)}"
                # loguru ignores exc_info; opt(exception=True) attaches the traceback.
                logger.opt(exception=True).error(error_msg)
                update_progress(error_msg, level="error")
                raise

        elif search_mode == "ai":
            logger.info("Starting AI research pipeline")
            try:
                update_progress("Initiating Tavily AI search...", progress=10)

                # Pull out the options that are passed explicitly so they are
                # not supplied twice through **kwargs.
                include_domains = kwargs.pop('include_domains', None)
                search_depth = kwargs.pop('search_depth', 'advanced')
                max_results = kwargs.pop('max_results', kwargs.get('num_results', 10))

                get_tavilyai_results(
                    keywords=search_keywords,
                    max_results=max_results,
                    include_domains=include_domains,
                    search_depth=search_depth,
                    **kwargs,
                )

                update_progress("Initiating Metaphor AI search...", progress=50)
                metaphor_results, _metaphor_titles = do_metaphor_ai_research(search_keywords)
                if metaphor_results is None:
                    update_progress(
                        "Metaphor AI search failed, continuing with Tavily results only...",
                        level="warning",
                    )
                else:
                    update_progress("Metaphor AI search completed successfully", progress=75)

                # NOTE(review): this mode has never returned its results to
                # the caller; kept as-is. Confirm whether callers need them.
                return None

            except Exception as ai_err:
                error_msg = f"AI research pipeline failed: {str(ai_err)}"
                logger.opt(exception=True).error(error_msg)
                update_progress(error_msg, level="error")
                raise

        else:
            error_msg = f"Unsupported search mode: {search_mode}"
            logger.error(error_msg)
            update_progress(error_msg, level="error")
            raise ValueError(error_msg)

    except Exception as err:
        error_msg = f"Failed in gpt_web_researcher: {str(err)}"
        logger.opt(exception=True).error(error_msg)
        # update_progress stays None if the status widgets could not be created.
        if update_progress is not None:
            update_progress(error_msg, level="error")
        raise
def do_google_serp_search(search_keywords, status_container=None, update_progress=None, **kwargs):
    """Perform a Google SERP search and return the raw and processed results.

    Args:
        search_keywords (str): Non-empty search query.
        status_container: Optional Streamlit placeholder used for short status
            messages (its ``info``/``success``/``warning``/``empty`` methods
            are called). Skipped when None.
        update_progress: Optional callback invoked as
            ``update_progress(message, level=...)``. Skipped when None.
            Presumably the closure built in gpt_web_researcher; other
            callbacks must accept a ``level`` keyword.
        **kwargs: Extra search parameters; only logged here.

    Returns:
        dict | None: ``{'results', 'titles', 'summary', 'stats'}`` when the
        search returned data, otherwise None.

    Raises:
        ValueError: If ``search_keywords`` is empty or not a string.
        Exception: Any search or processing failure is logged and re-raised.
    """
    def notify(message, level="info"):
        # Pass the level by keyword: the callback's second positional
        # parameter is the progress value, not the level.
        if update_progress is not None:
            update_progress(message, level=level)

    def show(kind, text):
        if status_container is not None:
            getattr(status_container, kind)(text)

    logger.info("=" * 50)
    logger.info("Starting Google SERP Search")
    logger.info("=" * 50)

    try:
        notify("Validating search parameters")
        show("info", "📝 Validating parameters...")

        if not search_keywords or not isinstance(search_keywords, str):
            logger.error(f"Invalid search keywords: {search_keywords}")
            raise ValueError("Search keywords must be a non-empty string")

        notify(f"Initiating search for: '{search_keywords}'")
        show("info", "🌐 Querying search API...")
        logger.info(f"Search params: {kwargs}")

        g_results = google_search(search_keywords)
        if not g_results:
            notify("No results found", "warning")
            show("warning", "⚠️ No results found")
            return None

        notify("Search completed successfully", "success")

        organic_count = len(g_results.get('organic', []))
        questions_count = len(g_results.get('peopleAlsoAsk', []))
        related_count = len(g_results.get('relatedSearches', []))
        notify(
            "Found:\n"
            f"- {organic_count} organic results\n"
            f"- {questions_count} related questions\n"
            f"- {related_count} related searches"
        )

        notify("Processing search results")
        show("info", "⚡ Processing results...")
        processed_results = process_search_results(g_results)

        notify("Extracting information")
        g_titles = extract_info(g_results, 'titles')

        notify("Analysis completed successfully", "success")
        if status_container is not None:
            status_container.success("✨ Research completed!")
            # Leave the success message visible briefly before clearing it.
            time.sleep(1)
            status_container.empty()

        return {
            'results': g_results,
            'titles': g_titles,
            'summary': processed_results,
            'stats': {
                'organic_count': organic_count,
                'questions_count': questions_count,
                'related_count': related_count,
            },
        }

    except Exception as err:
        error_msg = f"Search failed: {str(err)}"
        notify(error_msg, "error")
        logger.error(error_msg)
        # loguru ignores exc_info; opt(exception=True) attaches the traceback.
        logger.opt(exception=True).debug("Stack trace:")
        raise

    finally:
        logger.info("=" * 50)
        logger.info("Google SERP Search function completed")
        logger.info("=" * 50)
def do_tavily_ai_search(search_keywords, max_results=10, **kwargs):
    """Run a Tavily AI search and extract its titles and answer.

    Args:
        search_keywords (str): Query to search for.
        max_results (int): Maximum number of results to request.
        **kwargs: Optional ``search_depth`` (number, where > 2 means
            "advanced", or the literal "basic"/"advanced"), ``time_range``
            (default "year") and ``include_domains``.

    Returns:
        tuple: ``(results, titles, answer)``, or ``(None, None, None)`` when
        the search returns nothing or fails.
    """
    try:
        logger.info(f"Doing Tavily AI search for: {search_keywords}")

        # search_depth arrives either as a numeric level or already as the
        # Tavily literal; comparing a string with an int would raise.
        depth = kwargs.get('search_depth', 3)
        if isinstance(depth, str):
            search_depth = 'basic' if depth.strip().lower() == 'basic' else 'advanced'
        else:
            search_depth = 'advanced' if depth > 2 else 'basic'

        tavily_params = {
            'max_results': max_results,
            'search_depth': search_depth,
            'time_range': kwargs.get('time_range', 'year'),
            # [""] is this module's placeholder for "no domain filter".
            'include_domains': kwargs.get('include_domains') or [""],
        }

        t_results = get_tavilyai_results(keywords=search_keywords, **tavily_params)
        if not t_results:
            logger.warning("No results returned from Tavily AI search")
            return None, None, None

        t_titles = tavily_extract_information(t_results, 'titles')
        t_answer = tavily_extract_information(t_results, 'answer')
        return t_results, t_titles, t_answer

    except Exception as err:
        # Best effort: the caller can continue with other search providers.
        logger.error(f"Failed to do Tavily AI Search: {err}")
        return None, None, None
def do_metaphor_ai_research(search_keywords):
    """
    Perform Metaphor AI research and return results with titles.

    Args:
        search_keywords (str): Keywords to search for

    Returns:
        tuple: (response_articles, titles) or (None, None) if search fails
    """
    try:
        logger.info(f"Start Semantic/Neural web search with Metaphor: {search_keywords}")
        response_articles = metaphor_search_articles(search_keywords)

        # Titles are read from the 'data' -> 'results' structure of the
        # search response; anything else counts as "no results".
        if not response_articles or 'data' not in response_articles:
            logger.warning("No valid results from Metaphor search")
            return None, None

        m_titles = [
            result.get('title', '')
            for result in response_articles['data'].get('results', [])
        ]
        return response_articles, m_titles

    except Exception as err:
        # Best effort: the caller falls back to the other search results.
        logger.error(f"Failed to do Metaphor search: {err}")
        return None, None
def do_google_pytrends_analysis(search_keywords):
@@ -163,4 +384,4 @@ def tavily_extract_information(json_data, keyword):
elif keyword == 'follow-query':
return json_data['follow_up_questions']
else:
return f"Invalid keyword: {keyword}"
return f"Invalid keyword: {keyword}"

View File

@@ -116,55 +116,431 @@ def metaphor_find_similar(similar_url):
return search_response
def calculate_date_range(time_range: str) -> tuple:
    """
    Calculate start and end dates based on time range selection.

    Args:
        time_range (str): One of 'past_day', 'past_week', 'past_month', 'past_year', 'anytime'

    Returns:
        tuple: (start_date, end_date) as UTC ISO strings with milliseconds
        (start ends in ``.000Z``, end in ``.999Z``). Returns (None, None) for
        'anytime' and for any unrecognised value.
    """
    # Imported locally so the function does not depend on the module's
    # top-level datetime imports.
    from datetime import datetime, timedelta, timezone

    lookbacks = {
        'past_day': timedelta(days=1),
        'past_week': timedelta(weeks=1),
        'past_month': timedelta(days=30),
        'past_year': timedelta(days=365),
    }
    lookback = lookbacks.get(time_range) if isinstance(time_range, str) else None
    if lookback is None:  # 'anytime' or unknown: no date filter
        return None, None

    # Timezone-aware replacement for the deprecated datetime.utcnow().
    now = datetime.now(timezone.utc)
    start_date = (now - lookback).strftime('%Y-%m-%dT%H:%M:%S.000Z')
    end_date = now.strftime('%Y-%m-%dT%H:%M:%S.999Z')
    return start_date, end_date
def metaphor_search_articles(query, search_options: dict = None):
    """
    Search for articles using the Metaphor/Exa API.

    Args:
        query (str): The search query.
        search_options (dict): Search configuration options including:
            - num_results (int): Number of results to retrieve
            - use_autoprompt (bool): Whether to use autoprompt
            - include_domains (list): List of domains to include
            - time_range (str): One of 'past_day', 'past_week', 'past_month', 'past_year', 'anytime'

    Returns:
        dict | None: ``{"data": {...}}`` with the formatted results, plus
        top-level answer keys when an Exa and/or Tavily answer was obtained.
        None when the search fails or returns no results.
    """
    exa = get_metaphor_client()
    try:
        options = search_options or {}

        # Config values are defaults only; explicit options override them.
        try:
            include_domains, _, num_results, _ = cfg_search_param('exa')
        except Exception as cfg_err:
            logger.warning(f"Failed to load config parameters: {cfg_err}. Using defaults.")
            include_domains = None
            num_results = 10

        time_range = options.get('time_range', 'anytime')
        start_published_date, end_published_date = calculate_date_range(time_range)

        search_params = {
            'num_results': options.get('num_results', num_results),
            'summary': True,  # Always get summaries
            'include_domains': options.get('include_domains', include_domains),
            'use_autoprompt': options.get('use_autoprompt', True),
        }
        # NOTE(review): an 'exclude_domains' option was documented here but
        # has never been forwarded to the API; confirm whether it should be.

        # Date filters are only sent when a time range was selected.
        if start_published_date:
            search_params['start_published_date'] = start_published_date
        if end_published_date:
            search_params['end_published_date'] = end_published_date

        logger.info(f"Exa web search with params: {search_params} and Query: {query}")
        search_response = exa.search_and_contents(query, **search_params)

        if not search_response or not hasattr(search_response, 'results'):
            logger.warning("No results returned from Exa search")
            return None

        # Cost may be exposed as a mapping or as an object with a `total`
        # attribute; handle both and never fail the search over it.
        cost_dollars = None
        try:
            raw_cost = getattr(search_response, 'cost_dollars', None)
            if isinstance(raw_cost, dict):
                total = raw_cost.get('total')
            else:
                total = getattr(raw_cost, 'total', None)
            if total is not None:
                cost_dollars = {'total': float(total)}
        except Exception as cost_err:
            logger.warning(f"Error processing cost information: {cost_err}")

        formatted_response = {
            "data": {
                "requestId": getattr(search_response, 'request_id', None),
                "resolvedSearchType": "neural",
                "results": [
                    {
                        "id": result.url,
                        "title": result.title,
                        "url": result.url,
                        "publishedDate": getattr(result, 'published_date', None),
                        "author": getattr(result, 'author', None),
                        "score": getattr(result, 'score', 0),
                        "summary": getattr(result, 'summary', None),
                        "text": getattr(result, 'text', None),
                        "image": getattr(result, 'image', None),
                        "favicon": getattr(result, 'favicon', None),
                    }
                    for result in search_response.results
                ],
                "costDollars": cost_dollars,
            }
        }

        # AI-generated answer from Exa (best effort).
        try:
            exa_answer = get_exa_answer(query)
            if exa_answer:
                formatted_response.update(exa_answer)
                # The display code reads the answer cost from this key.
                formatted_response["answerCostDollars"] = exa_answer.get("costDollars")
        except Exception as exa_err:
            logger.warning(f"Error getting Exa answer: {exa_err}")

        # AI-generated answer from Tavily (best effort).
        try:
            import importlib
            tavily_module = importlib.import_module('lib.ai_web_researcher.tavily_ai_search')
            if hasattr(tavily_module, 'do_tavily_ai_search'):
                tavily_response = tavily_module.do_tavily_ai_search(query)
                # The helper may return (results, titles, answer); the raw
                # result dict is the first element.
                if isinstance(tavily_response, tuple):
                    tavily_response = tavily_response[0] if tavily_response else None
                if isinstance(tavily_response, dict) and 'answer' in tavily_response:
                    formatted_response.update({
                        "tavily_answer": tavily_response.get("answer"),
                        "tavily_citations": tavily_response.get("citations", []),
                        "tavily_cost_dollars": tavily_response.get("costDollars", {"total": 0}),
                    })
            else:
                logger.warning("do_tavily_ai_search function not found in tavily_ai_search module")
        except Exception as tavily_err:
            logger.warning(f"Error getting Tavily answer: {tavily_err}")

        # Display results in Streamlit
        streamlit_display_metaphor_results(formatted_response)

        return formatted_response

    except Exception as e:
        logger.error(f"Error in Exa searching articles: {e}")
        return None
def _get_field(obj, name, default=None):
    """Read ``name`` from a dict or, failing that, as an attribute."""
    if isinstance(obj, dict):
        return obj.get(name, default)
    return getattr(obj, name, default)


def _render_answer_column(column, heading, accent, answer, cost, cost_label,
                          citations, sources_label, empty_message):
    """Render one AI answer card (answer, cost caption, sources) in ``column``."""
    import html

    with column:
        st.markdown(heading)
        if not answer:
            st.info(empty_message)
            return

        # The answer is generated from web content: escape it before it is
        # embedded in raw HTML.
        st.markdown(
            '<div style="background-color: #f0f2f6; padding: 20px; '
            f'border-radius: 10px; border-left: 5px solid {accent};">\n'
            f'{html.escape(str(answer))}\n'
            '</div>',
            unsafe_allow_html=True,
        )

        total_cost = _get_field(cost, 'total') if cost else None
        if isinstance(total_cost, (int, float)):
            st.caption(f"{cost_label}: ${total_cost:.3f}")

        if citations:
            with st.expander(sources_label):
                for idx, citation in enumerate(citations, 1):
                    title = _get_field(citation, 'title') or 'Untitled'
                    st.markdown(f"**Source {idx}:** [{title}]({_get_field(citation, 'url')})")


def _build_insights_prompt(analysis_data: dict) -> str:
    """Assemble the LLM prompt asking for a search-intent analysis."""
    result_lines = [
        f"{i + 1}. {r['title']} - {r['summary']}"
        for i, r in enumerate(analysis_data["metaphor_results"])
    ]
    all_citations = analysis_data["metaphor_citations"] + analysis_data["tavily_citations"]
    citation_lines = [
        f"{i + 1}. {_get_field(c, 'title', 'Untitled')} - {_get_field(c, 'url', 'No URL')}"
        for i, c in enumerate(all_citations)
    ]
    return f"""
**Search Intent & User Needs Analysis**
I have conducted research using both Tavily and Metaphor AI search engines.
Below is the data from both sources:
**Metaphor AI Answer:**
{analysis_data["metaphor_answer"]}
**Tavily AI Answer:**
{analysis_data["tavily_answer"]}
**Search Results:**
{result_lines}
**Citations:**
{citation_lines}
Based on this research data, please provide the following insights:
**Search Intent & User Needs**
```
Review the research data and identify:
1. The distribution of search intent (categorize as Informational/Commercial/Navigational/Transactional)
2. Most common user questions and their patterns
3. Frequently mentioned pain points or challenges
4. Recurring solutions or approaches to addressing these challenges
5. Gaps between user questions and available answers
Present findings in a structured format with percentages and specific examples.
```
Format your response as a comprehensive analysis with clear sections, bullet points, and examples from the research data.
"""


def streamlit_display_metaphor_results(metaphor_response: dict):
    """
    Display Metaphor search results in Streamlit with enhanced metrics and popovers

    Renders headline metrics, the Tavily/Metaphor AI answers side by side, an
    on-demand LLM "search insights" section, a results table and per-result
    detail expanders.

    Args:
        metaphor_response (dict): Response from Metaphor search; must contain
            ``data.results``. Optional top-level keys read here: ``answer``,
            ``citations``, ``answerCostDollars``, ``tavily_answer``,
            ``tavily_citations``, ``tavily_cost_dollars``.
    """
    if not metaphor_response or 'data' not in metaphor_response:
        st.error("No valid Metaphor search results to display")
        return

    # Session state survives Streamlit reruns; keep the latest response there.
    if 'search_insights' not in st.session_state:
        st.session_state.search_insights = None
    if 'insights_generated' not in st.session_state:
        st.session_state.insights_generated = False
    st.session_state.metaphor_response = metaphor_response

    data = metaphor_response['data']
    results = data['results']
    total_results = len(results)
    # Average only over results that actually carry a numeric score.
    scores = [r['score'] for r in results if isinstance(r.get('score'), (int, float))]
    avg_score = sum(scores) / len(scores) if scores else 0

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(label="Total Results", value=total_results)
    with col2:
        search_cost = _get_field(data.get('costDollars'), 'total') if data.get('costDollars') else None
        if isinstance(search_cost, (int, float)):
            st.metric(label="Search Cost", value=f"${search_cost:.3f}")
    with col3:
        st.metric(label="Average Relevance Score", value=f"{avg_score:.2f}")

    # Display AI-generated answers side by side
    if 'answer' in metaphor_response or 'tavily_answer' in metaphor_response:
        st.markdown("### 🤖 AI-Generated Research Answers")
        tavily_col, metaphor_col = st.columns(2)
        _render_answer_column(
            tavily_col,
            "#### 🔍 Tavily AI Answer",
            "#FF4B4B",
            metaphor_response.get('tavily_answer'),
            metaphor_response.get('tavily_cost_dollars'),
            "Tavily Answer Cost",
            metaphor_response.get('tavily_citations'),
            "📚 Tavily Sources",
            "No Tavily answer available for this query.",
        )
        _render_answer_column(
            metaphor_col,
            "#### 🔍 Metaphor AI Answer",
            "#4CAF50",
            metaphor_response.get('answer'),
            metaphor_response.get('answerCostDollars'),
            "Metaphor Answer Cost",
            metaphor_response.get('citations'),
            "📚 Metaphor Sources",
            "No Metaphor answer available for this query.",
        )

    st.markdown("### 🔍 Search Insights")
    insights_container = st.container()

    if st.button("Generate Search Insights", type="primary"):
        # Flag the request and rerun; generation happens on the next pass.
        st.session_state.insights_generated = True
        st.session_state.metaphor_response = metaphor_response
        # st.experimental_rerun was replaced by st.rerun; use whichever exists.
        rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun")
        rerun()

    if st.session_state.insights_generated:
        with insights_container:
            with st.spinner("Analyzing search results to generate insights..."):
                stored_response = st.session_state.metaphor_response
                analysis_data = {
                    "metaphor_results": stored_response['data']['results'],
                    "metaphor_answer": stored_response.get("answer", ""),
                    "tavily_answer": stored_response.get("tavily_answer", ""),
                    "metaphor_citations": stored_response.get("citations", []),
                    "tavily_citations": stored_response.get("tavily_citations", []),
                }
                analysis_prompt = _build_insights_prompt(analysis_data)

                try:
                    import importlib
                    text_gen_module = importlib.import_module(
                        'lib.gpt_providers.text_generation.main_text_generation'
                    )
                    if hasattr(text_gen_module, 'llm_text_gen'):
                        st.session_state.search_insights = text_gen_module.llm_text_gen(analysis_prompt)
                    else:
                        st.error("Could not find llm_text_gen function in the text generation module.")
                except Exception as e:
                    st.error(f"Error generating insights: {str(e)}")
                    logger.error(f"Error generating insights: {e}")
                finally:
                    # Always clear the flag so a failure is not retried on
                    # every subsequent rerun.
                    st.session_state.insights_generated = False

    if st.session_state.search_insights:
        with insights_container:
            st.markdown("### 🔍 Search Intent & User Needs Analysis")
            st.markdown(st.session_state.search_insights)

    if results:
        display_df = pd.DataFrame(results).copy()
        display_df['Visit Site'] = display_df['url']
        if 'publishedDate' in display_df.columns:
            display_df['publishedDate'] = display_df['publishedDate'].apply(
                lambda x: x[:10] if isinstance(x, str) else 'N/A'
            )

        columns = {
            'title': st.column_config.TextColumn('Title', width='large', required=True),
            'author': st.column_config.TextColumn('Author', width='medium'),
            'publishedDate': st.column_config.TextColumn('Published Date', width='medium'),
            'score': st.column_config.NumberColumn('Relevance Score', width='small', format="%.2f"),
            'Visit Site': st.column_config.LinkColumn('Link', width='small', display_text='Visit Site'),
            'summary': st.column_config.TextColumn('Summary', width='large', required=True),
        }
        st.data_editor(
            display_df,
            column_config=columns,
            hide_index=True,
            num_rows='dynamic',
            disabled=True,
            column_order=['title', 'author', 'publishedDate', 'score', 'summary', 'Visit Site'],
        )

    # Display detailed summaries
    st.write("### Detailed Summaries")
    for result in results:
        with st.expander(f"📄 {result['title']}", expanded=False):
            summary_col, details_col = st.columns([3, 1])
            with summary_col:
                st.markdown("**Summary**")
                st.markdown(result.get('summary') or 'N/A')
            with details_col:
                published = result.get('publishedDate')
                score = result.get('score')
                st.markdown("**Details**")
                st.markdown(f"**Author:** {result['author'] if result.get('author') else 'N/A'}")
                st.markdown(f"**Published:** {published[:10] if isinstance(published, str) else 'N/A'}")
                # A missing score must not break the formatter.
                score_text = f"{score:.2f}" if isinstance(score, (int, float)) else 'N/A'
                st.markdown(f"**Score:** {score_text}")
                st.markdown(f"[Visit Site]({result['url']})")

    # Display search metadata
    st.divider()
    type_col, request_col = st.columns(2)
    with type_col:
        st.caption(f"Search Type: {data['resolvedSearchType']}")
    with request_col:
        st.caption(f"Request ID: {data['requestId']}")
def metaphor_news_summarizer(news_keywords):
@@ -240,3 +616,56 @@ def metaphor_scholar_search(query, include_domains=None, time_range="anytime"):
return response
except Exception as e:
logger.error(f"Error in searching papers: {e}")
def get_exa_answer(query: str, system_prompt: str = None) -> dict:
    """
    Get an AI-generated answer for a query using Exa's answer endpoint.

    Args:
        query (str): The search query to get an answer for
        system_prompt (str, optional): Custom system prompt for the LLM. If None, uses default prompt.

    Returns:
        dict | None: None when no answer was received or the call failed,
        otherwise
        {
            "answer": str,
            "citations": list[dict],
            "costDollars": dict
        }
    """
    def field(obj, *names, default=None):
        # The response may be a plain dict or an SDK object; try each
        # candidate name as a key or attribute.
        for name in names:
            value = obj.get(name) if isinstance(obj, dict) else getattr(obj, name, None)
            if value is not None:
                return value
        return default

    def citation_to_dict(citation):
        # Downstream display code reads citations with .get(), so objects
        # are flattened to dicts.
        # NOTE(review): attribute names assume the Exa SDK's answer result
        # objects - confirm against the installed SDK version.
        if isinstance(citation, dict):
            return citation
        return {
            "id": getattr(citation, 'id', None),
            "url": getattr(citation, 'url', None),
            "title": getattr(citation, 'title', None),
            "author": getattr(citation, 'author', None),
            "publishedDate": getattr(citation, 'published_date', None),
            "text": getattr(citation, 'text', None),
        }

    exa = get_metaphor_client()
    try:
        if system_prompt is None:
            system_prompt = (
                "I am doing research to write factual content. "
                "Help me find answers for content generation task. "
                "Provide detailed, well-structured answers with clear citations."
            )

        logger.info(f"Getting Exa answer for query: {query}")
        logger.debug(f"Using system prompt: {system_prompt}")

        try:
            result = exa.answer(
                query,
                model="exa",
                text=True,  # Include full text in citations
                system_prompt=system_prompt,
            )
        except TypeError:
            # Presumably an SDK version whose answer() has no system_prompt
            # parameter; retry without it rather than losing the answer.
            logger.debug("exa.answer rejected system_prompt; retrying without it")
            result = exa.answer(query, model="exa", text=True)

        answer = field(result, 'answer') if result else None
        if not answer:
            logger.warning("No answer received from Exa")
            return None

        return {
            "answer": answer,
            "citations": [citation_to_dict(c) for c in field(result, 'citations', default=[])],
            "costDollars": field(result, 'costDollars', 'cost_dollars', default={"total": 0}),
        }

    except Exception as e:
        logger.error(f"Error getting Exa answer: {e}")
        return None

View File

@@ -49,17 +49,9 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def get_tavilyai_results(keywords, max_results=5):
def get_tavilyai_results(keywords, max_results=5, include_domains=None, search_depth="advanced", **kwargs):
"""
Get Tavily AI search results based on specified keywords and options.
Args:
keywords (str): Keywords for Tavily AI search.
include_urls (str): Comma-separated URLs to include in the search.
search_depth (str, optional): Search depth option (default is "advanced").
Returns:
dict: Tavily AI search results.
"""
# Run Tavily search
logger.info(f"Running Tavily search on: {keywords}")
@@ -74,56 +66,100 @@ def get_tavilyai_results(keywords, max_results=5):
client = TavilyClient(api_key=api_key)
except Exception as err:
logger.error(f"Failed to create Tavily client. Check TAVILY_API_KEY: {err}")
# Read search config params from the file.
try:
include_urls = cfg_search_param('tavily')
except Exception as err:
logger.error(f"Failed to read search params from main_config: {err}")
raise
try:
if include_urls:
tavily_search_result = client.search(keywords,
search_depth="advanced",
include_answer=True,
max_results=max_results,
include_domains=include_urls)
else:
tavily_search_result = client.search(keywords,
search_depth = "advanced",
include_answer=True,
max_results=max_results)
# Create search parameters exactly matching Tavily's API format
tavily_search_result = client.search(
query=keywords,
search_depth="advanced",
time_range="year",
include_answer="advanced",
include_domains=[""] if not include_domains else include_domains,
max_results=max_results
)
if tavily_search_result:
print_result_table(tavily_search_result)
streamlit_display_results(tavily_search_result)
return tavily_search_result
return None
print_result_table(tavily_search_result)
streamlit_display_results(tavily_search_result)
return(tavily_search_result)
except Exception as err:
logger.error(f"Failed to do Tavily Research: {err}")
raise
def streamlit_display_results(output_data):
    """Display Tavily AI search results in Streamlit UI with enhanced visualization.

    Renders the AI answer, any follow-up questions, a table of results and
    one popover per result holding its content.

    Args:
        output_data (dict): Tavily search response; the keys read here are
            ``answer``, ``follow_up_questions`` and ``results`` (each result
            providing ``title``, ``content`` and ``url``).
    """
    import html
    import pandas as pd

    if not output_data:
        st.info("No results found for your search query.")
        return

    # The answer is generated from web content: escape it before it is
    # embedded in raw HTML.
    answer = output_data.get("answer") or "No answer available"
    st.markdown("### 🤖 AI-Generated Answer")
    st.markdown(
        '<div style="background-color: #f0f2f6; padding: 20px; '
        'border-radius: 10px; border-left: 5px solid #4CAF50;">\n'
        f'{html.escape(str(answer))}\n'
        '</div>',
        unsafe_allow_html=True,
    )

    follow_up_questions = output_data.get("follow_up_questions") or []
    if follow_up_questions:
        st.markdown("### ❓ Follow-up Questions")
        for i, question in enumerate(follow_up_questions, 1):
            st.markdown(f"**{i}.** {question}")

    st.markdown("### 📊 Search Results")
    items = output_data.get("results") or []
    results_data = [
        {
            "Title": item.get("title", ""),
            "Content": item.get("content", ""),
            "Link": item.get("url", ""),
        }
        for item in items
    ]

    if not results_data:
        st.info("No results found for your search query.")
        return

    st.data_editor(
        pd.DataFrame(results_data),
        column_config={
            "Title": st.column_config.TextColumn(
                "Title",
                help="Article title",
                width="medium",
            ),
            "Content": st.column_config.TextColumn(
                "Content",
                help="Click the button below to view full content",
                width="large",
            ),
            "Link": st.column_config.LinkColumn(
                "Link",
                help="Click to visit the website",
                width="small",
                display_text="Visit Site",
            ),
        },
        hide_index=True,
        use_container_width=True,
    )

    # Popovers expose the full content of each result.
    for item in items:
        title = item.get("title") or ""  # a None title must not break slicing
        with st.popover(f"View content: {title[:50]}..."):
            st.markdown(item.get("content") or "")
def print_result_table(output_data):

View File

@@ -0,0 +1,277 @@
import streamlit as st
import logging
from datetime import datetime
from typing import Dict, Optional, Any
# Configure module logger
logger = logging.getLogger(__name__)
def display_research_results(results: Dict[str, Any]) -> None:
    """
    Render processed research results in a three-tab Streamlit layout.

    Args:
        results (dict): Processed research results. The 'source' key decides
            which renderer fills the results tab.
    """
    if not results:
        st.warning("No results to display")
        return

    summary_tab, results_tab, stats_tab = st.tabs(["📊 Summary", "🔍 Results", "📈 Statistics"])

    with summary_tab:
        display_summary_section(results)

    with results_tab:
        # Gemini output has a dedicated renderer; any other source is shown as SERP data.
        renderer = display_gemini_results if results['source'] == 'gemini' else display_serp_results
        renderer(results)

    with stats_tab:
        display_statistics(results)
def process_research_results(results: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Normalise raw research results into a timestamped summary/data dict.

    Args:
        results (dict): Raw research results; the 'source' key selects the
            Gemini or SERP processing path.

    Returns:
        dict | None: The processed results, or None when ``results`` is empty
        or processing raised an exception (the error is logged).
    """
    logger.info("Processing research results")
    try:
        if not results:
            return None

        processed = dict(
            timestamp=str(datetime.now()),
            source=results.get('source', 'unknown'),
            summary={},
            data={},
        )

        # The source-specific processor supplies the real 'summary' and 'data'.
        if results.get('source') == 'gemini':
            source_payload = process_gemini_results(results)
        else:
            source_payload = process_serp_results(results)
        processed.update(source_payload)

        logger.info("Results processing completed")
        return processed
    except Exception as err:
        logger.error(f"Failed to process results: {err}", exc_info=True)
        return None
def process_search_results(search_results: Dict[str, Any], search_type: str = "general") -> Optional[Dict[str, Any]]:
    """
    Convert a raw search payload into the structure prepared for display.

    Args:
        search_results (dict): Raw payload; the 'organic', 'peopleAlsoAsk'
            and 'relatedSearches' keys are read.
        search_type (str): Label recorded in the result metadata.

    Returns:
        dict | None: Processed sections plus a 'metadata' entry, or None when
        the payload is empty or processing raised an exception (logged).
    """
    logger.info(f"Processing {search_type} search results")
    try:
        if not search_results:
            return None

        organic = process_organic_results(search_results.get('organic', []))
        questions = process_paa_results(search_results.get('peopleAlsoAsk', []))
        related = process_related_searches(search_results.get('relatedSearches', []))
        metadata = {
            'timestamp': str(datetime.now()),
            'type': search_type,
        }
        return {
            'organic': organic,
            'peopleAlsoAsk': questions,
            'relatedSearches': related,
            'metadata': metadata,
        }
    except Exception as err:
        logger.error(f"Error processing search results: {err}", exc_info=True)
        return None
# Helper functions for result processing
def process_organic_results(results):
    """
    Process organic search results.

    Args:
        results (list | None): Raw organic result dicts. None is treated as
            an empty list.

    Returns:
        list: One dict per result with 'title', 'link', 'snippet' and
        'position' keys; absent fields are filled with placeholder values.
    """
    # `payload.get('organic', [])` yields None when the key exists with a null
    # value, so guard here instead of raising TypeError on iteration.
    return [
        {
            'title': result.get('title', 'No Title'),
            'link': result.get('link', '#'),
            'snippet': result.get('snippet', 'No snippet available'),
            'position': result.get('position', 'N/A'),
        }
        for result in (results or [])
    ]
def process_paa_results(results):
    """
    Process People Also Ask results.

    Args:
        results (list | None): Raw People Also Ask dicts. None is treated as
            an empty list.

    Returns:
        list: One dict per entry with 'question', 'answer' and 'link' keys;
        absent fields are filled with placeholder values.
    """
    # NOTE(review): the question text is read from the 'title' field — confirm
    # this matches the upstream search payload.
    # `results or []` guards against a null value passed through by callers.
    return [
        {
            'question': result.get('title', ''),
            'answer': result.get('snippet', 'No answer available'),
            'link': result.get('link', '#'),
        }
        for result in (results or [])
    ]
def process_related_searches(results):
    """
    Process related searches.

    Args:
        results (list | None): Raw related-search dicts. None is treated as
            an empty list.

    Returns:
        list: The 'query' value of each entry ('' when the key is absent).
    """
    # `results or []` guards against a null value passed through by callers.
    return [entry.get('query', '') for entry in (results or [])]
def process_gemini_results(results: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build the summary/data view of Gemini research results.

    Args:
        results (dict): Raw Gemini research results; the payload is read
            from its 'results' key.

    Returns:
        dict: 'summary' (main findings, sources, timestamp, source count,
        model) and 'data' (the payload as received).
    """
    payload = results.get('results', {})
    grounding = payload.get('grounding_data', [])
    metadata = payload.get('metadata', {})

    summary = {
        'main_findings': payload.get('main_response', ''),
        'sources': grounding,
        # Populated from the metadata timestamp.
        'processing_time': metadata.get('timestamp'),
        'total_sources': len(grounding),
        'model': metadata.get('model', 'unknown'),
    }
    return {'summary': summary, 'data': payload}
def process_serp_results(results: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build the summary/data view of SERP search results.

    Args:
        results (dict): Raw SERP results; the 'organic', 'peopleAlsoAsk' and
            'relatedSearches' keys are read.

    Returns:
        dict: 'summary' (counts, links and titles) and 'data' (the three
        sections after processing).
    """
    organic = results.get('organic', [])
    questions = results.get('peopleAlsoAsk', [])
    related = results.get('relatedSearches', [])

    summary = {
        'total_results': len(organic),
        'sources': [entry.get('link') for entry in organic],
        'titles': [entry.get('title') for entry in organic],
        'total_questions': len(questions),
        'total_related': len(related),
    }
    data = {
        'organic': process_organic_results(organic),
        'peopleAlsoAsk': process_paa_results(questions),
        'relatedSearches': process_related_searches(related),
    }
    return {'summary': summary, 'data': data}
# Display helper functions
def display_summary_section(results):
    """
    Display the summary section: source name, timestamp and source count.

    Args:
        results (dict): Processed research results; 'source' and 'timestamp'
            are required, 'summary' is optional.
    """
    st.markdown("### 📋 Research Summary")

    source_label = results['source'].title()
    completed_at = results['timestamp']
    source_count = len(results.get('summary', {}).get('sources', []))

    st.markdown(
        f"\n- **Source**: {source_label}"
        f"\n- **Time**: {completed_at}"
        f"\n- **Total Sources**: {source_count}\n"
    )
def display_gemini_results(results):
    """
    Display Gemini findings followed by a collapsed list of sources.

    Args:
        results (dict): Processed research results with 'summary' and 'data'.
    """
    st.markdown("### 🤖 Gemini Research Findings")

    findings = results['summary']['main_findings']
    st.write(findings)

    with st.expander("🌐 Sources and References", expanded=False):
        references = results['data'].get('grounding_data', 'No sources available')
        st.write(references)
def display_serp_results(results):
    """
    Display each organic search result inside its own collapsed expander.

    Args:
        results (dict): Processed research results; entries are read from
            ``results['data']['organic']``.
    """
    st.markdown("### 🔍 Search Results")

    organic_entries = results['data'].get('organic', [])
    for entry in organic_entries:
        with st.expander(f"📄 {entry['title']}", expanded=False):
            rank = entry['position']
            url = entry['link']
            snippet = entry['snippet']
            st.markdown(
                f"\n**Rank:** {rank}"
                f"\n**Link:** [{url}]({url})"
                f"\n**Snippet:**"
                f"\n{snippet}\n"
            )
def display_statistics(results: Dict[str, Any]) -> None:
    """
    Display headline metrics, timing and data-quality bars for the results.

    Args:
        results (dict): Processed research results; 'source' and 'timestamp'
            are required, 'summary' is optional.
    """
    st.markdown("### 📈 Research Statistics")

    # (label, summary key, fallback) for each headline metric of the source.
    if results['source'] == 'gemini':
        metric_specs = [
            ("Sources Analyzed", 'total_sources', 0),
            ("Model Used", 'model', 'Unknown'),
        ]
    else:  # SERP results
        metric_specs = [
            ("Organic Results", 'total_results', 0),
            ("Related Questions", 'total_questions', 0),
            ("Related Searches", 'total_related', 0),
        ]

    # One column per metric.
    for column, (label, key, fallback) in zip(st.columns(len(metric_specs)), metric_specs):
        with column:
            st.metric(label, results.get('summary', {}).get(key, fallback))

    # Shown for every source.
    st.markdown("#### 🕒 Timing Information")
    st.info(f"Research completed at: {results['timestamp']}")

    st.markdown("#### 📊 Data Quality")
    quality_metrics = calculate_quality_metrics(results)

    quality_bars = (
        ('completeness', "Data Completeness"),
        ('relevance', "Estimated Relevance"),
    )
    for column, (metric_key, caption) in zip(st.columns(2), quality_bars):
        with column:
            st.progress(quality_metrics[metric_key])
            st.caption(caption)
def calculate_quality_metrics(results: Dict[str, Any]) -> Dict[str, float]:
    """
    Score the research results for completeness and relevance.

    Args:
        results (dict): Processed research results.

    Returns:
        dict: 'completeness' and 'relevance' scores; both are 0.0 when the
        calculation raises an exception (the error is logged).
    """
    try:
        if results['source'] == 'gemini':
            gemini_summary = results['summary']
            # Any main response counts as complete; sources raise relevance.
            if gemini_summary['main_findings']:
                completeness = 1.0
            else:
                completeness = 0.0
            if gemini_summary['sources']:
                relevance = 0.8
            else:
                relevance = 0.4
        else:
            organic_count = results.get('summary', {}).get('total_results', 0)
            # Ten organic results count as fully complete; cap at 1.0.
            completeness = min(organic_count / 10, 1.0)

            # Base relevance, raised by 0.2 for each extra section present.
            relevance = 0.6
            if results.get('summary', {}).get('total_questions', 0):
                relevance = relevance + 0.2
            if results.get('summary', {}).get('total_related', 0):
                relevance = relevance + 0.2

        return dict(completeness=completeness, relevance=relevance)
    except Exception as err:
        logger.error(f"Error calculating quality metrics: {err}")
        return {'completeness': 0.0, 'relevance': 0.0}

View File

@@ -323,7 +323,8 @@ def do_web_research():
"num_results": 10,
"time_range": "past month",
"include_domains": "",
"similar_url": ""
"similar_url": "",
"search_mode": "google" # Default search mode
}
# Define the research options dialog function
@@ -425,6 +426,12 @@ def do_web_research():
horizontal=True,
help="Choose your preferred research method"
)
# Map the selected option to the search_mode value
for mode, label, _, _ in search_options:
if label == selected_option:
st.session_state.research_options["search_mode"] = mode
break
else:
st.warning("No search methods available. Please configure API keys.")
@@ -439,7 +446,7 @@ def do_web_research():
st.rerun()
# Main interface
st.title("Keyword Research Assistant")
st.title("ALwrity Web Researcher")
# Primary search area with help popover
with st.popover(" Keyword Research Tips"):
@@ -450,6 +457,7 @@ def do_web_research():
3. **Search Depth**: Higher depth = more comprehensive but slower
4. **Target Audience**: Affects content recommendations
5. **Content Type**: Influences research focus
6. **Search Mode**: Choose between traditional web research(Google), AI-powered search(Tavily and Metaphor) and Deep Researcher
""")
col1, col2 = st.columns([3, 1])
@@ -484,6 +492,7 @@ def do_web_research():
# Execute search with all parameters
web_research_result = gpt_web_researcher(
search_keywords=st.session_state.research_options["primary_keywords"],
search_mode=st.session_state.research_options["search_mode"],
related_keywords=st.session_state.research_options["related_keywords"],
target_audience=st.session_state.research_options["target_audience"],
content_type=st.session_state.research_options["content_type"],