AI Web Researcher: Added Exa answer and Tavily answer to the metaphor search results
Added AI insights to the Metaphor search results. Better display of AI search results.
This commit is contained in:
@@ -22,14 +22,23 @@
|
||||
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from datetime import datetime
|
||||
import streamlit as st
|
||||
|
||||
from lib.alwrity_ui.display_google_serp_results import (
|
||||
process_research_results,
|
||||
process_search_results,
|
||||
display_research_results
|
||||
)
|
||||
|
||||
from .tavily_ai_search import get_tavilyai_results
|
||||
from .metaphor_basic_neural_web_search import metaphor_find_similar, metaphor_search_articles
|
||||
from .metaphor_basic_neural_web_search import metaphor_search_articles
|
||||
from .google_serp_search import google_search
|
||||
from .google_trends_researcher import do_google_trends_analysis
|
||||
#from .google_gemini_web_researcher import do_gemini_web_research
|
||||
|
||||
from loguru import logger
|
||||
# Configure logger
|
||||
@@ -40,59 +49,271 @@ logger.add(sys.stdout,
|
||||
)
|
||||
|
||||
|
||||
|
||||
def gpt_web_researcher(search_keywords, search_mode, **kwargs):
    """Run keyword-based web research and report progress in the Streamlit UI.

    Args:
        search_keywords: Query text handed to the search backends.
        search_mode: ``"google"`` runs the Google SERP pipeline; ``"ai"`` runs
            Tavily followed by Metaphor. Any other value raises ``ValueError``.
        **kwargs: Extra search options. In ``"google"`` mode they are forwarded
            to ``do_google_serp_search``. In ``"ai"`` mode ``include_domains``
            and ``search_depth`` are extracted, ``num_results`` is read as the
            Tavily ``max_results``, and the remainder is forwarded to
            ``get_tavilyai_results``.

    Returns:
        The value returned by ``process_research_results`` when the Google
        pipeline found and processed results; otherwise ``None``. The ``"ai"``
        branch does not return its results (see the note at the end of it).

    Raises:
        ValueError: If ``search_mode`` is not ``"google"`` or ``"ai"``.
        Exception: Any backend failure is logged, shown in the status
            container and re-raised.
    """
    logger.info(f"Starting web research - Keywords: {search_keywords}, Mode: {search_mode}")
    logger.debug(f"Additional parameters: {kwargs}")

    try:
        # Initialize result container
        research_results = None

        # Create status containers
        status_container = st.empty()
        progress_bar = st.progress(0)

        def update_progress(message, progress=None, level="info"):
            """Move the progress bar (optional) and show ``message`` at ``level``."""
            if progress is not None:
                progress_bar.progress(progress)
            if level == "error":
                status_container.error(f"🚫 {message}")
            elif level == "warning":
                status_container.warning(f"⚠️ {message}")
            elif level == "success":
                # Previously "success" fell through to the info branch and was
                # rendered with the in-progress icon.
                status_container.success(f"✅ {message}")
            else:
                status_container.info(f"🔄 {message}")
            logger.debug(f"Progress update [{level}]: {message}")

        def report_status(message, level="info"):
            """Adapter for helpers that report ``(message, level)`` only.

            ``do_google_serp_search`` passes the level as the second argument;
            routing it through this adapter keeps it out of the ``progress``
            slot of ``update_progress``.
            """
            update_progress(message, level=level)

        if search_mode == "google":
            logger.info("Starting Google research pipeline")

            try:
                # First try Google SERP
                update_progress("Initiating SERP search...", progress=10)
                # status_container and a progress callback are required
                # parameters of do_google_serp_search.
                serp_results = do_google_serp_search(
                    search_keywords,
                    status_container,
                    report_status,
                    **kwargs
                )

                # do_google_serp_search wraps the raw SERP payload under
                # 'results'; accept a bare payload as well.
                raw_results = None
                if isinstance(serp_results, dict):
                    raw_results = serp_results.get('results', serp_results)

                if isinstance(raw_results, dict) and raw_results.get('organic'):
                    logger.info("SERP search successful")
                    update_progress("SERP search completed", progress=40)
                    research_results = serp_results
                else:
                    logger.warning("SERP search returned no results, falling back to Gemini")
                    update_progress("No SERP results, trying Gemini...", progress=45)

                    # Keep it commented. Fallback to Gemini
                    #try:
                    #    gemini_results = do_gemini_web_research(search_keywords)
                    #    if gemini_results:
                    #        logger.info("Gemini research successful")
                    #        update_progress("Gemini research completed", progress=80)
                    #        research_results = {
                    #            'source': 'gemini',
                    #            'results': gemini_results
                    #        }
                    #except Exception as gemini_err:
                    #    logger.error(f"Gemini research failed: {gemini_err}")
                    #    update_progress("Gemini research failed", level="warning")

                if research_results:
                    update_progress("Processing final results...", progress=90)
                    processed_results = process_research_results(research_results)

                    if processed_results:
                        update_progress("Research completed!", progress=100, level="success")
                        display_research_results(processed_results)
                        return processed_results
                    else:
                        error_msg = "Failed to process research results"
                        logger.warning(error_msg)
                        update_progress(error_msg, level="warning")
                        return None
                else:
                    error_msg = "No results from either SERP or Gemini"
                    logger.warning(error_msg)
                    update_progress(error_msg, level="warning")
                    return None

            except Exception as search_err:
                error_msg = f"Research pipeline failed: {str(search_err)}"
                # loguru has no exc_info flag (extra kwargs become format
                # arguments); logger.exception attaches the traceback.
                logger.exception(error_msg)
                update_progress(error_msg, level="error")
                raise

        elif search_mode == "ai":
            logger.info("Starting AI research pipeline")

            try:
                # Do Tavily AI Search
                update_progress("Initiating Tavily AI search...", progress=10)

                # Extract relevant parameters for Tavily search
                include_domains = kwargs.pop('include_domains', None)
                search_depth = kwargs.pop('search_depth', 'advanced')

                # Pass the parameters to get_tavilyai_results
                t_results = get_tavilyai_results(
                    keywords=search_keywords,
                    max_results=kwargs.get('num_results', 10),
                    include_domains=include_domains,
                    search_depth=search_depth,
                    **kwargs
                )

                # Do Metaphor AI Search
                update_progress("Initiating Metaphor AI search...", progress=50)
                metaphor_results, metaphor_titles = do_metaphor_ai_research(search_keywords)

                if metaphor_results is None:
                    update_progress("Metaphor AI search failed, continuing with Tavily results only...", level="warning")
                else:
                    update_progress("Metaphor AI search completed successfully", progress=75)

                # NOTE(review): t_results, metaphor_results and metaphor_titles
                # are not returned, so this branch yields None to the caller.
                # Confirm whether callers expect a return value here.

            except Exception as ai_err:
                error_msg = f"AI research pipeline failed: {str(ai_err)}"
                logger.exception(error_msg)
                update_progress(error_msg, level="error")
                raise

        else:
            error_msg = f"Unsupported search mode: {search_mode}"
            logger.error(error_msg)
            update_progress(error_msg, level="error")
            raise ValueError(error_msg)

    except Exception as err:
        error_msg = f"Failed in gpt_web_researcher: {str(err)}"
        logger.exception(error_msg)
        # update_progress does not exist yet if creating the Streamlit
        # placeholders was what failed.
        if 'update_progress' in locals():
            update_progress(error_msg, level="error")
        raise
|
||||
|
||||
|
||||
def do_google_serp_search(search_keywords, status_container, update_progress, **kwargs):
    """Perform a Google SERP search and summarise it, reporting progress.

    Args:
        search_keywords (str): Non-empty query string.
        status_container: Placeholder object on which ``info``, ``success``,
            ``warning`` and ``empty`` are called (a Streamlit ``st.empty()``
            in the visible caller).
        update_progress: Callable invoked as ``update_progress(message)`` or
            ``update_progress(message, level=...)``.
            NOTE(review): the level is passed by keyword so it cannot land in
            a positional ``progress`` slot; confirm every callback accepts a
            ``level`` keyword.
        **kwargs: Extra search options. They are only logged here and are not
            forwarded to ``google_search``.

    Returns:
        dict | None: ``None`` when the search returns nothing, otherwise a
        dict with ``results`` (raw SERP payload), ``titles``, ``summary``
        (output of ``process_search_results``) and ``stats`` (result counts).

    Raises:
        ValueError: If ``search_keywords`` is empty or not a string.
        Exception: Any search or processing error is reported and re-raised.
    """
    logger.info("="*50)
    logger.info("Starting Google SERP Search")
    logger.info("="*50)

    try:
        logger.info(f"Doing Google search for: {search_keywords}\n")

        # Validate parameters
        update_progress("Validating search parameters")
        status_container.info("📝 Validating parameters...")

        if not search_keywords or not isinstance(search_keywords, str):
            logger.error(f"Invalid search keywords: {search_keywords}")
            raise ValueError("Search keywords must be a non-empty string")

        # Update search initiation
        update_progress(f"Initiating search for: '{search_keywords}'")
        status_container.info("🌐 Querying search API...")
        logger.info(f"Search params: {kwargs}")

        # Execute search
        g_results = google_search(search_keywords)

        if not g_results:
            update_progress("No results found", level="warning")
            status_container.warning("⚠️ No results found")
            return None

        # Log success
        update_progress("Search completed successfully", level="success")

        # Count each result family once; reused for the message and the
        # returned stats.
        organic_count = len(g_results.get('organic', []))
        questions_count = len(g_results.get('peopleAlsoAsk', []))
        related_count = len(g_results.get('relatedSearches', []))
        update_progress("\n".join([
            "Found:",
            f"- {organic_count} organic results",
            f"- {questions_count} related questions",
            f"- {related_count} related searches",
        ]))

        # Process results
        update_progress("Processing search results")
        status_container.info("⚡ Processing results...")
        processed_results = process_search_results(g_results)

        # Extract titles
        update_progress("Extracting information")
        g_titles = extract_info(g_results, 'titles')

        # Final success
        update_progress("Analysis completed successfully", level="success")
        status_container.success("✨ Research completed!")

        # Leave the success message visible briefly, then clear it.
        time.sleep(1)
        status_container.empty()

        return {
            'results': g_results,
            'titles': g_titles,
            'summary': processed_results,
            'stats': {
                'organic_count': organic_count,
                'questions_count': questions_count,
                'related_count': related_count
            }
        }

    except Exception as err:
        error_msg = f"Search failed: {str(err)}"
        update_progress(error_msg, level="error")
        logger.error(error_msg)
        # loguru attaches the active traceback through opt(exception=True);
        # it has no exc_info keyword.
        logger.opt(exception=True).debug("Stack trace:")
        raise

    finally:
        logger.info("="*50)
        logger.info("Google SERP Search function completed")
        logger.info("="*50)
|
||||
|
||||
|
||||
def do_tavily_ai_search(search_keywords, max_results=10, **kwargs):
    """Run a Tavily AI web search and extract its titles and answer.

    Args:
        search_keywords: Query text passed to ``get_tavilyai_results``.
        max_results: Maximum number of results to request.
        **kwargs: Optional settings read here:
            - ``search_depth``: a number (greater than 2 selects
              ``'advanced'``, otherwise ``'basic'``) or a string
              (``'advanced'`` selects ``'advanced'``, anything else
              ``'basic'``). Defaults to 3.
            - ``time_range``: defaults to ``'year'``.
            - ``include_domains``: falls back to ``[""]`` when missing or
              empty.

    Returns:
        tuple: ``(t_results, t_titles, t_answer)``, or ``(None, None, None)``
        when the search returns nothing or raises.
    """
    try:
        # FIXME: Include the follow-up questions as blog FAQs.
        logger.info(f"Doing Tavily AI search for: {search_keywords}")

        # A string depth (another caller in this module passes 'advanced')
        # used to hit `str > int`, raise TypeError and be swallowed below.
        depth = kwargs.get('search_depth', 3)
        if isinstance(depth, str):
            is_advanced = depth.strip().lower() == 'advanced'
        else:
            is_advanced = depth > 2

        # Prepare Tavily search parameters
        tavily_params = {
            'max_results': max_results,
            'search_depth': 'advanced' if is_advanced else 'basic',
            'time_range': kwargs.get('time_range', 'year'),
            'include_domains': kwargs.get('include_domains') or [""]
        }

        # Pass the parameters to get_tavilyai_results
        t_results = get_tavilyai_results(
            keywords=search_keywords,
            **tavily_params
        )

        if not t_results:
            logger.warning("No results returned from Tavily AI search")
            return None, None, None

        t_titles = tavily_extract_information(t_results, 'titles')
        t_answer = tavily_extract_information(t_results, 'answer')
        return t_results, t_titles, t_answer

    except Exception as err:
        logger.error(f"Failed to do Tavily AI Search: {err}")
        return None, None, None
|
||||
|
||||
|
||||
def do_metaphor_ai_research(search_keywords):
    """
    Perform Metaphor AI research and return results with titles.

    Args:
        search_keywords (str): Keywords to search for

    Returns:
        tuple: (response_articles, titles) or (None, None) if the search
        fails or returns no ``'data'`` section
    """
    try:
        logger.info(f"Start Semantic/Neural web search with Metaphor: {search_keywords}")
        response_articles = metaphor_search_articles(search_keywords)

        # Guard clause: the response must be truthy and carry a 'data' key
        # before titles are read from it.
        if not response_articles or 'data' not in response_articles:
            logger.warning("No valid results from Metaphor search")
            return None, None

        m_titles = [
            result.get('title', '')
            for result in response_articles['data'].get('results', [])
        ]
        return response_articles, m_titles

    except Exception as err:
        logger.error(f"Failed to do Metaphor search: {err}")
        return None, None
|
||||
|
||||
|
||||
def do_google_pytrends_analysis(search_keywords):
|
||||
@@ -163,4 +384,4 @@ def tavily_extract_information(json_data, keyword):
|
||||
elif keyword == 'follow-query':
|
||||
return json_data['follow_up_questions']
|
||||
else:
|
||||
return f"Invalid keyword: {keyword}"
|
||||
return f"Invalid keyword: {keyword}"
|
||||
@@ -116,55 +116,431 @@ def metaphor_find_similar(similar_url):
|
||||
return search_response
|
||||
|
||||
|
||||
|
||||
def metaphor_search_articles(query):
|
||||
def calculate_date_range(time_range: str) -> tuple:
    """
    Calculate start and end dates based on time range selection.

    Args:
        time_range (str): One of 'past_day', 'past_week', 'past_month',
            'past_year', 'anytime'. Any unrecognised value is treated like
            'anytime'.

    Returns:
        tuple: (start_date, end_date) as UTC strings formatted
        'YYYY-MM-DDTHH:MM:SS.000Z' / 'YYYY-MM-DDTHH:MM:SS.999Z', or
        (None, None) when no date filter applies.
    """
    # Imported locally so the helper does not depend on which datetime names
    # the module imported at file level.
    from datetime import datetime, timedelta, timezone

    # Look-back window per supported selection; a month is 30 days and a
    # year is 365 days.
    lookback_by_range = {
        'past_day': timedelta(days=1),
        'past_week': timedelta(weeks=1),
        'past_month': timedelta(days=30),
        'past_year': timedelta(days=365),
    }

    lookback = lookback_by_range.get(time_range)
    if lookback is None:  # 'anytime' or unknown: no date filtering
        return None, None

    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # strftime output is unchanged.
    now = datetime.now(timezone.utc)
    start_date = (now - lookback).strftime('%Y-%m-%dT%H:%M:%S.000Z')
    end_date = now.strftime('%Y-%m-%dT%H:%M:%S.999Z')
    return start_date, end_date
|
||||
|
||||
def metaphor_search_articles(query, search_options: dict = None):
    """
    Search for articles using the Metaphor/Exa API.

    Besides running the search, this function also requests AI answers via
    ``get_exa_answer`` and a Tavily lookup (both best-effort) and renders the
    combined response with ``streamlit_display_metaphor_results``.

    Args:
        query (str): The search query.
        search_options (dict): Search configuration options including:
            - num_results (int): Number of results to retrieve
            - use_autoprompt (bool): Whether to use autoprompt
            - include_domains (list): List of domains to include
            - time_range (str): One of 'past_day', 'past_week', 'past_month', 'past_year', 'anytime'
            - exclude_domains (list): List of domains to exclude

    Returns:
        dict: Search results and metadata under ``"data"``, merged with any
        answer fields that could be obtained; ``None`` when the search
        returns nothing or any step raises.
    """
    exa = get_metaphor_client()
    try:
        # Initialize default search options
        if search_options is None:
            search_options = {}

        # Get config parameters or use defaults
        try:
            include_domains, _, num_results, _ = cfg_search_param('exa')
        except Exception as cfg_err:
            logger.warning(f"Failed to load config parameters: {cfg_err}. Using defaults.")
            include_domains = None
            num_results = 10

        # Calculate date range based on time_range option
        time_range = search_options.get('time_range', 'anytime')
        start_published_date, end_published_date = calculate_date_range(time_range)

        # Prepare search parameters
        search_params = {
            'num_results': search_options.get('num_results', num_results),
            'summary': True,  # Always get summaries
            'include_domains': search_options.get('include_domains', include_domains),
            'use_autoprompt': search_options.get('use_autoprompt', True),
        }

        # exclude_domains is a documented option that was never forwarded;
        # send it only when the caller supplied a non-empty value.
        # NOTE(review): confirm the API accepts include_domains and
        # exclude_domains together.
        exclude_domains = search_options.get('exclude_domains')
        if exclude_domains:
            search_params['exclude_domains'] = exclude_domains

        # Add date parameters only if they are not None
        if start_published_date:
            search_params['start_published_date'] = start_published_date
        if end_published_date:
            search_params['end_published_date'] = end_published_date

        logger.info(f"Exa web search with params: {search_params} and Query: {query}")

        # Execute search
        search_response = exa.search_and_contents(
            query,
            **search_params
        )

        if not search_response or not hasattr(search_response, 'results'):
            logger.warning("No results returned from Exa search")
            return None

        # Get cost information safely
        try:
            cost_dollars = {
                'total': float(search_response.cost_dollars['total']),
            } if hasattr(search_response, 'cost_dollars') else None
        except Exception as cost_err:
            logger.warning(f"Error processing cost information: {cost_err}")
            cost_dollars = None

        # Format response to match expected structure
        formatted_response = {
            "data": {
                "requestId": getattr(search_response, 'request_id', None),
                "resolvedSearchType": "neural",
                "results": [
                    {
                        "id": result.url,
                        "title": result.title,
                        "url": result.url,
                        "publishedDate": getattr(result, 'published_date', None),
                        "author": getattr(result, 'author', None),
                        "score": getattr(result, 'score', 0),
                        "summary": getattr(result, 'summary', None),
                        "text": getattr(result, 'text', None),
                        "image": getattr(result, 'image', None),
                        "favicon": getattr(result, 'favicon', None)
                    }
                    for result in search_response.results
                ],
                "costDollars": cost_dollars
            }
        }

        # Get AI-generated answer from Metaphor (best-effort)
        try:
            exa_answer = get_exa_answer(query)
            if exa_answer:
                formatted_response.update(exa_answer)
        except Exception as exa_err:
            logger.warning(f"Error getting Exa answer: {exa_err}")

        # Get AI-generated answer from Tavily (best-effort)
        try:
            # Resolved at call time by module path rather than a top-level
            # import.
            import importlib
            tavily_module = importlib.import_module('lib.ai_web_researcher.tavily_ai_search')
            if hasattr(tavily_module, 'do_tavily_ai_search'):
                # NOTE(review): the response is read as a mapping with an
                # 'answer' key; confirm the return shape of this function.
                tavily_response = tavily_module.do_tavily_ai_search(query)
                if tavily_response and 'answer' in tavily_response:
                    formatted_response.update({
                        "tavily_answer": tavily_response.get("answer"),
                        "tavily_citations": tavily_response.get("citations", []),
                        "tavily_cost_dollars": tavily_response.get("costDollars", {"total": 0})
                    })
            else:
                logger.warning("do_tavily_ai_search function not found in tavily_ai_search module")
        except Exception as tavily_err:
            logger.warning(f"Error getting Tavily answer: {tavily_err}")

        # Display results in Streamlit
        streamlit_display_metaphor_results(formatted_response)
        return formatted_response

    except Exception as e:
        logger.error(f"Error in Exa searching articles: {e}")
        return None
|
||||
|
||||
def _render_ai_answer(provider, accent_color, answer, cost, citations):
    """Render one provider's answer card with its optional cost and sources."""
    import html

    st.markdown(f"#### 🔍 {provider} AI Answer")
    # The answer comes from an external service and is placed inside raw HTML
    # (unsafe_allow_html=True), so it is escaped before rendering.
    safe_answer = html.escape(str(answer))
    st.markdown(f"""
<div style="background-color: #f0f2f6; padding: 20px; border-radius: 10px; border-left: 5px solid {accent_color};">
{safe_answer}
</div>
""", unsafe_allow_html=True)

    if cost:
        st.caption(f"{provider} Answer Cost: ${cost['total']:.3f}")

    if citations:
        with st.expander(f"📚 {provider} Sources"):
            for idx, citation in enumerate(citations, 1):
                st.markdown(f"**Source {idx}:** [{citation.get('title', 'Untitled')}]({citation.get('url')})")


def _build_insights_prompt(response):
    """Build the LLM prompt summarising answers, results and citations."""
    import textwrap

    # Numbered plain-text lines; the previous version embedded Python list
    # reprs (brackets and quotes) in the prompt.
    result_lines = "\n".join(
        f"{i}. {r['title']} - {r['summary']}"
        for i, r in enumerate(response['data']['results'], 1)
    )
    all_citations = (response.get("citations") or []) + (response.get("tavily_citations") or [])
    citation_lines = "\n".join(
        f"{i}. {c.get('title', 'Untitled')} - {c.get('url', 'No URL')}"
        for i, c in enumerate(all_citations, 1)
    )

    template = textwrap.dedent("""
        **Search Intent & User Needs Analysis**

        I have conducted research using both Tavily and Metaphor AI search engines.
        Below is the data from both sources:

        **Metaphor AI Answer:**
        {metaphor_answer}

        **Tavily AI Answer:**
        {tavily_answer}

        **Search Results:**
        {result_lines}

        **Citations:**
        {citation_lines}

        Based on this research data, please provide the following insights:

        **Search Intent & User Needs**
        ```
        Review the research data and identify:
        1. The distribution of search intent (categorize as Informational/Commercial/Navigational/Transactional)
        2. Most common user questions and their patterns
        3. Frequently mentioned pain points or challenges
        4. Recurring solutions or approaches to addressing these challenges
        5. Gaps between user questions and available answers

        Present findings in a structured format with percentages and specific examples.
        ```

        Format your response as a comprehensive analysis with clear sections, bullet points, and examples from the research data.
        """)
    return template.format(
        metaphor_answer=response.get("answer", ""),
        tavily_answer=response.get("tavily_answer", ""),
        result_lines=result_lines,
        citation_lines=citation_lines,
    )


def streamlit_display_metaphor_results(metaphor_response: dict):
    """
    Display Metaphor search results in Streamlit with metrics, AI answers,
    on-demand LLM insights, a results table and per-result summaries.

    Reads and writes ``st.session_state`` keys ``search_insights``,
    ``metaphor_response`` and ``insights_generated``.

    Args:
        metaphor_response (dict): Response from Metaphor search. Must contain
            a ``'data'`` section with ``'results'``; may contain ``'answer'``,
            ``'citations'``, ``'answerCostDollars'``, ``'tavily_answer'``,
            ``'tavily_citations'`` and ``'tavily_cost_dollars'``.
    """
    if not metaphor_response or 'data' not in metaphor_response:
        st.error("No valid Metaphor search results to display")
        return

    # Initialize session state variables if they don't exist
    if 'search_insights' not in st.session_state:
        st.session_state.search_insights = None
    if 'insights_generated' not in st.session_state:
        st.session_state.insights_generated = False

    # Always store the latest response
    st.session_state.metaphor_response = metaphor_response

    # Calculate metrics
    results = metaphor_response['data']['results']
    total_results = len(results)
    # A missing score counts as 0 so the average stays over all results.
    avg_score = (
        sum((r.get('score') or 0) for r in results) / total_results
        if total_results > 0 else 0
    )

    # Display metrics in columns
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(label="Total Results", value=total_results)
    with col2:
        cost = metaphor_response['data'].get('costDollars')
        if cost:
            st.metric(label="Search Cost", value=f"${cost['total']:.3f}")
    with col3:
        st.metric(label="Average Relevance Score", value=f"{avg_score:.2f}")

    # Display AI-generated answers side by side
    if 'answer' in metaphor_response or 'tavily_answer' in metaphor_response:
        st.markdown("### 🤖 AI-Generated Research Answers")
        tavily_col, metaphor_col = st.columns(2)

        with tavily_col:
            if 'tavily_answer' in metaphor_response:
                _render_ai_answer(
                    "Tavily",
                    "#FF4B4B",
                    metaphor_response['tavily_answer'],
                    metaphor_response.get('tavily_cost_dollars'),
                    metaphor_response.get('tavily_citations'),
                )
            else:
                st.markdown("#### 🔍 Tavily AI Answer")
                st.info("No Tavily answer available for this query.")

        with metaphor_col:
            if 'answer' in metaphor_response:
                _render_ai_answer(
                    "Metaphor",
                    "#4CAF50",
                    metaphor_response['answer'],
                    metaphor_response.get('answerCostDollars'),
                    metaphor_response.get('citations'),
                )
            else:
                st.markdown("#### 🔍 Metaphor AI Answer")
                st.info("No Metaphor answer available for this query.")

    # "Generate Search Insights" is offered regardless of AI answers
    st.markdown("### 🔍 Search Insights")
    insights_container = st.container()

    if st.button("Generate Search Insights", type="primary"):
        # Flag the request, then rerun so generation happens on the next pass
        st.session_state.insights_generated = True
        st.session_state.metaphor_response = metaphor_response
        # st.experimental_rerun is deprecated; prefer st.rerun when present.
        rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun")
        rerun()

    # If insights were requested, generate them in the reserved container
    if st.session_state.insights_generated:
        with insights_container:
            with st.spinner("Analyzing search results to generate insights..."):
                try:
                    analysis_prompt = _build_insights_prompt(st.session_state.metaphor_response)

                    # Resolved at call time by module path rather than a
                    # top-level import.
                    import importlib
                    text_gen_module = importlib.import_module('lib.gpt_providers.text_generation.main_text_generation')
                    if hasattr(text_gen_module, 'llm_text_gen'):
                        st.session_state.search_insights = text_gen_module.llm_text_gen(analysis_prompt)
                    else:
                        st.error("Could not find llm_text_gen function in the text generation module.")
                except Exception as e:
                    st.error(f"Error generating insights: {str(e)}")
                    logger.error(f"Error generating insights: {e}")
                finally:
                    # Reset on failure too; otherwise every rerun would
                    # repeat the LLM call.
                    st.session_state.insights_generated = False

    # Display insights if they exist in session state
    if st.session_state.search_insights:
        with insights_container:
            st.markdown("### 🔍 Search Intent & User Needs Analysis")
            st.markdown(st.session_state.search_insights)

    if results:
        # Create DataFrame from results
        display_df = pd.DataFrame(results)
        display_df['Visit Site'] = display_df['url']

        # Format publishedDate as a date-only string if it exists
        if 'publishedDate' in display_df.columns:
            display_df['publishedDate'] = display_df['publishedDate'].apply(
                lambda x: x[:10] if isinstance(x, str) else 'N/A'
            )

        # Configure columns for data editor
        columns = {
            'title': st.column_config.TextColumn('Title', width='large', required=True),
            'author': st.column_config.TextColumn('Author', width='medium'),
            'publishedDate': st.column_config.TextColumn('Published Date', width='medium'),
            'score': st.column_config.NumberColumn('Relevance Score', width='small', format="%.2f"),
            'Visit Site': st.column_config.LinkColumn('Link', width='small', display_text='Visit Site'),
            'summary': st.column_config.TextColumn('Summary', width='large', required=True),
        }

        # Display results in a read-only data editor
        st.data_editor(
            display_df,
            column_config=columns,
            hide_index=True,
            num_rows='dynamic',
            disabled=True,
            column_order=['title', 'author', 'publishedDate', 'score', 'summary', 'Visit Site']
        )

        # Display detailed summaries, one expander per result
        st.write("### Detailed Summaries")
        for result in results:
            with st.expander(f"📄 {result['title']}", expanded=False):
                summary_col, details_col = st.columns([3, 1])
                with summary_col:
                    st.markdown("**Summary**")
                    st.markdown(result.get('summary') or "No summary available.")
                with details_col:
                    published = result.get('publishedDate')
                    score = result.get('score')
                    # Score and date may be missing; fall back to 'N/A'.
                    score_text = f"{score:.2f}" if isinstance(score, (int, float)) else 'N/A'
                    st.markdown("**Details**")
                    st.markdown(f"**Author:** {result.get('author') if result.get('author') else 'N/A'}")
                    st.markdown(f"**Published:** {published[:10] if isinstance(published, str) and published else 'N/A'}")
                    st.markdown(f"**Score:** {score_text}")
                    st.markdown(f"[Visit Site]({result['url']})")
    else:
        # An empty result list has no 'url' column to build the table from.
        st.info("No search results to display.")

    # Display search metadata
    st.divider()
    meta_left, meta_right = st.columns(2)
    with meta_left:
        st.caption(f"Search Type: {metaphor_response['data']['resolvedSearchType']}")
    with meta_right:
        st.caption(f"Request ID: {metaphor_response['data']['requestId']}")
|
||||
|
||||
|
||||
def metaphor_news_summarizer(news_keywords):
|
||||
@@ -240,3 +616,56 @@ def metaphor_scholar_search(query, include_domains=None, time_range="anytime"):
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in searching papers: {e}")
|
||||
|
||||
def get_exa_answer(query: str, system_prompt: str = None) -> dict:
    """
    Get an AI-generated answer for a query using Exa's answer endpoint.

    Args:
        query (str): The search query to get an answer for.
        system_prompt (str, optional): Custom system prompt for the LLM.
            If None, a default research-oriented prompt is used.

    Returns:
        dict | None: None when no answer is received or the API call fails
        (the error is logged, not raised); otherwise
            {
                "answer": str,
                "citations": list,
                "costDollars": dict
            }
    """
    def _field(obj, name, default=None):
        # Read `name` from either a mapping or an attribute-style response.
        # NOTE(review): the original code assumed a dict; confirm which shape
        # the installed Exa SDK version actually returns.
        if isinstance(obj, dict):
            value = obj.get(name)
        else:
            value = getattr(obj, name, None)
        return default if value is None else value

    # Client creation stays outside the try block so its errors still
    # propagate to the caller exactly as before.
    exa = get_metaphor_client()
    try:
        # Use default system prompt if none provided
        if system_prompt is None:
            system_prompt = (
                "I am doing research to write factual content. "
                "Help me find answers for content generation task. "
                "Provide detailed, well-structured answers with clear citations."
            )

        logger.info(f"Getting Exa answer for query: {query}")
        logger.debug(f"Using system prompt: {system_prompt}")

        # Make API call to get answer. The prompt was previously built and
        # logged but never forwarded, so custom prompts had no effect.
        result = exa.answer(
            query,
            model="exa",
            text=True,  # Include full text in citations
            system_prompt=system_prompt,
        )

        answer = _field(result, 'answer') if result else None
        if not answer:
            logger.warning("No answer received from Exa")
            return None

        # Format response to match expected structure
        return {
            "answer": answer,
            "citations": _field(result, 'citations', []),
            "costDollars": _field(result, 'costDollars', {"total": 0}),
        }

    except Exception as e:
        logger.error(f"Error getting Exa answer: {e}")
        return None
|
||||
|
||||
@@ -49,17 +49,9 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
|
||||
def get_tavilyai_results(keywords, max_results=5):
|
||||
def get_tavilyai_results(keywords, max_results=5, include_domains=None, search_depth="advanced", **kwargs):
|
||||
"""
|
||||
Get Tavily AI search results based on specified keywords and options.
|
||||
|
||||
Args:
|
||||
keywords (str): Keywords for Tavily AI search.
|
||||
include_urls (str): Comma-separated URLs to include in the search.
|
||||
search_depth (str, optional): Search depth option (default is "advanced").
|
||||
|
||||
Returns:
|
||||
dict: Tavily AI search results.
|
||||
"""
|
||||
# Run Tavily search
|
||||
logger.info(f"Running Tavily search on: {keywords}")
|
||||
@@ -74,56 +66,100 @@ def get_tavilyai_results(keywords, max_results=5):
|
||||
client = TavilyClient(api_key=api_key)
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to create Tavily client. Check TAVILY_API_KEY: {err}")
|
||||
|
||||
# Read search config params from the file.
|
||||
try:
|
||||
include_urls = cfg_search_param('tavily')
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to read search params from main_config: {err}")
|
||||
raise
|
||||
|
||||
try:
|
||||
if include_urls:
|
||||
tavily_search_result = client.search(keywords,
|
||||
search_depth="advanced",
|
||||
include_answer=True,
|
||||
max_results=max_results,
|
||||
include_domains=include_urls)
|
||||
else:
|
||||
tavily_search_result = client.search(keywords,
|
||||
search_depth = "advanced",
|
||||
include_answer=True,
|
||||
max_results=max_results)
|
||||
# Create search parameters exactly matching Tavily's API format
|
||||
tavily_search_result = client.search(
|
||||
query=keywords,
|
||||
search_depth="advanced",
|
||||
time_range="year",
|
||||
include_answer="advanced",
|
||||
include_domains=[""] if not include_domains else include_domains,
|
||||
max_results=max_results
|
||||
)
|
||||
|
||||
if tavily_search_result:
|
||||
print_result_table(tavily_search_result)
|
||||
streamlit_display_results(tavily_search_result)
|
||||
return tavily_search_result
|
||||
return None
|
||||
|
||||
print_result_table(tavily_search_result)
|
||||
streamlit_display_results(tavily_search_result)
|
||||
return(tavily_search_result)
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to do Tavily Research: {err}")
|
||||
raise
|
||||
|
||||
|
||||
def streamlit_display_results(output_data):
|
||||
"""Display Tavily AI search results in Streamlit UI."""
|
||||
"""Display Tavily AI search results in Streamlit UI with enhanced visualization."""
|
||||
|
||||
# Prepare data for display
|
||||
table_data = []
|
||||
# Display the 'answer' in Streamlit with enhanced styling
|
||||
answer = output_data.get("answer", "No answer available")
|
||||
st.markdown("### 🤖 AI-Generated Answer")
|
||||
st.markdown(f"""
|
||||
<div style="background-color: #f0f2f6; padding: 20px; border-radius: 10px; border-left: 5px solid #4CAF50;">
|
||||
{answer}
|
||||
</div>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
# Display follow-up questions if available
|
||||
follow_up_questions = output_data.get("follow_up_questions", [])
|
||||
if follow_up_questions:
|
||||
st.markdown("### ❓ Follow-up Questions")
|
||||
for i, question in enumerate(follow_up_questions, 1):
|
||||
st.markdown(f"**{i}.** {question}")
|
||||
|
||||
# Prepare data for display with dataeditor
|
||||
st.markdown("### 📊 Search Results")
|
||||
|
||||
# Create a DataFrame for the results
|
||||
import pandas as pd
|
||||
results_data = []
|
||||
|
||||
for item in output_data.get("results", []):
|
||||
title = item.get("title", "")
|
||||
snippet = item.get("content", "")
|
||||
link = item.get("url", "")
|
||||
table_data.append([title, snippet, link])
|
||||
results_data.append({
|
||||
"Title": title,
|
||||
"Content": snippet,
|
||||
"Link": link
|
||||
})
|
||||
|
||||
if results_data:
|
||||
df = pd.DataFrame(results_data)
|
||||
|
||||
# Display the data editor
|
||||
st.data_editor(
|
||||
df,
|
||||
column_config={
|
||||
"Title": st.column_config.TextColumn(
|
||||
"Title",
|
||||
help="Article title",
|
||||
width="medium",
|
||||
),
|
||||
"Content": st.column_config.TextColumn(
|
||||
"Content",
|
||||
help="Click the button below to view full content",
|
||||
width="large",
|
||||
),
|
||||
"Link": st.column_config.LinkColumn(
|
||||
"Link",
|
||||
help="Click to visit the website",
|
||||
width="small",
|
||||
display_text="Visit Site"
|
||||
),
|
||||
},
|
||||
hide_index=True,
|
||||
use_container_width=True,
|
||||
)
|
||||
|
||||
# Display the table in Streamlit
|
||||
st.table(table_data)
|
||||
|
||||
# Display the 'answer' in Streamlit
|
||||
answer = output_data.get("answer", "No answer available")
|
||||
st.write(f"**The answer to your search query:** {answer}")
|
||||
|
||||
# Display follow-up questions if available
|
||||
follow_up_questions = output_data.get("follow_up_questions", [])
|
||||
if follow_up_questions:
|
||||
st.write(f"**Follow-up questions for the query:** {output_data.get('query')}")
|
||||
st.write(", ".join(follow_up_questions))
|
||||
# Add popovers for full content display
|
||||
for item in output_data.get("results", []):
|
||||
with st.popover(f"View content: {item.get('title', '')[:50]}..."):
|
||||
st.markdown(item.get("content", ""))
|
||||
else:
|
||||
st.info("No results found for your search query.")
|
||||
|
||||
|
||||
def print_result_table(output_data):
|
||||
|
||||
277
lib/alwrity_ui/display_google_serp_results.py
Normal file
277
lib/alwrity_ui/display_google_serp_results.py
Normal file
@@ -0,0 +1,277 @@
|
||||
import streamlit as st
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Dict, Optional, Any
|
||||
|
||||
# Configure module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def display_research_results(results: Dict[str, Any]) -> None:
    """
    Render processed research results as three Streamlit tabs.

    Shows a warning and returns early when ``results`` is empty. Otherwise
    the tabs hold, in order: the summary section, the source-specific result
    listing (Gemini or SERP, chosen by ``results['source']``), and statistics.

    Args:
        results (dict): Processed research results containing summary and data
    """
    if not results:
        st.warning("No results to display")
        return

    # Create tabs for different result sections
    summary_tab, results_tab, stats_tab = st.tabs(["📊 Summary", "🔍 Results", "📈 Statistics"])

    with summary_tab:
        display_summary_section(results)

    with results_tab:
        # Pick the renderer that matches where the data came from
        is_gemini = results['source'] == 'gemini'
        renderer = display_gemini_results if is_gemini else display_serp_results
        renderer(results)

    with stats_tab:
        display_statistics(results)
|
||||
|
||||
def process_research_results(results: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Normalize raw research results into a timestamped structure.

    The output always carries ``timestamp``, ``source``, ``summary`` and
    ``data`` keys; the last two are filled by the Gemini or SERP processor
    depending on ``results['source']``.

    Returns:
        dict | None: The processed results, or None when ``results`` is empty
        or processing raised (the error is logged).
    """
    logger.info("Processing research results")

    try:
        if not results:
            return None

        # Skeleton shared by every source; source-specific keys overwrite it
        base = {
            'timestamp': str(datetime.now()),
            'source': results.get('source', 'unknown'),
            'summary': {},
            'data': {}
        }

        if results.get('source') == 'gemini':
            specific = process_gemini_results(results)
        else:
            specific = process_serp_results(results)

        processed = {**base, **specific}
        logger.info("Results processing completed")
        return processed

    except Exception as err:
        logger.error(f"Failed to process results: {err}", exc_info=True)
        return None
|
||||
|
||||
def process_search_results(search_results: Dict[str, Any], search_type: str = "general") -> Optional[Dict[str, Any]]:
    """
    Convert raw search results into the structure used for display.

    Args:
        search_results (dict): Raw results; the ``organic``, ``peopleAlsoAsk``
            and ``relatedSearches`` entries are read, each defaulting to an
            empty list.
        search_type (str): Label stored in the output metadata.

    Returns:
        dict | None: Processed sections plus ``metadata`` (timestamp and
        type), or None when the input is empty or processing raised.
    """
    logger.info(f"Processing {search_type} search results")

    try:
        if not search_results:
            return None

        organic = process_organic_results(search_results.get('organic', []))
        questions = process_paa_results(search_results.get('peopleAlsoAsk', []))
        related = process_related_searches(search_results.get('relatedSearches', []))
        metadata = {
            'timestamp': str(datetime.now()),
            'type': search_type
        }

        return {
            'organic': organic,
            'peopleAlsoAsk': questions,
            'relatedSearches': related,
            'metadata': metadata
        }

    except Exception as err:
        logger.error(f"Error processing search results: {err}", exc_info=True)
        return None
|
||||
|
||||
# Helper functions for result processing
|
||||
def process_organic_results(results):
    """Return one normalized dict per organic result, filling in placeholder
    values for any of title, link, snippet or position that is missing."""
    normalized = []
    for entry in results:
        normalized.append({
            'title': entry.get('title', 'No Title'),
            'link': entry.get('link', '#'),
            'snippet': entry.get('snippet', 'No snippet available'),
            'position': entry.get('position', 'N/A')
        })
    return normalized
|
||||
|
||||
def process_paa_results(results):
    """Map each People Also Ask entry to a question/answer/link dict.

    The question comes from the entry's ``title`` and the answer from its
    ``snippet``; missing fields get placeholder values.
    """
    questions = []
    for entry in results:
        questions.append({
            'question': entry.get('title', ''),
            'answer': entry.get('snippet', 'No answer available'),
            'link': entry.get('link', '#')
        })
    return questions
|
||||
|
||||
def process_related_searches(results):
    """Extract the ``query`` string from each related-search entry, using an
    empty string when the key is absent."""
    queries = []
    for entry in results:
        queries.append(entry.get('query', ''))
    return queries
|
||||
|
||||
def process_gemini_results(results: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build the summary/data pair for Gemini research results.

    Args:
        results (dict): Raw Gemini research results; the payload is read
            from its ``results`` key (empty dict when absent).

    Returns:
        dict: ``summary`` (main findings, sources, timestamp, source count
        and model name) and ``data`` (the untouched payload).
    """
    payload = results.get('results', {})
    grounding = payload.get('grounding_data', [])
    metadata = payload.get('metadata', {})

    summary = {
        'main_findings': payload.get('main_response', ''),
        'sources': grounding,
        # The metadata timestamp is what gets reported as processing time
        'processing_time': metadata.get('timestamp'),
        'total_sources': len(grounding),
        'model': metadata.get('model', 'unknown')
    }
    return {'summary': summary, 'data': payload}
|
||||
|
||||
def process_serp_results(results: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build the summary/data pair for SERP search results.

    Args:
        results (dict): Raw SERP results; ``organic``, ``peopleAlsoAsk`` and
            ``relatedSearches`` are read, each defaulting to an empty list.

    Returns:
        dict: ``summary`` (counts plus the links and titles of the organic
        results) and ``data`` (the three processed sections).
    """
    organic_results = results.get('organic', [])
    paa_results = results.get('peopleAlsoAsk', [])
    related_searches = results.get('relatedSearches', [])

    links = [entry.get('link') for entry in organic_results]
    titles = [entry.get('title') for entry in organic_results]

    summary = {
        'total_results': len(organic_results),
        'sources': links,
        'titles': titles,
        'total_questions': len(paa_results),
        'total_related': len(related_searches)
    }
    data = {
        'organic': process_organic_results(organic_results),
        'peopleAlsoAsk': process_paa_results(paa_results),
        'relatedSearches': process_related_searches(related_searches)
    }
    return {'summary': summary, 'data': data}
|
||||
|
||||
# Display helper functions
|
||||
def display_summary_section(results):
    """Render the research summary: source name (title-cased), timestamp,
    and the number of entries in the summary's ``sources`` list."""
    st.markdown("### 📋 Research Summary")

    source_label = results['source'].title()
    completed_at = results['timestamp']
    source_count = len(results.get('summary', {}).get('sources', []))

    st.markdown(f"""
    - **Source**: {source_label}
    - **Time**: {completed_at}
    - **Total Sources**: {source_count}
    """)
|
||||
|
||||
def display_gemini_results(results):
    """Render the Gemini findings, followed by a collapsed expander that
    holds the grounding data (or a fallback message when the key is absent)."""
    st.markdown("### 🤖 Gemini Research Findings")
    findings = results['summary']['main_findings']
    st.write(findings)

    sources_panel = st.expander("🌐 Sources and References", expanded=False)
    with sources_panel:
        grounding = results['data'].get('grounding_data', 'No sources available')
        st.write(grounding)
|
||||
|
||||
def display_serp_results(results):
    """Render each processed organic result in its own collapsed expander,
    showing rank, link and snippet."""
    st.markdown("### 🔍 Search Results")

    organic_entries = results['data'].get('organic', [])
    for entry in organic_entries:
        panel = st.expander(f"📄 {entry['title']}", expanded=False)
        with panel:
            rank = entry['position']
            link = entry['link']
            snippet = entry['snippet']
            st.markdown(f"""
            **Rank:** {rank}

            **Link:** [{link}]({link})

            **Snippet:**
            {snippet}
            """)
|
||||
|
||||
def display_statistics(results: Dict[str, Any]) -> None:
    """
    Render the statistics tab for processed research results.

    Shows source-specific metrics (two for Gemini, three for SERP), the
    completion timestamp, and two progress bars driven by
    ``calculate_quality_metrics``.

    Args:
        results (dict): Processed research results
    """
    st.markdown("### 📈 Research Statistics")

    # Source-specific metrics: (label, summary key, fallback value)
    if results['source'] == 'gemini':
        metric_specs = [
            ("Sources Analyzed", 'total_sources', 0),
            ("Model Used", 'model', 'Unknown'),
        ]
    else:  # SERP results
        metric_specs = [
            ("Organic Results", 'total_results', 0),
            ("Related Questions", 'total_questions', 0),
            ("Related Searches", 'total_related', 0),
        ]

    metric_columns = st.columns(len(metric_specs))
    for column, (label, key, fallback) in zip(metric_columns, metric_specs):
        with column:
            st.metric(
                label,
                results.get('summary', {}).get(key, fallback)
            )

    # Common metrics
    st.markdown("#### 🕒 Timing Information")
    st.info(f"Research completed at: {results['timestamp']}")

    # Display data quality metrics
    st.markdown("#### 📊 Data Quality")
    quality_metrics = calculate_quality_metrics(results)

    quality_bars = (
        ('completeness', "Data Completeness"),
        ('relevance', "Estimated Relevance"),
    )
    for column, (metric_key, caption) in zip(st.columns(2), quality_bars):
        with column:
            st.progress(quality_metrics[metric_key])
            st.caption(caption)
|
||||
|
||||
def calculate_quality_metrics(results: Dict[str, Any]) -> Dict[str, float]:
    """
    Derive heuristic quality scores from processed research results.

    For Gemini results, completeness is 1.0 or 0.0 depending on whether main
    findings exist, and relevance is 0.8 or 0.4 depending on whether sources
    exist. For all other sources, completeness is the organic result count
    divided by 10 (capped at 1.0) and relevance starts at 0.6, gaining 0.2
    each for having related questions and related searches.

    Args:
        results (dict): Processed research results

    Returns:
        dict: ``completeness`` and ``relevance`` scores; both are 0.0 when
        any error occurs during the calculation (the error is logged).
    """
    try:
        if results['source'] == 'gemini':
            gemini_summary = results['summary']
            completeness = 1.0 if gemini_summary['main_findings'] else 0.0
            relevance = 0.8 if gemini_summary['sources'] else 0.4
        else:
            serp_summary = results.get('summary', {})
            organic_count = serp_summary.get('total_results', 0)
            completeness = min(organic_count / 10, 1.0)  # Normalize to 0-1

            # Base score plus a bonus for each extra signal that is present
            relevance = 0.6
            relevance += 0.2 if serp_summary.get('total_questions', 0) else 0
            relevance += 0.2 if serp_summary.get('total_related', 0) else 0

        return {'completeness': completeness, 'relevance': relevance}

    except Exception as err:
        logger.error(f"Error calculating quality metrics: {err}")
        return {'completeness': 0.0, 'relevance': 0.0}
|
||||
@@ -323,7 +323,8 @@ def do_web_research():
|
||||
"num_results": 10,
|
||||
"time_range": "past month",
|
||||
"include_domains": "",
|
||||
"similar_url": ""
|
||||
"similar_url": "",
|
||||
"search_mode": "google" # Default search mode
|
||||
}
|
||||
|
||||
# Define the research options dialog function
|
||||
@@ -425,6 +426,12 @@ def do_web_research():
|
||||
horizontal=True,
|
||||
help="Choose your preferred research method"
|
||||
)
|
||||
|
||||
# Map the selected option to the search_mode value
|
||||
for mode, label, _, _ in search_options:
|
||||
if label == selected_option:
|
||||
st.session_state.research_options["search_mode"] = mode
|
||||
break
|
||||
else:
|
||||
st.warning("No search methods available. Please configure API keys.")
|
||||
|
||||
@@ -439,7 +446,7 @@ def do_web_research():
|
||||
st.rerun()
|
||||
|
||||
# Main interface
|
||||
st.title("Keyword Research Assistant")
|
||||
st.title("ALwrity Web Researcher")
|
||||
|
||||
# Primary search area with help popover
|
||||
with st.popover("ℹ️ Keyword Research Tips"):
|
||||
@@ -450,6 +457,7 @@ def do_web_research():
|
||||
3. **Search Depth**: Higher depth = more comprehensive but slower
|
||||
4. **Target Audience**: Affects content recommendations
|
||||
5. **Content Type**: Influences research focus
|
||||
6. **Search Mode**: Choose between traditional web research(Google), AI-powered search(Tavily and Metaphor) and Deep Researcher
|
||||
""")
|
||||
|
||||
col1, col2 = st.columns([3, 1])
|
||||
@@ -484,6 +492,7 @@ def do_web_research():
|
||||
# Execute search with all parameters
|
||||
web_research_result = gpt_web_researcher(
|
||||
search_keywords=st.session_state.research_options["primary_keywords"],
|
||||
search_mode=st.session_state.research_options["search_mode"],
|
||||
related_keywords=st.session_state.research_options["related_keywords"],
|
||||
target_audience=st.session_state.research_options["target_audience"],
|
||||
content_type=st.session_state.research_options["content_type"],
|
||||
|
||||
Reference in New Issue
Block a user