ALwrity AI Keyword Web Researcher

This commit is contained in:
ajaysi
2025-04-04 10:48:46 +05:30
parent b4660d9d98
commit 3ffb563d40
4 changed files with 520 additions and 24 deletions

View File

@@ -0,0 +1,515 @@
import os
import time
import logging
import streamlit as st
from datetime import datetime
from lib.ai_web_researcher.gpt_online_researcher import gpt_web_researcher
from lib.utils.read_main_config_params import read_return_config_section
# Module-level logger: emits everything from DEBUG upwards.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Attach a console handler only once, so that re-executing this module
# does not stack additional handlers on the same logger.
if len(logger.handlers) == 0:
    formatter = logging.Formatter(
        fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    console_handler.setLevel(logging.DEBUG)
    logger.addHandler(console_handler)
def reload_env_variables():
    """Re-read the .env file into the process environment.

    Values loaded from the file override variables that are already set.

    Returns:
        True when the reload succeeded, False when importing or running
        ``load_dotenv`` raised (the error is logged).
    """
    succeeded = False
    try:
        # Imported lazily so a missing python-dotenv is reported as a
        # failed reload instead of breaking the import of this module.
        import dotenv
        dotenv.load_dotenv(override=True)
        succeeded = True
    except Exception as exc:
        logger.error(f"Failed to reload environment variables: {exc!s}")
    return succeeded
def _env_line_key(line):
    """Return the variable name assigned on a .env line, or None.

    Blank lines, ``#`` comments and lines without ``=`` yield None. An
    optional leading ``export`` keyword is ignored.
    """
    stripped = line.strip()
    if not stripped or stripped.startswith('#') or '=' not in stripped:
        return None
    name = stripped.split('=', 1)[0].strip()
    if name.startswith('export '):
        name = name[len('export '):].strip()
    return name


def save_api_key_to_env(key_name, key_value):
    """Save an API key to the ``.env`` file in the current working directory.

    The file is updated in place: the first existing assignment of
    ``key_name`` is replaced, any further assignments of the same name are
    dropped (so a stale duplicate cannot shadow the new value), and every
    other line -- including comments and blank lines -- is written back
    unchanged. If the name is not present yet, the assignment is appended.
    After writing, ``os.environ`` is updated and the environment is
    reloaded from the file.

    Args:
        key_name: Name of the environment variable, e.g. ``SERPER_API_KEY``.
        key_value: Value to store for ``key_name``.

    Returns:
        True when the file was written and the environment reloaded,
        False otherwise (the error is logged, never raised).
    """
    try:
        env_path = os.path.join(os.getcwd(), '.env')

        existing_lines = []
        if os.path.exists(env_path):
            with open(env_path, 'r', encoding='utf-8') as f:
                existing_lines = f.read().splitlines()

        new_entry = f"{key_name}={key_value}"
        updated_lines = []
        written = False
        for line in existing_lines:
            if _env_line_key(line) != key_name:
                # Unrelated assignment, comment or blank line: keep verbatim.
                updated_lines.append(line)
            elif not written:
                updated_lines.append(new_entry)
                written = True
            # Any later assignment of key_name is intentionally dropped.
        if not written:
            updated_lines.append(new_entry)

        with open(env_path, 'w', encoding='utf-8') as f:
            f.writelines(f"{line}\n" for line in updated_lines)

        # Make the key visible immediately, then re-sync from the file.
        os.environ[key_name] = key_value
        return bool(reload_env_variables())
    except Exception as e:
        logger.error(f"Failed to save API key to .env: {str(e)}")
        return False
def validate_api_keys(timeout=10):
    """Read the research API keys from the environment and probe the Serper key.

    Only ``SERPER_API_KEY`` is actively checked, by sending a one-result
    test search to the Serper endpoint. The other keys are returned as
    found in the environment.

    Args:
        timeout: Seconds to wait for the Serper test request. Without a
            timeout an unresponsive endpoint would block the caller
            indefinitely.

    Returns:
        dict with the four key values (``None`` when unset) under
        ``SERPER_API_KEY``, ``METAPHOR_API_KEY``, ``TAVILY_API_KEY`` and
        ``FIRECRAWL_API_KEY``, plus the boolean ``SERPER_API_KEY_VALID``,
        which is True only when the test request returned HTTP 200.
    """
    logger.info("Validating API keys")

    api_keys = {
        'SERPER_API_KEY': os.getenv('SERPER_API_KEY'),
        'METAPHOR_API_KEY': os.getenv('METAPHOR_API_KEY'),
        'TAVILY_API_KEY': os.getenv('TAVILY_API_KEY'),
        'FIRECRAWL_API_KEY': os.getenv('FIRECRAWL_API_KEY'),
    }

    serper_key = api_keys['SERPER_API_KEY']
    if not serper_key:
        # Nothing to probe: a missing key is reported as invalid.
        api_keys['SERPER_API_KEY_VALID'] = False
        return api_keys

    try:
        # Imported lazily; an ImportError is handled like any other
        # validation failure below.
        import requests
        response = requests.post(
            "https://google.serper.dev/search",
            headers={
                'X-API-KEY': serper_key,
                'Content-Type': 'application/json',
            },
            json={"q": "test", "gl": "us", "hl": "en", "num": 1},
            timeout=timeout,
        )
        api_keys['SERPER_API_KEY_VALID'] = response.status_code == 200
        if not api_keys['SERPER_API_KEY_VALID']:
            logger.error(f"SERPER_API_KEY validation failed: {response.status_code} - {response.text}")
    except Exception as e:
        logger.error(f"Error validating SERPER_API_KEY: {str(e)}")
        api_keys['SERPER_API_KEY_VALID'] = False

    return api_keys
# Environment variables that hold the API keys used by this page.
_RESEARCH_API_KEY_NAMES = (
    'SERPER_API_KEY',
    'METAPHOR_API_KEY',
    'TAVILY_API_KEY',
    'FIRECRAWL_API_KEY',
)

# Seconds to wait for a provider when test-validating a key; without a
# timeout a stalled request would freeze the Streamlit script run.
_KEY_TEST_TIMEOUT = 10

# (provider label, env var, sign-up URL, test endpoint, test payload)
# NOTE(review): endpoints and payloads are carried over unchanged from the
# previous inline code -- verify them against each provider's current API docs.
_OPTIONAL_KEY_PROVIDERS = (
    (
        "Metaphor", 'METAPHOR_API_KEY', "https://metaphor.systems",
        "https://api.metaphor.systems/v1/search",
        {"query": "test", "numResults": 1},
    ),
    (
        "Tavily", 'TAVILY_API_KEY', "https://tavily.com",
        "https://api.tavily.com/v1/search",
        {"query": "test", "max_results": 1},
    ),
    (
        "Firecrawl", 'FIRECRAWL_API_KEY', "https://firecrawl.co",
        "https://api.firecrawl.co/v1/search",
        {"query": "test", "limit": 1},
    ),
)

_GEO_LOCATIONS = ["us", "in", "uk", "fr", "de", "jp", "custom"]
_SEARCH_LANGUAGES = ["en", "hi", "fr", "de", "es", "custom"]
_TIME_RANGES = ["past day", "past week", "past month", "past year", "anytime"]


def _serper_headers(api_key):
    """Build JSON request headers carrying ``api_key`` in ``X-API-KEY``."""
    return {'X-API-KEY': api_key, 'Content-Type': 'application/json'}


def _bearer_headers(api_key):
    """Build JSON request headers carrying ``api_key`` as a Bearer token."""
    return {'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'}


def _option_index(options, current):
    """Return the position of ``current`` in ``options``, or 0 when absent."""
    try:
        return options.index(current)
    except ValueError:
        return 0


def _default_research_options():
    """Return a fresh dict of default research options for session state."""
    return {
        "primary_keywords": "",
        "related_keywords": "",
        "target_audience": ["General"],
        "content_type": ["Blog Posts"],
        "search_depth": 3,
        "geo_location": "us",
        "search_language": "en",
        "num_results": 10,
        "time_range": "past month",
        "include_domains": "",
        "similar_url": "",
    }


def _render_api_key_row(provider, env_name, signup_url, test_url, test_payload,
                        build_headers, settle_delay=0.0):
    """Render the input, test checkbox and save button for one provider key.

    When the save button is pressed with a non-empty key, the key is
    optionally validated with a test POST (HTTP 200 counts as valid), then
    stored via ``save_api_key_to_env`` and the app is rerun.
    """
    input_col, button_col = st.columns([3, 1])
    with input_col:
        entered_key = st.text_input(
            f"{provider} API Key",
            type="password",
            placeholder=f"Enter your {provider} API key",
            help=f"Get your key at {signup_url}",
        )
        # Explicit key: the label/value/help are identical for every
        # provider, so without it the checkboxes would share one widget id.
        test_first = st.checkbox(
            "Test API key before saving",
            value=False,
            help="Validate the API key before saving",
            key=f"test_before_save_{env_name}",
        )
    with button_col:
        if not st.button(f"Save {provider}", use_container_width=True):
            return
        if not entered_key:
            return

        if test_first:
            try:
                import requests
                response = requests.post(
                    test_url,
                    headers=build_headers(entered_key),
                    json=test_payload,
                    timeout=_KEY_TEST_TIMEOUT,
                )
            except Exception as e:
                st.error(f"Error validating API key: {str(e)}")
                return
            if response.status_code != 200:
                st.error(f"API key validation failed: {response.status_code} - {response.text}")
                return
            success_message = f"✅ {provider} API key validated and saved!"
        else:
            success_message = f"✅ {provider} API key saved!"

        if not save_api_key_to_env(env_name, entered_key):
            st.error("Failed to save API key")
            return

        st.success(success_message)
        if settle_delay:
            # Brief pause before the rerun replaces the page.
            time.sleep(settle_delay)
        st.rerun()


def _render_api_key_setup(api_keys):
    """Show the missing-key error and the popover for entering API keys."""
    st.error("\n🚫 SERPER_API_KEY is missing. Please configure your API key.\n")
    with st.popover("⚙️ Configure API Keys"):
        st.markdown(
            "\n"
            "### API Key Configuration\n"
            "Enter your API keys below to enable research features.\n"
        )
        _render_api_key_row(
            "Serper", 'SERPER_API_KEY', "https://serper.dev",
            "https://google.serper.dev/search",
            {"q": "test", "gl": "us", "hl": "en", "num": 1},
            _serper_headers,
            settle_delay=0.5,
        )
        # Optional providers are only offered while their key is unset.
        for provider, env_name, signup_url, test_url, payload in _OPTIONAL_KEY_PROVIDERS:
            if not api_keys.get(env_name):
                _render_api_key_row(
                    provider, env_name, signup_url, test_url, payload,
                    _bearer_headers,
                )
        st.markdown(
            "\n"
            "---\n"
            "### Need Help?\n"
            "1. Click the links above to get your API keys\n"
            "2. Enter the keys in the fields above\n"
            "3. Click Save to store them securely\n"
            "4. The app will refresh automatically\n"
        )


def _render_research_options(api_keys):
    """Render the research options dialog body, writing into session state."""
    options = st.session_state.research_options
    basic_tab, advanced_tab, technical_tab = st.tabs(["Basic", "Advanced", "Technical"])

    with basic_tab:
        options["related_keywords"] = st.text_input(
            "Related Keywords",
            value=options["related_keywords"],
            placeholder="Enter related terms...",
            help="Additional keywords to provide context and expand research",
        )
        options["target_audience"] = st.multiselect(
            "Target Audience",
            ["General", "Technical", "Business", "Academic", "Youth", "Senior"],
            default=options["target_audience"],
            help="Select your target audience to focus research",
        )
        options["content_type"] = st.multiselect(
            "Content Type",
            ["Blog Posts", "Articles", "Social Media", "Whitepapers", "Tutorials", "Videos"],
            default=options["content_type"],
            help="Select content types to tailor research results",
        )
        options["search_depth"] = st.slider(
            "Search Depth",
            min_value=1,
            max_value=5,
            value=options["search_depth"],
            help="Higher depth means more comprehensive but slower research",
        )

    with advanced_tab:
        left_col, right_col = st.columns(2)
        with left_col:
            # The index is looked up in the same list that is offered, so a
            # previously stored "custom" no longer raises ValueError.
            # NOTE(review): "custom" is stored and forwarded as the literal
            # string; no input for the actual custom value exists yet.
            options["geo_location"] = st.selectbox(
                "Geographic Location",
                options=_GEO_LOCATIONS,
                index=_option_index(_GEO_LOCATIONS, options["geo_location"]),
                help="Target specific geographic region for research",
            )
            options["num_results"] = st.number_input(
                "Number of Results",
                min_value=1,
                max_value=100,
                value=options["num_results"],
                help="Number of results to analyze",
            )
        with right_col:
            options["search_language"] = st.selectbox(
                "Search Language",
                options=_SEARCH_LANGUAGES,
                index=_option_index(_SEARCH_LANGUAGES, options["search_language"]),
                help="Primary language for search results",
            )
            options["time_range"] = st.selectbox(
                "Time Range",
                options=_TIME_RANGES,
                index=_option_index(_TIME_RANGES, options["time_range"]),
                help="Time period for research results",
            )

    with technical_tab:
        options["include_domains"] = st.text_input(
            "Include Domains",
            value=options["include_domains"],
            placeholder="example.com, another.com",
            help="Specific domains to include in research",
        )
        options["similar_url"] = st.text_input(
            "Similar URL",
            value=options["similar_url"],
            placeholder="https://example.com/page",
            help="Find content similar to this URL",
        )

    # Research method selection: a method is offered only when every key
    # it needs is configured.
    st.markdown("### Select Research Method")
    search_options = [
        ("google", "🔍 Google Search", "Traditional web research with AI analysis",
         bool(api_keys['SERPER_API_KEY'])),
        ("ai", "🤖 AI Search", "Neural search with semantic analysis",
         bool(api_keys['METAPHOR_API_KEY'] and api_keys['TAVILY_API_KEY'])),
        ("deep", "🔬 Deep Search (Beta)", "Advanced deep web analysis",
         bool(all(api_keys.values()))),
    ]
    enabled_options = [opt[1] for opt in search_options if opt[3]]
    if enabled_options:
        # NOTE(review): the chosen method is displayed but not stored or
        # passed to gpt_web_researcher.
        st.radio(
            "Search Method",
            options=enabled_options,
            horizontal=True,
            help="Choose your preferred research method",
        )
    else:
        st.warning("No search methods available. Please configure API keys.")

    # NOTE(review): option values are written to session state as widgets
    # change, so "Cancel" closes the dialog without reverting them.
    apply_col, cancel_col = st.columns([1, 1])
    with apply_col:
        if st.button("Apply", use_container_width=True, type="primary"):
            st.session_state.show_options_dialog = False
            st.rerun()
    with cancel_col:
        if st.button("Cancel", use_container_width=True):
            st.session_state.show_options_dialog = False
            st.rerun()


def _run_research(options):
    """Run ``gpt_web_researcher`` with ``options`` and render the outcome."""
    try:
        # Compact progress display: status text next to a progress bar.
        with st.container():
            status_col, progress_col = st.columns([3, 1])
            with status_col:
                status_display = st.empty()
                status_display.info("🚀 Initializing research...")
            with progress_col:
                st.progress(0)

        web_research_result = gpt_web_researcher(
            search_keywords=options["primary_keywords"],
            related_keywords=options["related_keywords"],
            target_audience=options["target_audience"],
            content_type=options["content_type"],
            search_depth=options["search_depth"],
            geo_location=options["geo_location"],
            search_language=options["search_language"],
            num_results=options["num_results"],
            time_range=options["time_range"],
            include_domains=options["include_domains"],
            similar_url=options["similar_url"],
        )

        if web_research_result:
            status_display.success("✨ Research completed!")
            with st.expander("📊 Research Results", expanded=True):
                st.write(web_research_result)
        else:
            st.warning("No results found for your search")
    except Exception as e:
        error_msg = f"Research failed: {str(e)}"
        logger.error(error_msg, exc_info=True)
        # error_msg already starts with "Research failed:"; do not repeat it.
        st.error(f"🚫 {error_msg}")


def do_web_research():
    """Input keywords and do web research with advanced options.

    Renders the Streamlit keyword research page. When ``SERPER_API_KEY`` is
    not set, only the API key configuration popover is shown. Otherwise the
    page offers a primary keyword input, a research options dialog, and a
    button that runs ``gpt_web_researcher`` with the options held in
    ``st.session_state.research_options``. Unexpected errors are logged and
    shown as a generic error message instead of being raised.
    """
    logger.info("Starting do_web_research function")
    try:
        # Keys are read as-is; no validation request is made here.
        api_keys = {name: os.getenv(name) for name in _RESEARCH_API_KEY_NAMES}

        if not api_keys['SERPER_API_KEY']:
            _render_api_key_setup(api_keys)
            return

        if "research_options" not in st.session_state:
            st.session_state.research_options = _default_research_options()
        options = st.session_state.research_options

        @st.dialog("🔍 Research Options", width="large")
        def show_research_options():
            _render_research_options(api_keys)

        # Main interface
        st.title("Keyword Research Assistant")

        with st.popover(" Keyword Research Tips"):
            st.markdown(
                "\n"
                "### How to Get Better Results\n"
                "1. **Primary Keywords**: Your main topic or focus\n"
                "2. **Related Keywords**: Supporting terms that add context\n"
                "3. **Search Depth**: Higher depth = more comprehensive but slower\n"
                "4. **Target Audience**: Affects content recommendations\n"
                "5. **Content Type**: Influences research focus\n"
            )

        keyword_col, options_col = st.columns([3, 1])
        with keyword_col:
            options["primary_keywords"] = st.text_input(
                "Primary Keywords",
                value=options["primary_keywords"],
                placeholder="Enter main keywords for research...",
                help="Enter your main topic or focus keywords",
            )
        with options_col:
            if st.button("Research Options", use_container_width=True):
                show_research_options()

        if st.button("🔍 Start Research", type="primary", use_container_width=True):
            if not options["primary_keywords"]:
                st.warning("⚠️ Please enter primary keywords for research")
                return
            _run_research(options)
    except Exception as e:
        logger.error(f"Unexpected error in web research: {e}", exc_info=True)
        st.error("🚫 An unexpected error occurred. Please try again.")

View File

@@ -457,26 +457,6 @@ def competitor_analysis():
st.error("Please enter a valid URL.")
def do_web_research():
"""Input keywords, run web research on them, and present the report.

Renders a title, a short instruction, a keyword text input and a
"Start Web Research" button. The research call is made only when the
input splits into at least three whitespace-separated words; failures
of that call are shown with ``st.error`` rather than raised.
"""
st.title("Web Research Assistant")
st.write("Enter keywords for web research. The keywords should be at least three words long.")
search_keywords = st.text_input("Search Keywords", placeholder="Enter keywords for web research...")
# Everything below runs only on the script run triggered by the button press.
if st.button("Start Web Research"):
# Require a non-empty input with at least three whitespace-separated words.
if search_keywords and len(search_keywords.split()) >= 3:
try:
st.info(f"Starting web research on given keywords: {search_keywords}")
# Show a spinner while the research call is in progress.
with st.spinner("Performing web research..."):
web_research_result = gpt_web_researcher(search_keywords)
st.success("Web research completed successfully!")
st.write(web_research_result)
except Exception as err:
# Any failure in the research call is reported in the UI.
st.error(f"ERROR: Failed to do web research: {err}")
# NOTE(review): indentation is lost in this view; judging by its message, this
# else presumably pairs with the keyword-length check above -- confirm.
else:
st.warning("Search keywords should be at least three words long. Please try again.")
def ai_finance_ta_writer():
st.markdown("<div class='sub-header'>AI Financial Technical Analysis Writer</div>", unsafe_allow_html=True)

View File

@@ -1,12 +1,12 @@
import streamlit as st
from lib.utils.alwrity_utils import (
blog_from_keyword, ai_agents_team, essay_writer, ai_news_writer,
ai_finance_ta_writer, ai_social_writer, do_web_research, competitor_analysis
ai_finance_ta_writer, competitor_analysis
)
from lib.alwrity_ui.keyword_web_researcher import do_web_research
from lib.ai_writers.ai_story_writer.story_writer import story_input_section
from lib.ai_writers.ai_product_description_writer import write_ai_prod_desc
#from lib.content_planning_calender.content_planning_agents_alwrity_crew import ai_agents_content_planner
from lib.utils.seo_tools import ai_seo_tools
def ai_writers():

View File

@@ -6,7 +6,7 @@ beautifulsoup4==4.12.2
aiohttp>=3.11.11
openai>=1.3.7
PyPDF2>=3.0.1
google-genai==1.9.0
google-genai>=1.0.0
anthropic>=0.18.1
tenacity>=8.2.3
tabulate>=0.9.0
@@ -31,7 +31,8 @@ prompt_toolkit>=3.0.43
html2image>=2.0.5
lxml[html_clean]>=5.3.0
lxml_html_clean>=0.4.1
streamlit>=1.29.0
streamlit>=1.44.0
Authlib>=1.3.2
yfinance>=0.2.36
pandas_ta>=0.3.14b0
firecrawl-py>=1.14.1