AI Content planning and competitor analysis.
This commit is contained in:
338
lib/alwrity_ui/similar_analysis.py
Normal file
338
lib/alwrity_ui/similar_analysis.py
Normal file
@@ -0,0 +1,338 @@
|
||||
import streamlit as st
|
||||
from lib.ai_web_researcher.metaphor_basic_neural_web_search import metaphor_find_similar
|
||||
from datetime import datetime, timedelta
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
|
||||
def is_valid_url(url):
|
||||
"""
|
||||
Check if the provided string is a valid URL.
|
||||
|
||||
Args:
|
||||
url (str): The URL to validate
|
||||
|
||||
Returns:
|
||||
bool: True if valid, False otherwise
|
||||
"""
|
||||
try:
|
||||
result = urllib.parse.urlparse(url)
|
||||
return all([result.scheme, result.netloc])
|
||||
except:
|
||||
return False
|
||||
|
||||
|
||||
def competitor_analysis():
|
||||
# Initialize session state for progress bar visibility
|
||||
if 'show_progress' not in st.session_state:
|
||||
st.session_state.show_progress = True
|
||||
|
||||
st.title("Competitor Analysis")
|
||||
st.markdown("""**Use Cases:**
|
||||
- Know similar companies and alternatives for the given URL.
|
||||
- Write listicles, similar companies, Top tools, alternative-to, similar products, similar websites, etc.
|
||||
[Read More Here](https://docs.exa.ai/reference/company-analyst)
|
||||
""")
|
||||
|
||||
# URL input with validation
|
||||
similar_url = st.text_input(
|
||||
"👋 Enter a single valid URL for web analysis:",
|
||||
placeholder="https://example.com",
|
||||
help="Enter a complete URL including http:// or https://"
|
||||
)
|
||||
|
||||
# Validate URL
|
||||
url_valid = is_valid_url(similar_url) if similar_url else False
|
||||
if similar_url and not url_valid:
|
||||
st.error("⚠️ Please enter a valid URL including http:// or https://")
|
||||
|
||||
# Usecase selection with improved help
|
||||
usecase = st.selectbox(
|
||||
"Select Usecase",
|
||||
["similar companies", "listicles", "Top tools", "alternative-to", "similar products", "similar websites"],
|
||||
help="Choose the type of analysis you want to perform"
|
||||
)
|
||||
|
||||
# Default summary query based on usecase
|
||||
default_summary_queries = {
|
||||
"similar companies": "Find companies similar to this one, focusing on their business model, target audience, and market position",
|
||||
"listicles": "Find similar listicle articles about this topic, focusing on the structure and content",
|
||||
"Top tools": "Find top tools similar to this one, focusing on features, pricing, and user reviews",
|
||||
"alternative-to": "Find alternatives to this product or service, focusing on comparable features and pricing",
|
||||
"similar products": "Find products similar to this one, focusing on features, specifications, and use cases",
|
||||
"similar websites": "Find websites similar to this one, focusing on design, content, and functionality"
|
||||
}
|
||||
|
||||
# Advanced options using a modal dialog
|
||||
show_advanced = st.checkbox("Show Advanced Options", help="Configure additional search parameters")
|
||||
|
||||
# Initialize default values
|
||||
num_results = 5
|
||||
time_range = "Anytime"
|
||||
include_domains = []
|
||||
exclude_domains = []
|
||||
include_text = None
|
||||
exclude_text = None
|
||||
summary_query = default_summary_queries.get(usecase, "")
|
||||
|
||||
# Add custom CSS for card styling
|
||||
st.markdown("""
|
||||
<style>
|
||||
.card {
|
||||
background: linear-gradient(to right, #f0f8ff, #e6f3ff);
|
||||
border-radius: 10px;
|
||||
padding: 20px;
|
||||
margin-bottom: 20px;
|
||||
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
||||
border-left: 4px solid #4a90e2;
|
||||
}
|
||||
.card-title {
|
||||
color: #2c5282;
|
||||
font-weight: bold;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
</style>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
# Advanced options section
|
||||
if show_advanced:
|
||||
st.markdown("### 🔧 Advanced Search Options")
|
||||
|
||||
# Summary query with improved help in a card
|
||||
st.markdown('<div class="card"><div class="card-title">📝 Summary Query</div></div>', unsafe_allow_html=True)
|
||||
summary_query = st.text_area(
|
||||
"Customize the summary query",
|
||||
value=summary_query,
|
||||
placeholder="Enter a custom query for summarization based on your usecase",
|
||||
help="This query will be used to generate summaries of the similar content. Be specific about what you want to know."
|
||||
)
|
||||
|
||||
# Number of results with improved help in a card
|
||||
st.markdown('<div class="card"><div class="card-title">🔢 Number of Results</div></div>', unsafe_allow_html=True)
|
||||
num_results = st.slider(
|
||||
"How many results would you like?",
|
||||
min_value=1,
|
||||
max_value=20,
|
||||
value=5,
|
||||
step=1,
|
||||
help="How many similar results would you like to see?"
|
||||
)
|
||||
|
||||
# Progress bar visibility toggle
|
||||
st.markdown('<div class="card"><div class="card-title">🔄 Progress Display</div></div>', unsafe_allow_html=True)
|
||||
st.session_state.show_progress = st.toggle(
|
||||
"Show detailed progress bars",
|
||||
value=st.session_state.show_progress,
|
||||
help="Toggle to show or hide detailed progress bars during analysis"
|
||||
)
|
||||
|
||||
# Time range selection with improved styling in a card
|
||||
st.markdown('<div class="card"><div class="card-title">⏱️ Time Range</div></div>', unsafe_allow_html=True)
|
||||
time_range = st.radio(
|
||||
"Select time range for results",
|
||||
options=["Past Week", "Past Month", "Past Year", "Anytime"],
|
||||
index=3,
|
||||
horizontal=True,
|
||||
help="Filter results by when they were published"
|
||||
)
|
||||
|
||||
# Domain filters with improved styling in a card
|
||||
st.markdown('<div class="card"><div class="card-title">🌐 Domain Filters</div></div>', unsafe_allow_html=True)
|
||||
domain_filter_type = st.radio(
|
||||
"Domain Filter Type",
|
||||
options=["Include Domains", "Exclude Domains", "None"],
|
||||
index=2,
|
||||
horizontal=True,
|
||||
help="Include or exclude specific domains from search results"
|
||||
)
|
||||
|
||||
if domain_filter_type == "Include Domains":
|
||||
include_domains_input = st.text_input(
|
||||
"Include Domains (comma-separated)",
|
||||
placeholder="example.com, another-example.com",
|
||||
help="Only results from these domains will be included. Example: arxiv.org, paperswithcode.com"
|
||||
)
|
||||
if include_domains_input:
|
||||
include_domains = [domain.strip() for domain in include_domains_input.split(",")]
|
||||
|
||||
elif domain_filter_type == "Exclude Domains":
|
||||
exclude_domains_input = st.text_input(
|
||||
"Exclude Domains (comma-separated)",
|
||||
placeholder="example.com, another-example.com",
|
||||
help="Results from these domains will be excluded from search results"
|
||||
)
|
||||
if exclude_domains_input:
|
||||
exclude_domains = [domain.strip() for domain in exclude_domains_input.split(",")]
|
||||
|
||||
# Text filters with improved styling in a card
|
||||
st.markdown('<div class="card"><div class="card-title">📝 Text Filters</div></div>', unsafe_allow_html=True)
|
||||
text_filter_type = st.radio(
|
||||
"Text Filter Type",
|
||||
options=["Include Text", "Exclude Text", "None"],
|
||||
index=2,
|
||||
horizontal=True,
|
||||
help="Include or exclude results containing specific text"
|
||||
)
|
||||
|
||||
if text_filter_type == "Include Text":
|
||||
include_text = st.text_input(
|
||||
"Include Text",
|
||||
placeholder="large language model",
|
||||
help="Only results containing this phrase will be included (up to 5 words)"
|
||||
)
|
||||
|
||||
elif text_filter_type == "Exclude Text":
|
||||
exclude_text = st.text_input(
|
||||
"Exclude Text",
|
||||
placeholder="course",
|
||||
help="Results containing this phrase will be excluded (up to 5 words)"
|
||||
)
|
||||
|
||||
# Analyze button with validation
|
||||
if st.button("Analyze", disabled=not url_valid if similar_url else False):
|
||||
if similar_url and url_valid:
|
||||
try:
|
||||
# Create a progress container
|
||||
progress_container = st.empty()
|
||||
status_container = st.empty()
|
||||
results_container = st.empty()
|
||||
|
||||
# Display initial status
|
||||
status_container.info(f"Starting analysis for the URL: {similar_url}")
|
||||
|
||||
# Create a progress bar
|
||||
progress_bar = progress_container.progress(0)
|
||||
|
||||
# Update progress and status
|
||||
progress_bar.progress(10)
|
||||
status_container.info("Initializing search parameters...")
|
||||
|
||||
# Calculate date range based on selection
|
||||
start_date = None
|
||||
end_date = None
|
||||
|
||||
if time_range != "Anytime":
|
||||
end_date = datetime.now()
|
||||
if time_range == "Past Week":
|
||||
start_date = end_date - timedelta(days=7)
|
||||
elif time_range == "Past Month":
|
||||
start_date = end_date - timedelta(days=30)
|
||||
elif time_range == "Past Year":
|
||||
start_date = end_date - timedelta(days=365)
|
||||
|
||||
# Format dates for API if they exist
|
||||
start_published_date = start_date.strftime("%Y-%m-%dT%H:%M:%S.000Z") if start_date else None
|
||||
end_published_date = end_date.strftime("%Y-%m-%dT%H:%M:%S.999Z") if end_date else None
|
||||
|
||||
# Prepare summary query
|
||||
summary_query_param = None
|
||||
if summary_query:
|
||||
summary_query_param = {"query": summary_query}
|
||||
|
||||
# Update progress
|
||||
progress_bar.progress(20)
|
||||
status_container.info("Searching for similar content...")
|
||||
|
||||
# Call the metaphor_find_similar function with all parameters
|
||||
with st.spinner("Performing competitor analysis..."):
|
||||
# Update progress
|
||||
progress_bar.progress(30)
|
||||
status_container.info("Finding similar content...")
|
||||
|
||||
# Call the API
|
||||
df, search_response = metaphor_find_similar(
|
||||
similar_url=similar_url,
|
||||
usecase=usecase,
|
||||
num_results=num_results,
|
||||
start_published_date=start_published_date,
|
||||
end_published_date=end_published_date,
|
||||
include_domains=include_domains,
|
||||
exclude_domains=exclude_domains,
|
||||
include_text=include_text,
|
||||
exclude_text=exclude_text,
|
||||
summary_query=summary_query_param
|
||||
)
|
||||
|
||||
# Update progress
|
||||
progress_bar.progress(70)
|
||||
status_container.info("Processing and analyzing results...")
|
||||
|
||||
# Update progress to complete
|
||||
progress_bar.progress(100)
|
||||
status_container.success("Analysis completed successfully!")
|
||||
|
||||
# Display results using data editor
|
||||
if not df.empty:
|
||||
st.subheader("📊 Competitor Analysis Results")
|
||||
|
||||
# Add a download button for the results
|
||||
csv = df.to_csv(index=False)
|
||||
st.download_button(
|
||||
label="📥 Download Results as CSV",
|
||||
data=csv,
|
||||
file_name=f"competitor_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
|
||||
mime="text/csv",
|
||||
)
|
||||
|
||||
# Display the data editor
|
||||
st.data_editor(
|
||||
df,
|
||||
column_config={
|
||||
"Title": st.column_config.TextColumn(
|
||||
"Title",
|
||||
help="Title of the similar content",
|
||||
width="large",
|
||||
),
|
||||
"URL": st.column_config.LinkColumn(
|
||||
"URL",
|
||||
help="Link to the similar content",
|
||||
width="medium",
|
||||
display_text="Visit Website",
|
||||
),
|
||||
"Content Summary": st.column_config.TextColumn(
|
||||
"Content Summary",
|
||||
help="Summary of the similar content",
|
||||
width="large",
|
||||
),
|
||||
},
|
||||
hide_index=True,
|
||||
use_container_width=True,
|
||||
)
|
||||
|
||||
# Display additional insights
|
||||
st.subheader("🔍 Analysis Insights")
|
||||
|
||||
# Create columns for metrics
|
||||
col1, col2, col3 = st.columns(3)
|
||||
|
||||
with col1:
|
||||
st.metric("Total Results", len(df))
|
||||
|
||||
with col2:
|
||||
# Calculate average content length
|
||||
avg_content_length = df["Content Summary"].str.len().mean()
|
||||
st.metric("Avg. Content Length", f"{avg_content_length:.0f} chars")
|
||||
|
||||
with col3:
|
||||
# Calculate unique domains
|
||||
unique_domains = len(set([url.split('/')[2] for url in df["URL"]]))
|
||||
st.metric("Unique Domains", unique_domains)
|
||||
|
||||
# Display full summaries in expanders
|
||||
st.subheader("📝 Detailed Competitor Summaries")
|
||||
|
||||
if 'competitor_summaries' in st.session_state and st.session_state.competitor_summaries:
|
||||
for url, data in st.session_state.competitor_summaries.items():
|
||||
with st.expander(f"📊 {data['title']}", expanded=False):
|
||||
st.markdown("### 📝 Detailed Competitor Analysis")
|
||||
st.markdown(data['summary'])
|
||||
|
||||
# Display raw data in an expander
|
||||
with st.expander("View Raw Data"):
|
||||
st.json(search_response)
|
||||
else:
|
||||
st.warning("No results found for the given URL and parameters.")
|
||||
except Exception as err:
|
||||
st.error(f"✖ 🚫 Failed to do similar search.\nError: {err}")
|
||||
else:
|
||||
st.error("Please enter a valid URL.")
|
||||
@@ -102,4 +102,4 @@ def content_planning_tools():
|
||||
#ai_agents_content_planner(plan_keywords)
|
||||
st.header("Coming Soon.")
|
||||
else:
|
||||
st.error("Come on, really, Enter some keywords to plan on..")
|
||||
st.error("Come on, really, Enter some keywords to plan on..")
|
||||
Reference in New Issue
Block a user