import streamlit as st
from lib.ai_web_researcher.metaphor_basic_neural_web_search import metaphor_find_similar
from datetime import datetime, timedelta, timezone
import re
import urllib.parse


# Default summary query for each selectable use case. The key order is also
# the order in which the use cases are offered in the select box.
_USECASE_SUMMARY_QUERIES = {
    "similar companies": "Find companies similar to this one, focusing on their business model, target audience, and market position",
    "listicles": "Find similar listicle articles about this topic, focusing on the structure and content",
    "Top tools": "Find top tools similar to this one, focusing on features, pricing, and user reviews",
    "alternative-to": "Find alternatives to this product or service, focusing on comparable features and pricing",
    "similar products": "Find products similar to this one, focusing on features, specifications, and use cases",
    "similar websites": "Find websites similar to this one, focusing on design, content, and functionality",
}

# Look-back window, in days, for each bounded time-range choice.
# "Anytime" is intentionally absent: it means "no date filter".
_TIME_RANGE_DAYS = {"Past Week": 7, "Past Month": 30, "Past Year": 365}


def is_valid_url(url):
    """
    Check if the provided string is a valid URL.

    A URL counts as valid when it parses with both a scheme and a network
    location; the scheme itself is not restricted.

    Args:
        url (str): The URL to validate

    Returns:
        bool: True if valid, False otherwise
    """
    try:
        result = urllib.parse.urlparse(url)
    except (ValueError, AttributeError, TypeError):
        # urlparse raises ValueError for malformed input (e.g. a broken IPv6
        # literal) and AttributeError/TypeError for non-string arguments.
        return False
    return bool(result.scheme and result.netloc)


def _split_csv(raw):
    """Split a comma-separated string into stripped, non-empty items."""
    return [item.strip() for item in raw.split(",") if item.strip()]


def _published_date_range(time_range, now=None):
    """
    Translate a time-range choice into published-date bounds.

    Args:
        time_range (str): One of the keys of ``_TIME_RANGE_DAYS`` or "Anytime".
        now (datetime, optional): Reference end instant; defaults to the
            current time in UTC.

    Returns:
        tuple: ``(start, end)`` formatted timestamp strings, or
        ``(None, None)`` when no date filter applies.
    """
    days = _TIME_RANGE_DAYS.get(time_range)
    if days is None:
        return None, None
    # The format strings below end in a literal "Z" (UTC designator), so the
    # instant must really be UTC rather than naive local time.
    end_date = now if now is not None else datetime.now(timezone.utc)
    start_date = end_date - timedelta(days=days)
    return (
        start_date.strftime("%Y-%m-%dT%H:%M:%S.000Z"),
        end_date.strftime("%Y-%m-%dT%H:%M:%S.999Z"),
    )


def _domain_of(url):
    """Return the host part of ``url``, tolerating URLs without a scheme."""
    text = str(url)
    try:
        parsed = urllib.parse.urlparse(text)
    except ValueError:
        return text
    # Without a scheme urlparse leaves netloc empty and puts the host at the
    # start of the path.
    return parsed.netloc or parsed.path.split("/", 1)[0]


def _card_header(label):
    """Render the heading shown above one group of advanced options."""
    # NOTE(review): the reviewed source shows only the label surrounded by
    # line breaks; if wrapper markup was stripped in transit, restore it here.
    st.markdown(f"\n{label}\n", unsafe_allow_html=True)


def _render_advanced_options(defaults):
    """
    Render the advanced search widgets.

    Args:
        defaults (dict): Option values to start from.

    Returns:
        dict: A copy of ``defaults`` updated with the user's selections.
    """
    options = dict(defaults)

    st.markdown("### 🔧 Advanced Search Options")

    _card_header("📝 Summary Query")
    options["summary_query"] = st.text_area(
        "Customize the summary query",
        value=options["summary_query"],
        placeholder="Enter a custom query for summarization based on your usecase",
        help="This query will be used to generate summaries of the similar content. Be specific about what you want to know.",
    )

    _card_header("🔢 Number of Results")
    options["num_results"] = st.slider(
        "How many results would you like?",
        min_value=1,
        max_value=20,
        value=5,
        step=1,
        help="How many similar results would you like to see?",
    )

    _card_header("🔄 Progress Display")
    # Stored in session state (not in ``options``); nothing in this module
    # reads it back apart from seeding the toggle itself.
    st.session_state.show_progress = st.toggle(
        "Show detailed progress bars",
        value=st.session_state.show_progress,
        help="Toggle to show or hide detailed progress bars during analysis",
    )

    _card_header("⏱️ Time Range")
    options["time_range"] = st.radio(
        "Select time range for results",
        options=["Past Week", "Past Month", "Past Year", "Anytime"],
        index=3,
        horizontal=True,
        help="Filter results by when they were published",
    )

    _card_header("🌐 Domain Filters")
    domain_filter_type = st.radio(
        "Domain Filter Type",
        options=["Include Domains", "Exclude Domains", "None"],
        index=2,
        horizontal=True,
        help="Include or exclude specific domains from search results",
    )
    if domain_filter_type == "Include Domains":
        include_domains_input = st.text_input(
            "Include Domains (comma-separated)",
            placeholder="example.com, another-example.com",
            help="Only results from these domains will be included. Example: arxiv.org, paperswithcode.com",
        )
        if include_domains_input:
            options["include_domains"] = _split_csv(include_domains_input)
    elif domain_filter_type == "Exclude Domains":
        exclude_domains_input = st.text_input(
            "Exclude Domains (comma-separated)",
            placeholder="example.com, another-example.com",
            help="Results from these domains will be excluded from search results",
        )
        if exclude_domains_input:
            options["exclude_domains"] = _split_csv(exclude_domains_input)

    _card_header("📝 Text Filters")
    text_filter_type = st.radio(
        "Text Filter Type",
        options=["Include Text", "Exclude Text", "None"],
        index=2,
        horizontal=True,
        help="Include or exclude results containing specific text",
    )
    if text_filter_type == "Include Text":
        # An empty box means "no filter", same as the None default.
        options["include_text"] = st.text_input(
            "Include Text",
            placeholder="large language model",
            help="Only results containing this phrase will be included (up to 5 words)",
        ) or None
    elif text_filter_type == "Exclude Text":
        options["exclude_text"] = st.text_input(
            "Exclude Text",
            placeholder="course",
            help="Results containing this phrase will be excluded (up to 5 words)",
        ) or None

    return options


def _render_results(df, search_response):
    """Render the result table, CSV download, metrics, summaries and raw data."""
    st.subheader("📊 Competitor Analysis Results")

    st.download_button(
        label="📥 Download Results as CSV",
        data=df.to_csv(index=False),
        file_name=f"competitor_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
        mime="text/csv",
    )

    st.data_editor(
        df,
        column_config={
            "Title": st.column_config.TextColumn(
                "Title",
                help="Title of the similar content",
                width="large",
            ),
            "URL": st.column_config.LinkColumn(
                "URL",
                help="Link to the similar content",
                width="medium",
                display_text="Visit Website",
            ),
            "Content Summary": st.column_config.TextColumn(
                "Content Summary",
                help="Summary of the similar content",
                width="large",
            ),
        },
        hide_index=True,
        use_container_width=True,
    )

    st.subheader("🔍 Analysis Insights")
    total_col, length_col, domains_col = st.columns(3)
    with total_col:
        st.metric("Total Results", len(df))
    with length_col:
        avg_content_length = df["Content Summary"].str.len().mean()
        st.metric("Avg. Content Length", f"{avg_content_length:.0f} chars")
    with domains_col:
        st.metric("Unique Domains", len({_domain_of(url) for url in df["URL"]}))

    st.subheader("📝 Detailed Competitor Summaries")
    # NOTE(review): 'competitor_summaries' is not written anywhere in this
    # module; presumably the search helper populates it — confirm.
    if "competitor_summaries" in st.session_state and st.session_state.competitor_summaries:
        for data in st.session_state.competitor_summaries.values():
            with st.expander(f"📊 {data['title']}", expanded=False):
                st.markdown("### 📝 Detailed Competitor Analysis")
                st.markdown(data["summary"])

    with st.expander("View Raw Data"):
        st.json(search_response)


def _run_analysis(similar_url, usecase, options):
    """Run the similarity search for ``similar_url`` and render the outcome."""
    progress_container = st.empty()
    status_container = st.empty()

    status_container.info(f"Starting analysis for the URL: {similar_url}")
    progress_bar = progress_container.progress(0)

    progress_bar.progress(10)
    status_container.info("Initializing search parameters...")

    start_published_date, end_published_date = _published_date_range(options["time_range"])
    summary_query = options["summary_query"]
    summary_query_param = {"query": summary_query} if summary_query else None

    progress_bar.progress(20)
    status_container.info("Searching for similar content...")

    with st.spinner("Performing competitor analysis..."):
        progress_bar.progress(30)
        status_container.info("Finding similar content...")

        df, search_response = metaphor_find_similar(
            similar_url=similar_url,
            usecase=usecase,
            num_results=options["num_results"],
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            include_domains=options["include_domains"],
            exclude_domains=options["exclude_domains"],
            include_text=options["include_text"],
            exclude_text=options["exclude_text"],
            summary_query=summary_query_param,
        )

    progress_bar.progress(70)
    status_container.info("Processing and analyzing results...")

    progress_bar.progress(100)
    status_container.success("Analysis completed successfully!")

    if df is None or df.empty:
        st.warning("No results found for the given URL and parameters.")
        return
    _render_results(df, search_response)


def competitor_analysis():
    """
    Render the "Competitor Analysis" Streamlit page.

    Collects a URL, a use case and optional advanced search filters, and on
    "Analyze" calls ``metaphor_find_similar`` and displays the returned
    results. Any exception raised during the search or while rendering the
    results is reported through ``st.error`` instead of propagating.
    """
    # Initialize session state for progress bar visibility
    if "show_progress" not in st.session_state:
        st.session_state.show_progress = True

    st.title("Competitor Analysis")
    st.markdown(
        "**Use Cases:**\n"
        "- Know similar companies and alternatives for the given URL.\n"
        "- Write listicles, similar companies, Top tools, alternative-to, "
        "similar products, similar websites, etc.\n"
        "\n"
        "[Read More Here](https://docs.exa.ai/reference/company-analyst)\n"
    )

    similar_url = st.text_input(
        "👋 Enter a single valid URL for web analysis:",
        placeholder="https://example.com",
        help="Enter a complete URL including http:// or https://",
    )
    url_valid = bool(similar_url) and is_valid_url(similar_url)
    if similar_url and not url_valid:
        st.error("⚠️ Please enter a valid URL including http:// or https://")

    usecase = st.selectbox(
        "Select Usecase",
        list(_USECASE_SUMMARY_QUERIES),
        help="Choose the type of analysis you want to perform",
    )

    show_advanced = st.checkbox(
        "Show Advanced Options",
        help="Configure additional search parameters",
    )

    # Values used when the advanced section is hidden.
    options = {
        "num_results": 5,
        "time_range": "Anytime",
        "include_domains": [],
        "exclude_domains": [],
        "include_text": None,
        "exclude_text": None,
        "summary_query": _USECASE_SUMMARY_QUERIES.get(usecase, ""),
    }

    # NOTE(review): slot for the card CSS; no style rules are visible in the
    # reviewed source — confirm whether they were stripped in transit.
    st.markdown("\n\n", unsafe_allow_html=True)

    if show_advanced:
        options = _render_advanced_options(options)

    # The button stays enabled for an empty box so that clicking it surfaces
    # the "enter a valid URL" hint; it is disabled only for invalid input.
    if not st.button("Analyze", disabled=bool(similar_url) and not url_valid):
        return
    if not url_valid:
        st.error("Please enter a valid URL.")
        return

    try:
        _run_analysis(similar_url, usecase, options)
    except Exception as err:
        # Top-level UI boundary: show the failure rather than crash the page.
        st.error(f"✖ 🚫 Failed to do similar search.\nError: {err}")