AI Content planning and competitor analysis.

Tight integration with Alwrity, Tavily, and Metaphor.
2025-04-06 17:20:38 +05:30
parent 8312dbaaac
commit 33a608dcdc
3 changed files with 126 additions and 60 deletions
--- a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py
+++ b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py
@@ -40,65 +40,142 @@ def get_metaphor_client():
    """
    METAPHOR_API_KEY = os.environ.get('METAPHOR_API_KEY')
    if not METAPHOR_API_KEY:
+        logger.error("METAPHOR_API_KEY environment variable not set!")
+        st.error("METAPHOR_API_KEY environment variable not set!")
        raise ValueError("METAPHOR_API_KEY environment variable not set!")
    return Exa(METAPHOR_API_KEY)


 def metaphor_rag_search():
    """ Mainly used for researching blog sections. """
+    # FIXME: Implement this.
    metaphor = get_metaphor_client()


-
-def metaphor_find_similar(similar_url):
+def metaphor_find_similar(similar_url, usecase, num_results=5, start_published_date=None, end_published_date=None, 
+                         include_domains=None, exclude_domains=None, include_text=None, exclude_text=None, 
+                         summary_query=None):
    """
    Find similar content using the Metaphor API.
-
    Args:
-        url (str): The URL to find similar content.
-
+        similar_url (str): The URL to find similar content.
+        usecase (str): The use case for the search (e.g., "similar companies", "listicles").
+        num_results (int): Number of results to return (default: 5).
+        start_published_date (str): Start date for filtering results in ISO format.
+        end_published_date (str): End date for filtering results in ISO format.
+        include_domains (list): List of domains to include in the search.
+        exclude_domains (list): List of domains to exclude from the search.
+        include_text (str): Text that must be included in the results.
+        exclude_text (str): Text that must be excluded from the results.
+        summary_query (dict): Custom query for summarization.
    Returns:
-        MetaphorResponse: The response from the Metaphor API.
+        tuple: (DataFrame, MetaphorResponse) - The DataFrame contains the results and the MetaphorResponse contains the raw API response.
    """
    metaphor = get_metaphor_client()
    try:
        logger.info(f"Doing similar web search for url: {similar_url}")
+        
+        # Prepare search parameters
+        search_params = {
+            "highlights": True,
+            "num_results": num_results,
+        }
+        
+        # Add date parameters if provided
+        if start_published_date:
+            search_params["start_published_date"] = start_published_date
+        if end_published_date:
+            search_params["end_published_date"] = end_published_date
+            
+        # Add domain filters if provided
+        if include_domains:
+            search_params["include_domains"] = include_domains
+        if exclude_domains:
+            search_params["exclude_domains"] = exclude_domains
+            
+        # Add text filters if provided
+        if include_text:
+            search_params["include_text"] = include_text
+        if exclude_text:
+            search_params["exclude_text"] = exclude_text
+            
+        # Add summary query if provided
+        if summary_query:
+            search_params["summary"] = summary_query
+        else:
+            # Default summary query based on usecase
+            search_params["summary"] = {"query": f"Find {usecase} similar to the given URL."}
+            
+        # Execute the search
        search_response = metaphor.find_similar_and_contents(
            similar_url,
-            highlights=True,
-            num_results=10)
+            **search_params
+        )
    except Exception as e:
        logger.error(f"Metaphor: Error in finding similar content: {e}")
        raise

    competitors = search_response.results
-    # Initialize lists to store titles and URLs
-    titles = []
-    urls = []
-
    # Initialize lists to store titles, URLs, and contents
    titles = []
    urls = []
    contents = []
    
    # Extract titles, URLs, and contents from the competitors
-    for c in competitors:
+    for i, c in enumerate(competitors):
+        # Update progress bar for each competitor
+        if st.session_state.get('show_progress', True):
+            progress_text = f"Processing competitor {i+1}/{len(competitors)}: {c.title[:30]}..."
+            progress_bar = st.progress(0, text=progress_text)
+        
        titles.append(c.title)
        urls.append(c.url)
-        # Simulate web content fetching and summarization (replace with actual logic)
        all_contents = ""
        try:
+            # Update progress
+            if st.session_state.get('show_progress', True):
+                progress_bar.progress(25, text=f"Fetching content for {c.title[:30]}...")
+                
            search_response = metaphor.search_and_contents(
                c.url,
                type="keyword",
                num_results=1
            )
            research_response = search_response.results
+            
+            # Update progress
+            if st.session_state.get('show_progress', True):
+                progress_bar.progress(50, text=f"Extracting text from {c.title[:30]}...")
+                
            for r in research_response:
                all_contents += r.text
-            c.text = summarize_competitor_content(all_contents)  # Replace with actual summarization function
+                
+            # Update progress
+            if st.session_state.get('show_progress', True):
+                progress_bar.progress(75, text=f"Summarizing content for {c.title[:30]}...")
+                
+            # Get the summary from the competitor content
+            summary_response = summarize_competitor_content(all_contents)
+            c.text = summary_response
+            
+            # Store the raw summary in session state for display in dialog
+            if 'competitor_summaries' not in st.session_state:
+                st.session_state.competitor_summaries = {}
+            st.session_state.competitor_summaries[c.url] = {
+                'title': c.title,
+                'summary': summary_response
+            }
+            
+            # Update progress to complete
+            if st.session_state.get('show_progress', True):
+                progress_bar.progress(100, text=f"Completed processing {c.title[:30]}")
+                
        except Exception as err:
            c.text = f"Failed to summarize content: {err}"
+            # Update progress to show error
+            if st.session_state.get('show_progress', True):
+                progress_bar.progress(100, text=f"Error processing {c.title[:30]}: {str(err)[:50]}...")
+                
        contents.append(c.text)
    
    # Create a DataFrame from the titles, URLs, and contents
@@ -107,13 +184,9 @@ def metaphor_find_similar(similar_url):
        "URL": urls,
        "Content Summary": contents
    })
-    # Display the DataFrame as a table
-    if not df.empty:
-        st.write("### Competitor Analysis Results")
-        st.table(df)
- 
-    print_search_result(competitors)
-    return search_response
+    
+    # Return the DataFrame and the search response
+    return df, search_response


 def calculate_date_range(time_range: str) -> tuple:
--- a/lib/utils/alwrity_utils.py
+++ b/lib/utils/alwrity_utils.py
@@ -3,7 +3,6 @@ import streamlit as st
 import tempfile
 from loguru import logger
 from lib.ai_web_researcher.gpt_online_researcher import gpt_web_researcher
-from lib.ai_web_researcher.metaphor_basic_neural_web_search import metaphor_find_similar
 from lib.ai_writers.keywords_to_blog_streamlit import write_blog_from_keywords
 from lib.ai_writers.speech_to_blog.main_audio_to_blog import generate_audio_blog
 from lib.ai_writers.long_form_ai_writer import long_form_generator
@@ -432,31 +431,6 @@ def ai_news_writer():
            st.error("Please enter valid keywords for the news report. 🚫")


-def competitor_analysis():
-    st.title("Competitor Analysis")
-    st.markdown("""**Use Cases:**
-        - Know similar companies and alternatives for the given URL.
-        - Write listicles, similar companies, Top tools, alternative-to, similar products, similar websites, etc.
-        [Read More Here](https://docs.exa.ai/reference/company-analyst)
-    """)
-
-    similar_url = st.text_input("👋 Enter a single valid URL for web analysis:",
-                placeholder="Provide a competitor's URL and get details of similar/alternative companies.")
-
-    if st.button("Analyze"):
-        if similar_url:
-            try:
-                st.info(f"Starting analysis for the URL: {similar_url}")
-                with st.spinner("Performing competitor analysis..."):
-                    result = metaphor_find_similar(similar_url)
-                st.success("Analysis completed successfully!")
-                st.write(result)
-            except Exception as err:
-                st.error(f"✖ 🚫 Failed to do similar search.\nError: {err}")
-        else:
-            st.error("Please enter a valid URL.")
-
-
 def ai_finance_ta_writer():
    st.markdown("<div class='sub-header'>AI Financial Technical Analysis Writer</div>", unsafe_allow_html=True)

--- a/lib/utils/content_generators.py
+++ b/lib/utils/content_generators.py
@@ -2,8 +2,10 @@ import streamlit as st
 from lib.utils.alwrity_utils import (
    blog_from_keyword, ai_agents_team, essay_writer, ai_news_writer,
    ai_finance_ta_writer
+    ai_finance_ta_writer
 )
 from lib.alwrity_ui.similar_analysis import competitor_analysis
+from lib.alwrity_ui.similar_analysis import competitor_analysis
 from lib.alwrity_ui.keyword_web_researcher import do_web_research
 from lib.ai_writers.ai_story_writer.story_writer import story_input_section
 from lib.ai_writers.ai_product_description_writer import write_ai_prod_desc
@@ -85,7 +87,7 @@ def content_planning_tools():
    tab_keywords, tab_competitor, tab_calendar = st.tabs([
        "🔍 Keywords Researcher",
        "📊 Competitor Analysis",
-        "📅 Content Calendar Ideator"
+        "📅 Content Calendar Ideator (Coming Soon)"
    ])
    
    # Keywords Researcher tab
@@ -98,14 +100,31 @@ def content_planning_tools():
        
    # Content Calendar Ideator tab
    with tab_calendar:
-        plan_keywords = st.text_input(
-            "**Enter Your main Keywords to get 2 months content calendar:**",
-            placeholder="Enter 2-3 main keywords to generate AI content calendar with keyword researched blog titles",
-            help="The keywords are the ones where you would want to generate 50-60 blogs/articles on."
-        )
-        if st.button("**Ideate Content Calendar**"):
-            if plan_keywords:
-                #ai_agents_content_planner(plan_keywords)
-                st.header("Coming Soon.")
-            else:
-                st.error("Come on, really, Enter some keywords to plan on..")
+        st.info("🚧 **Coming Soon!** This feature is currently under development and will be available in a future update.")
+        st.markdown("""
+        <div style='background-color: #f0f2f6; padding: 15px; border-radius: 5px; margin-bottom: 20px;'>
+            <h3 style='margin-top: 0;'>📅 Content Calendar Ideator</h3>
+            <p>The Content Calendar Ideator will help you:</p>
+            <ul>
+                <li>Generate months-long content calendars around your keywords</li>
+                <li>Get AI-suggested blog titles and topics</li>
+                <li>Plan your content strategy with data-driven insights</li>
+                <li>Organize your content creation schedule</li>
+            </ul>
+            <p><strong>Stay tuned for updates!</strong></p>
+        </div>
+        """, unsafe_allow_html=True)
+        
+        # Keep the original functionality but tuck it inside a collapsed "Preview" expander
+        with st.expander("Preview Feature (Under Development)", expanded=False):
+            plan_keywords = st.text_input(
+                "**Enter Your main Keywords to get 2 months content calendar:**",
+                placeholder="Enter 2-3 main keywords to generate AI content calendar with keyword researched blog titles",
+                help="The keywords are the ones where you would want to generate 50-60 blogs/articles on."
+            )
+            if st.button("**Ideate Content Calendar**"):
+                if plan_keywords:
+                    #ai_agents_content_planner(plan_keywords)
+                    st.header("Coming Soon.")
+                else:
+                    st.error("Come on, really, Enter some keywords to plan on..")