story illustrator and story video generator, AI web researcher fixes

2025-05-02 23:09:43 +05:30
parent cda275f1cc
commit 19ff21a8a1
13 changed files with 2625 additions and 186 deletions
--- a/lib/ai_web_researcher/gpt_online_researcher.py
+++ b/lib/ai_web_researcher/gpt_online_researcher.py
@@ -150,7 +150,7 @@ def gpt_web_researcher(search_keywords, search_mode, **kwargs):
                
                # Pass the parameters to do_tavily_ai_search
                t_results = do_tavily_ai_search(
-                    keywords=search_keywords, 
+                    search_keywords,  # Pass as positional argument
                    max_results=kwargs.get('num_results', 10),
                    include_domains=include_domains,
                    search_depth=search_depth,
@@ -258,7 +258,7 @@ def do_google_serp_search(search_keywords, status_container, update_progress, **
    
    try:
        # Validate parameters
-        update_progress("Validating search parameters")
+        update_progress("Validating search parameters", progress=0.1)
        status_container.info("📝 Validating parameters...")
        
        if not search_keywords or not isinstance(search_keywords, str):
@@ -266,7 +266,7 @@ def do_google_serp_search(search_keywords, status_container, update_progress, **
            raise ValueError("Search keywords must be a non-empty string")
        
        # Update search initiation
-        update_progress(f"Initiating search for: '{search_keywords}'")
+        update_progress(f"Initiating search for: '{search_keywords}'", progress=0.2)
        status_container.info("🌐 Querying search API...")
        logger.info(f"Search params: {kwargs}")
        
@@ -275,26 +275,26 @@ def do_google_serp_search(search_keywords, status_container, update_progress, **
        
        if g_results:
            # Log success
-            update_progress("Search completed successfully", "success")
+            update_progress("Search completed successfully", progress=0.8, level="success")
            
            # Update statistics
            stats = f"""Found:
                - {len(g_results.get('organic', []))} organic results
                - {len(g_results.get('peopleAlsoAsk', []))} related questions
                - {len(g_results.get('relatedSearches', []))} related searches"""
-            update_progress(stats)
+            update_progress(stats, progress=0.9)
            
            # Process results
-            update_progress("Processing search results")
+            update_progress("Processing search results", progress=0.95)
            status_container.info("⚡ Processing results...")
            processed_results = process_search_results(g_results)
            
            # Extract titles
-            update_progress("Extracting information")
+            update_progress("Extracting information", progress=0.98)
            g_titles = extract_info(g_results, 'titles')
            
            # Final success
-            update_progress("Analysis completed successfully", "success")
+            update_progress("Analysis completed successfully", progress=1.0, level="success")
            status_container.success("✨ Research completed!")
            
            # Clear main status after delay
@@ -313,13 +313,13 @@ def do_google_serp_search(search_keywords, status_container, update_progress, **
            }
            
        else:
-            update_progress("No results found", "warning")
+            update_progress("No results found", progress=0.5, level="warning")
            status_container.warning("⚠️ No results found")
            return None
            
    except Exception as err:
        error_msg = f"Search failed: {str(err)}"
-        update_progress(error_msg, "error")
+        update_progress(error_msg, progress=0.5, level="error")
        logger.error(error_msg)
        logger.debug("Stack trace:", exc_info=True)
        raise
@@ -343,8 +343,11 @@ def do_tavily_ai_search(search_keywords, max_results=10, **kwargs):
            'include_domains': kwargs.get('include_domains', [""]) if kwargs.get('include_domains') else [""]
        }
        
-        # Pass the parameters to do_tavily_ai_search
-        t_results = do_tavily_ai_search(
+        # Import the Tavily search function directly
+        from .tavily_ai_search import do_tavily_ai_search as tavily_search
+        
+        # Call the actual Tavily search function
+        t_results = tavily_search(
            keywords=search_keywords,
            **tavily_params
        )
--- a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py
+++ b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py
@@ -65,28 +65,22 @@ def metaphor_rag_search():
    
    return processed_results

-def metaphor_find_similar(similar_url, usecase, num_results=5, start_published_date=None, end_published_date=None, 
-                         include_domains=None, exclude_domains=None, include_text=None, exclude_text=None, 
-                         summary_query=None):
-    """
-    Find similar content using the Metaphor API.
-    Args:
-        similar_url (str): The URL to find similar content.
-        usecase (str): The use case for the search (e.g., "similar companies", "listicles").
-        num_results (int): Number of results to return (default: 5).
-        start_published_date (str): Start date for filtering results in ISO format.
-        end_published_date (str): End date for filtering results in ISO format.
-        include_domains (list): List of domains to include in the search.
-        exclude_domains (list): List of domains to exclude from the search.
-        include_text (str): Text that must be included in the results.
-        exclude_text (str): Text that must be excluded from the results.
-        summary_query (dict): Custom query for summarization.
-    Returns:
-        tuple: (DataFrame, MetaphorResponse) - The DataFrame contains the results and the MetaphorResponse contains the raw API response.
-    """
-    metaphor = get_metaphor_client()
+def metaphor_find_similar(similar_url, usecase, num_results=5, start_published_date=None, end_published_date=None,
+                         include_domains=None, exclude_domains=None, include_text=None, exclude_text=None,
+                         summary_query=None, progress_bar=None):
+    """Find similar content using Metaphor API."""
+    
    try:
-        logger.info(f"Doing similar web search for url: {similar_url}")
+        # Initialize progress if not provided
+        if progress_bar is None:
+            progress_bar = st.progress(0.0)
+        
+        # Update progress
+        progress_bar.progress(0.1, text="Initializing search...")
+        
+        # Get Metaphor client
+        metaphor = get_metaphor_client()
+        logger.info(f"Initialized Metaphor client for URL: {similar_url}")
        
        # Prepare search parameters
        search_params = {
@@ -94,113 +88,83 @@ def metaphor_find_similar(similar_url, usecase, num_results=5, start_published_d
            "num_results": num_results,
        }
        
-        # Add date parameters if provided
+        # Add optional parameters if provided
        if start_published_date:
            search_params["start_published_date"] = start_published_date
        if end_published_date:
            search_params["end_published_date"] = end_published_date
-            
-        # Add domain filters if provided
        if include_domains:
            search_params["include_domains"] = include_domains
        if exclude_domains:
            search_params["exclude_domains"] = exclude_domains
-            
-        # Add text filters if provided
        if include_text:
            search_params["include_text"] = include_text
        if exclude_text:
            search_params["exclude_text"] = exclude_text
            
-        # Add summary query if provided
+        # Add summary query
        if summary_query:
            search_params["summary"] = summary_query
        else:
-            # Default summary query based on usecase
            search_params["summary"] = {"query": f"Find {usecase} similar to the given URL."}
-            
-        # Execute the search
+        
+        logger.debug(f"Search parameters: {search_params}")
+        
+        # Update progress
+        progress_bar.progress(0.2, text="Preparing search parameters...")
+        
+        # Make API call
+        logger.info("Calling Metaphor API find_similar_and_contents...")
        search_response = metaphor.find_similar_and_contents(
            similar_url,
            **search_params
        )
+        
+        if search_response and hasattr(search_response, 'results'):
+            competitors = search_response.results
+            total_results = len(competitors)
+            
+            # Update progress
+            progress_bar.progress(0.3, text=f"Found {total_results} results...")
+            
+            # Process results
+            processed_results = []
+            for i, result in enumerate(competitors):
+                # Calculate progress as decimal (0.0-1.0)
+                progress = 0.3 + (0.6 * (i / total_results))
+                progress_text = f"Processing result {i+1}/{total_results}..."
+                progress_bar.progress(progress, text=progress_text)
+                
+                # Process each result
+                processed_result = {
+                    "Title": result.title,
+                    "URL": result.url,
+                    "Content Summary": result.text if hasattr(result, 'text') else "No content available"
+                }
+                processed_results.append(processed_result)
+            
+            # Update progress
+            progress_bar.progress(0.9, text="Finalizing results...")
+            
+            # Create DataFrame
+            df = pd.DataFrame(processed_results)
+            
+            # Update progress
+            progress_bar.progress(1.0, text="Analysis completed!")
+            
+            return df, search_response
+            
+        else:
+            logger.warning("No results found in search response")
+            progress_bar.progress(1.0, text="No results found")
+            return pd.DataFrame(), search_response
+            
    except Exception as e:
-        logger.error(f"Metaphor: Error in finding similar content: {e}")
+        logger.error(f"Error in metaphor_find_similar: {str(e)}", exc_info=True)
+        if progress_bar:
+            progress_bar.progress(1.0, text="Error occurred during analysis")
        raise

-    competitors = search_response.results
-    # Initialize lists to store titles, URLs, and contents
-    titles = []
-    urls = []
-    contents = []
-    progress_bar = st.progress(0, text="Starting competitor analysis...")   
-    
-    # Extract titles, URLs, and contents from the competitors
-    for i, c in enumerate(competitors):
-        # Update progress bar for each competitor
-        if st.session_state.get('show_progress', True):
-            progress_text = f"Processing competitor {i+1}/{len(competitors)}: {c.title[:30]}..."
-            progress_bar.progress((i / len(competitors)) * 100, text=progress_text)
-        titles.append(c.title)
-        urls.append(c.url)
-        all_contents = ""
-        try:
-            # Update progress
-            if st.session_state.get('show_progress', True):
-                progress_bar.progress(25, text=f"Fetching content for {c.title[:30]}...")
-                
-            search_response = metaphor.search_and_contents(
-                c.url,
-                type="keyword",
-                num_results=1
-            )
-            research_response = search_response.results
-            
-            # Update progress
-            if st.session_state.get('show_progress', True):
-                progress_bar.progress(50, text=f"Extracting text from {c.title[:30]}...")
-                
-            for r in research_response:
-                all_contents += r.text
-                
-            # Update progress
-            if st.session_state.get('show_progress', True):
-                progress_bar.progress(75, text=f"Summarizing content for {c.title[:30]}...")
-                
-            # Get the summary from the competitor content
-            summary_response = summarize_competitor_content(all_contents)
-            c.text = summary_response
-            
-            # Store the raw summary in session state for display in dialog
-            if 'competitor_summaries' not in st.session_state:
-                st.session_state.competitor_summaries = {}
-            st.session_state.competitor_summaries[c.url] = {
-                'title': c.title,
-                'summary': summary_response
-            }
-            
-            # Update progress to complete
-            if st.session_state.get('show_progress', True):
-                progress_bar.progress(100, text=f"Completed processing {c.title[:30]}")
-                
-        except Exception as err:
-            c.text = f"Failed to summarize content: {err}"
-            # Update progress to show error
-            if st.session_state.get('show_progress', True):
-                progress_bar.progress(100, text=f"Error processing {c.title[:30]}: {str(err)[:50]}...")
-                
-        contents.append(c.text)
-    
-    # Create a DataFrame from the titles, URLs, and contents
-    df = pd.DataFrame({
-        "Title": titles,
-        "URL": urls,
-        "Content Summary": contents
-    })
-    
-    # Return the DataFrame and the search response
-    return df, search_response
-

 def calculate_date_range(time_range: str) -> tuple:
    """