AI Blog Rewriter Updater feature complete
This commit is contained in:
@@ -497,6 +497,8 @@ def metaphor_news_summarizer(news_keywords):
|
|||||||
""" build a LLM-based news summarizer app with the Exa API to keep us up-to-date
|
""" build a LLM-based news summarizer app with the Exa API to keep us up-to-date
|
||||||
with the latest news on a given topic.
|
with the latest news on a given topic.
|
||||||
"""
|
"""
|
||||||
|
exa = get_metaphor_client()
|
||||||
|
|
||||||
# FIXME: Needs to be user defined.
|
# FIXME: Needs to be user defined.
|
||||||
one_week_ago = (datetime.now() - timedelta(days=7))
|
one_week_ago = (datetime.now() - timedelta(days=7))
|
||||||
date_cutoff = one_week_ago.strftime("%Y-%m-%d")
|
date_cutoff = one_week_ago.strftime("%Y-%m-%d")
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from lib.ai_writers.ai_story_writer.story_writer import story_input_section
|
|||||||
from lib.ai_writers.ai_product_description_writer import write_ai_prod_desc
|
from lib.ai_writers.ai_product_description_writer import write_ai_prod_desc
|
||||||
from lib.ai_writers.ai_copywriter.copywriter_dashboard import copywriter_dashboard
|
from lib.ai_writers.ai_copywriter.copywriter_dashboard import copywriter_dashboard
|
||||||
from lib.ai_writers.linkedin_writer import LinkedInAIWriter
|
from lib.ai_writers.linkedin_writer import LinkedInAIWriter
|
||||||
|
from lib.ai_writers.blog_rewriter_updater.ai_blog_rewriter import write_blog_rewriter
|
||||||
#from lib.content_planning_calender.content_planning_agents_alwrity_crew import ai_agents_content_planner
|
#from lib.content_planning_calender.content_planning_agents_alwrity_crew import ai_agents_content_planner
|
||||||
from lib.ai_writers.ai_blog_writer.ai_blog_generator import ai_blog_writer_page
|
from lib.ai_writers.ai_blog_writer.ai_blog_generator import ai_blog_writer_page
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
@@ -20,6 +21,14 @@ def list_ai_writers():
|
|||||||
"function": ai_blog_writer_page,
|
"function": ai_blog_writer_page,
|
||||||
"path": "ai_blog_writer"
|
"path": "ai_blog_writer"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "AI Blog Rewriter",
|
||||||
|
"icon": "🔄",
|
||||||
|
"description": "Rewrite and update existing blog content with improved quality and SEO optimization",
|
||||||
|
"category": "Content Creation",
|
||||||
|
"function": write_blog_rewriter,
|
||||||
|
"path": "blog_rewriter"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "Story Writer",
|
"name": "Story Writer",
|
||||||
"icon": "📚",
|
"icon": "📚",
|
||||||
|
|||||||
163
lib/ai_writers/blog_rewriter_updater/README.md
Normal file
163
lib/ai_writers/blog_rewriter_updater/README.md
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
# AI Blog Rewriter & Updater
|
||||||
|
|
||||||
|
A powerful AI-powered tool for rewriting and updating existing blog content with improved quality, factual accuracy, and SEO optimization.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
### 1. Content Import
|
||||||
|
- **URL Import**: Automatically extract content from any blog URL
|
||||||
|
- **Manual Input**: Paste content directly with title, meta description, and author information
|
||||||
|
- **Smart Content Extraction**: Preserves structure, headings, images, and metadata
|
||||||
|
|
||||||
|
### 2. Content Analysis
|
||||||
|
- **Metrics Analysis**:
|
||||||
|
- Word count
|
||||||
|
- Sentence count
|
||||||
|
- Paragraph count
|
||||||
|
- Average words per sentence
|
||||||
|
- Average sentences per paragraph
|
||||||
|
- **Structure Analysis**:
|
||||||
|
- Heading hierarchy
|
||||||
|
- Content organization
|
||||||
|
- Image analysis
|
||||||
|
- **Age Analysis**:
|
||||||
|
- Content age calculation
|
||||||
|
- Publication date detection
|
||||||
|
|
||||||
|
### 3. Web Research
|
||||||
|
- **Topic Extraction**: Automatically identifies key topics for fact-checking
|
||||||
|
- **Multi-Source Research**: Gathers information from various sources
|
||||||
|
- **Research Depth Control**: Choose between low, medium, and high research depth (2, 3, or 5 search results per topic, respectively)
|
||||||
|
- **Source Organization**: Categorizes research by topic with source details
|
||||||
|
|
||||||
|
### 4. Rewriting Modes
|
||||||
|
- **Standard Rewrite**: Improve clarity and flow while maintaining core message
|
||||||
|
- **SEO Optimization**: Enhance content for search engines with targeted keywords
|
||||||
|
- **Simplification**: Make complex content more accessible
|
||||||
|
- **Expansion**: Add more details and examples
|
||||||
|
- **Fact Check**: Update outdated information
|
||||||
|
- **Tone Shift**: Change writing style while preserving content
|
||||||
|
- **Modernization**: Update with current information and trends
|
||||||
|
|
||||||
|
### 5. Customization Options
|
||||||
|
- **Tone Selection**:
|
||||||
|
- Professional
|
||||||
|
- Conversational
|
||||||
|
- Academic
|
||||||
|
- Enthusiastic
|
||||||
|
- Authoritative
|
||||||
|
- Friendly
|
||||||
|
- Technical
|
||||||
|
- Inspirational
|
||||||
|
- **Length Control**:
|
||||||
|
- Maintain original length
|
||||||
|
- Create shorter version
|
||||||
|
- Create longer version
|
||||||
|
- Custom word count
|
||||||
|
- **SEO Features**:
|
||||||
|
- Focus keyword optimization
|
||||||
|
- Meta description generation
|
||||||
|
- Title optimization
|
||||||
|
- **Special Instructions**: Add custom requirements for the rewrite
|
||||||
|
|
||||||
|
### 6. Image Generation
|
||||||
|
- **AI Image Suggestions**: Get recommendations for relevant images
|
||||||
|
- **Custom Image Generation**: Create images based on content
|
||||||
|
- **Style Options**:
|
||||||
|
- Realistic
|
||||||
|
- Artistic
|
||||||
|
- Cartoon
|
||||||
|
- 3D Render
|
||||||
|
- **Image Placement**: Suggested optimal placement within content
|
||||||
|
|
||||||
|
### 7. Export Options
|
||||||
|
- **Preview Mode**: View formatted content
|
||||||
|
- **Markdown Export**: Get clean markdown version
|
||||||
|
- **Image Integration**: Include generated images with captions
|
||||||
|
- **Meta Information**: Export with optimized title and meta description
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
1. **Import Content**
|
||||||
|
- Choose between URL import or manual content entry
|
||||||
|
- Provide necessary metadata (title, author, etc.)
|
||||||
|
|
||||||
|
2. **Analysis & Research**
|
||||||
|
- Review content analysis metrics
|
||||||
|
- Examine research findings
|
||||||
|
- Identify areas for improvement
|
||||||
|
|
||||||
|
3. **Configure Rewrite Settings**
|
||||||
|
- Select rewrite mode
|
||||||
|
- Choose target tone
|
||||||
|
- Set content length
|
||||||
|
- Add focus keywords
|
||||||
|
- Provide special instructions
|
||||||
|
|
||||||
|
4. **Review & Export**
|
||||||
|
- Preview rewritten content
|
||||||
|
- Generate suggested images
|
||||||
|
- Export in desired format
|
||||||
|
|
||||||
|
## Technical Details
|
||||||
|
|
||||||
|
### Dependencies
|
||||||
|
- Streamlit for UI
|
||||||
|
- BeautifulSoup for content extraction
|
||||||
|
- GPT providers for text generation
|
||||||
|
- Image generation capabilities
|
||||||
|
- Web research APIs (Exa, Tavily)
|
||||||
|
|
||||||
|
### Key Components
|
||||||
|
- `BlogRewriter` class: Core functionality
|
||||||
|
- Content extraction and analysis
|
||||||
|
- Research integration
|
||||||
|
- AI-powered rewriting
|
||||||
|
- Image generation
|
||||||
|
- Export capabilities
|
||||||
|
|
||||||
|
### Error Handling
|
||||||
|
- Robust error handling for URL extraction
|
||||||
|
- Fallback mechanisms for content parsing
|
||||||
|
- Graceful degradation for API failures
|
||||||
|
- User-friendly error messages
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
1. **Content Import**
|
||||||
|
- Use clean, well-structured URLs
|
||||||
|
- Provide complete metadata for manual entry
|
||||||
|
- Ensure content is properly formatted
|
||||||
|
|
||||||
|
2. **Research Settings**
|
||||||
|
- Choose appropriate research depth
|
||||||
|
- Review research findings carefully
|
||||||
|
- Verify source credibility
|
||||||
|
|
||||||
|
3. **Rewrite Configuration**
|
||||||
|
- Select appropriate tone for audience
|
||||||
|
- Use relevant focus keywords
|
||||||
|
- Provide clear special instructions
|
||||||
|
|
||||||
|
4. **Image Generation**
|
||||||
|
- Use descriptive prompts
|
||||||
|
- Choose appropriate style
|
||||||
|
- Consider image placement
|
||||||
|
|
||||||
|
## Limitations
|
||||||
|
|
||||||
|
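- Maximum content length for processing: only the first 6,000 characters of the original content are included in the rewrite prompt; longer posts are truncated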
|
||||||
|
- API rate limits for research: at most three topics are researched per run to limit search API calls
|
||||||
|
- Image generation constraints
|
||||||
|
- Language support limitations
|
||||||
|
|
||||||
|
## Future Enhancements
|
||||||
|
|
||||||
|
- Multi-language support
|
||||||
|
- Advanced SEO analysis
|
||||||
|
- Content structure templates
|
||||||
|
- Collaborative editing
|
||||||
|
- Integration with CMS platforms
|
||||||
|
- Custom AI model selection
|
||||||
|
- Advanced image editing
|
||||||
|
- Content versioning
|
||||||
11
lib/ai_writers/blog_rewriter_updater/ai_blog_rewriter.py
Normal file
11
lib/ai_writers/blog_rewriter_updater/ai_blog_rewriter.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
"""
|
||||||
|
AI Blog Rewriter Module
|
||||||
|
|
||||||
|
This module provides the main entry point for the blog rewriter functionality,
|
||||||
|
importing and using the utility and UI modules.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .blog_rewriter_ui import write_blog_rewriter
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
write_blog_rewriter()
|
||||||
@@ -1,527 +1,14 @@
|
|||||||
"""
|
"""
|
||||||
AI Blog Rewriter Module
|
Blog Rewriter UI Module
|
||||||
|
|
||||||
This module provides functionality to rewrite and update existing blog content
|
This module contains the Streamlit interface for the blog rewriter,
|
||||||
with improved quality, factual accuracy, and SEO optimization.
|
providing a user-friendly way to interact with the rewriting functionality.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
import logging
|
|
||||||
from typing import Dict, List, Tuple, Optional, Any
|
|
||||||
import json
|
import json
|
||||||
import os
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from .blog_rewriter_utils import BlogRewriter, REWRITE_MODES, TONE_OPTIONS, MAX_META_DESCRIPTION_LENGTH
|
||||||
# Import required modules from the project
|
|
||||||
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
|
||||||
from ..gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
|
|
||||||
from ..web_research.exa_search import exa_search
|
|
||||||
from ..web_research.tavily_search import tavily_search
|
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Define constants
|
|
||||||
# Length caps used for the SEO-facing title and meta description fields.
MAX_TITLE_LENGTH = 70
MAX_META_DESCRIPTION_LENGTH = 160

# Rewrite mode key -> human-readable description of that mode.
REWRITE_MODES = dict(
    standard="Standard rewrite with improved clarity and flow",
    seo_optimization="Optimize for search engines with targeted keywords",
    simplification="Simplify complex content for broader audience",
    expansion="Expand with additional details and examples",
    fact_check="Focus on fact-checking and updating information",
    tone_shift="Change the tone while preserving content",
    modernization="Update outdated content with current information",
)

# Tones a rewrite can target, in display order.
TONE_OPTIONS = [
    "Professional",
    "Conversational",
    "Academic",
    "Enthusiastic",
    "Authoritative",
    "Friendly",
    "Technical",
    "Inspirational",
]
|
|
||||||
|
|
||||||
class BlogRewriter:
    """Extract, analyse, research and rewrite an existing blog post.

    The public methods return plain dictionaries (or a string for images).
    Failures of the network, search and LLM calls are logged and surfaced
    through an ``"error"`` key or a fallback value instead of being raised.
    """

    # Longest slice of the original post embedded in the rewrite prompt.
    MAX_PROMPT_CONTENT_CHARS = 6000

    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'

    # Elements whose text is page chrome rather than article content.
    _NOISE_TAGS = ["nav", "aside", "footer", "comments", "script", "style"]

    # Non-ISO date layouts tried, in order, when parsing a publish date.
    _DATE_FORMATS = [
        "%Y-%m-%d", "%d-%m-%Y", "%B %d, %Y", "%b %d, %Y",
        "%d %B %Y", "%d %b %Y", "%Y/%m/%d", "%d/%m/%Y",
    ]

    # Search results requested per topic for each research depth.
    _RESULTS_BY_DEPTH = {"low": 2, "medium": 3, "high": 5}

    # Keys every rewrite result must carry; missing ones fall back to the original.
    _REQUIRED_FIELDS = ["title", "meta_description", "content"]

    def __init__(self):
        """Initialize the BlogRewriter with empty result holders."""
        self.original_content = {}
        self.rewritten_content = {}
        self.research_results = {}
        self.content_analysis = {}
        self.image_suggestions = []

    # ------------------------------------------------------------------
    # Content extraction
    # ------------------------------------------------------------------

    @staticmethod
    def _clean_text(text: str) -> str:
        """Normalise whitespace while keeping paragraph breaks intact.

        Runs of spaces/tabs become one space, whitespace around line breaks
        is trimmed, and three or more consecutive newlines collapse to a
        single blank line.
        """
        text = re.sub(r'[^\S\n]+', ' ', text)   # horizontal whitespace only
        text = re.sub(r' *\n *', '\n', text)
        text = re.sub(r'\n{3,}', '\n\n', text)
        return text.strip()

    @staticmethod
    def _resolve_image_url(page_url: str, src: str) -> str:
        """Return ``src`` as an absolute URL, resolved against ``page_url``.

        Handles absolute, root-relative, page-relative and protocol-relative
        (``//host/path``) sources.
        """
        # Local import so the file's top-level import block stays untouched.
        from urllib.parse import urljoin

        return urljoin(page_url, src)

    def extract_content_from_url(self, url: str) -> Dict[str, Any]:
        """
        Extract content from a given URL.

        Args:
            url: The URL to extract content from

        Returns:
            Dictionary with ``title``, ``meta_description``, ``content``,
            ``headings``, ``images``, ``publish_date``, ``author`` and ``url``.
            On any failure the same keys are returned empty, plus an
            ``error`` key holding the exception message.
        """
        logger.info(f"Extracting content from URL: {url}")

        try:
            response = requests.get(url, headers={'User-Agent': self._USER_AGENT}, timeout=15)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # get_text() never returns None, unlike .string on a <title>
            # that is empty or has nested markup.
            title = soup.title.get_text().strip() if soup.title else ""

            meta_desc = ""
            meta_tag = soup.find("meta", attrs={"name": "description"})
            if meta_tag and "content" in meta_tag.attrs:
                meta_desc = meta_tag["content"]

            # Simplified main-content detection: <article>, then a container
            # whose class looks like content, then the whole <body>.
            container = (
                soup.find("article")
                or soup.find(["main", "div", "section"], class_=re.compile(r"content|article|post|entry"))
                or soup.find("body")
            )
            content = ""
            if container:
                # Drop navigation, sidebars, scripts, etc. before reading text.
                for elem in container.find_all(self._NOISE_TAGS):
                    elem.decompose()
                content = self._clean_text(container.get_text(separator="\n\n"))

            # Headings, for structure analysis.
            headings = [
                {"level": int(h.name[1]), "text": h.get_text().strip()}
                for h in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
            ]

            # Images, skipping inline data: URIs.
            images = []
            for img in soup.find_all("img"):
                src = img.get("src")
                if not src or src.startswith("data:"):
                    continue
                images.append({
                    "url": self._resolve_image_url(url, src),
                    "alt_text": img.get("alt", "")
                })

            # Publish date: Open Graph meta first, then a date-like element.
            publish_date = None
            date_meta = soup.find("meta", attrs={"property": "article:published_time"})
            if date_meta and "content" in date_meta.attrs:
                publish_date = date_meta["content"]
            else:
                date_elem = soup.find(["time", "span", "div"], class_=re.compile(r"date|time|publish"))
                if date_elem and date_elem.get_text():
                    publish_date = date_elem.get_text().strip()

            # Author: meta tag first, then an author/byline element.
            author = None
            author_meta = soup.find("meta", attrs={"name": "author"})
            if author_meta and "content" in author_meta.attrs:
                author = author_meta["content"]
            else:
                author_elem = soup.find(["a", "span", "div"], class_=re.compile(r"author|byline"))
                if author_elem and author_elem.get_text():
                    author = author_elem.get_text().strip()

            return {
                "title": title,
                "meta_description": meta_desc,
                "content": content,
                "headings": headings,
                "images": images,
                "publish_date": publish_date,
                "author": author,
                "url": url
            }

        except Exception as e:
            # Boundary handler: callers get the error in the result dict.
            logger.error(f"Error extracting content from URL: {e}")
            return {
                "title": "",
                "meta_description": "",
                "content": "",
                "headings": [],
                "images": [],
                "publish_date": None,
                "author": None,
                "url": url,
                "error": str(e)
            }

    # ------------------------------------------------------------------
    # Content analysis
    # ------------------------------------------------------------------

    @classmethod
    def _parse_publish_date(cls, raw: str) -> Optional[datetime]:
        """Parse ``raw`` as ISO 8601 or one of ``_DATE_FORMATS``; None if neither fits."""
        raw = raw.strip()
        try:
            # A trailing "Z" is rewritten because older Python versions of
            # fromisoformat() do not accept it.
            return datetime.fromisoformat(raw.replace("Z", "+00:00"))
        except ValueError:
            pass
        for fmt in cls._DATE_FORMATS:
            try:
                return datetime.strptime(raw, fmt)
            except ValueError:
                continue
        return None

    def _content_age(self, publish_date: Any) -> Dict[str, Any]:
        """Return the age of the content in days/months/years, or an ``error`` entry."""
        if not publish_date:
            return {"error": "No publish date found"}

        published = self._parse_publish_date(str(publish_date))
        if published is None:
            logger.warning(f"Could not parse publish date: {publish_date!r}")
            return {"error": "Could not determine content age"}

        # Use "now" in the same timezone (or naive, when tzinfo is None) so
        # aware and naive datetimes are never mixed in the subtraction.
        age_days = (datetime.now(published.tzinfo) - published).days
        return {
            "days": age_days,
            "months": round(age_days / 30, 1),
            "years": round(age_days / 365, 1)
        }

    def analyze_content(self, content: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze the extracted content to provide insights.

        Args:
            content: Dictionary containing extracted content

        Returns:
            Dictionary with ``metrics``, ``heading_structure`` (heading level
            -> count), ``content_age`` and ``images`` entries
        """
        logger.info("Analyzing content")

        analysis = {}

        # Basic metrics; empty fragments left by the splits are not counted.
        text_content = content.get("content") or ""
        word_count = len(text_content.split())
        sentence_count = sum(1 for part in re.split(r'[.!?]+', text_content) if part.strip())
        paragraph_count = sum(1 for part in re.split(r'\n\s*\n', text_content) if part.strip())

        analysis["metrics"] = {
            "word_count": word_count,
            "sentence_count": sentence_count,
            "paragraph_count": paragraph_count,
            "avg_words_per_sentence": round(word_count / max(sentence_count, 1), 1),
            "avg_sentences_per_paragraph": round(sentence_count / max(paragraph_count, 1), 1)
        }

        # Heading structure: how many headings exist at each level.
        heading_structure = {}
        for heading in content.get("headings") or []:
            level = heading["level"]
            heading_structure[level] = heading_structure.get(level, 0) + 1
        analysis["heading_structure"] = heading_structure

        analysis["content_age"] = self._content_age(content.get("publish_date"))

        images = content.get("images") or []
        analysis["images"] = {
            "count": len(images),
            "with_alt_text": sum(1 for img in images if img.get("alt_text"))
        }

        return analysis

    # ------------------------------------------------------------------
    # Web research
    # ------------------------------------------------------------------

    @staticmethod
    def _fallback_topics(title: str) -> List[Dict[str, str]]:
        """Generic research topics used when the LLM yields no usable ones."""
        return [
            {"topic": title, "query": title + " latest information"},
            {"topic": "Updates on " + title, "query": title + " recent developments"}
        ]

    def conduct_research(self, title: str, content: str, research_depth: str = "medium") -> Dict[str, Any]:
        """
        Conduct web research to find updated information related to the blog content.

        Args:
            title: Blog title
            content: Blog content
            research_depth: Depth of research (low, medium, high)

        Returns:
            ``{"topics": [{"topic": ..., "sources": [...]}, ...]}`` with at
            most three topics
        """
        logger.info(f"Conducting research with depth: {research_depth}")

        prompt = "\n".join([
            "",
            "Extract 3-5 key topics or claims from this blog content that might need fact-checking or updating.",
            "For each topic, provide a concise search query that would help find the most recent information.",
            "",
            f"Blog title: {title}",
            "",
            "First 1000 characters of content:",
            f"{content[:1000]}...",
            "",
            "Format your response as a JSON array of objects with 'topic' and 'query' fields.",
            "",
        ])

        try:
            llm_response = llm_text_gen(prompt)
            array_match = re.search(r'\[.*\]', llm_response, re.DOTALL)
            parsed = json.loads(array_match.group(0)) if array_match else []
            # Keep only well-formed entries so the search loop cannot hit a
            # missing key.
            topics = [
                entry for entry in parsed
                if isinstance(entry, dict) and entry.get("topic") and entry.get("query")
            ]
        except Exception as e:
            logger.error(f"Error extracting topics: {e}")
            topics = []

        if not topics:
            topics = self._fallback_topics(title)

        num_results = self._RESULTS_BY_DEPTH.get(research_depth, 3)

        research_results = {"topics": []}

        for topic in topics[:3]:  # Limit to 3 topics to avoid excessive API calls
            sources = []

            # Try Exa search first
            try:
                exa_results = exa_search(topic["query"], num_results=num_results)
                if exa_results:
                    sources.extend(exa_results)
            except Exception as e:
                logger.warning(f"Exa search failed: {e}")

            # If Exa didn't return enough results, top up from Tavily
            if len(sources) < num_results:
                try:
                    tavily_results = tavily_search(topic["query"], num_results=num_results)
                    seen_urls = {s.get("url") for s in sources if isinstance(s, dict)}
                    for result in tavily_results or []:
                        result_url = result.get("url")
                        if result_url and result_url in seen_urls:
                            continue  # already supplied by Exa or an earlier result
                        sources.append(result)
                        if result_url:
                            seen_urls.add(result_url)
                except Exception as e:
                    logger.warning(f"Tavily search failed: {e}")

            research_results["topics"].append({"topic": topic["topic"], "sources": sources})

        return research_results

    # ------------------------------------------------------------------
    # Rewriting
    # ------------------------------------------------------------------

    def generate_rewrite_prompt(self, original_content: Dict[str, Any],
                                user_preferences: Dict[str, Any],
                                research_results: Dict[str, Any],
                                content_analysis: Dict[str, Any]) -> str:
        """
        Generate a prompt for the LLM to rewrite the blog.

        Args:
            original_content: Original blog content
            user_preferences: User preferences for rewriting
            research_results: Research results for updating content
            content_analysis: Analysis of the original content

        Returns:
            Prompt string for the LLM
        """
        logger.info("Generating rewrite prompt")

        title = original_content.get("title", "")
        content = original_content.get("content", "") or ""

        # Truncate content that would not fit the LLM's context window.
        if len(content) > self.MAX_PROMPT_CONTENT_CHARS:
            content_preview = content[:self.MAX_PROMPT_CONTENT_CHARS] + "...\n[Content truncated due to length]"
        else:
            content_preview = content

        # Summarise at most three sources per researched topic.
        research_parts = []
        for topic in research_results.get("topics", []):
            research_parts.append(f"\n## {topic['topic']}\n")
            for i, source in enumerate(topic.get("sources", [])[:3], start=1):
                snippet = (source.get('content') or 'No content')[:300]
                research_parts.append(f"Source {i}: {source.get('title', 'Untitled')}\n")
                research_parts.append(f"URL: {source.get('url', 'No URL')}\n")
                research_parts.append(f"Content: {snippet}...\n\n")
        research_summary = "".join(research_parts)

        # Keywords may arrive as a list, a single string, or not at all.
        keywords = user_preferences.get('keywords') or []
        keyword_text = keywords if isinstance(keywords, str) else ', '.join(keywords)

        prompt = "\n".join([
            "",
            "# Blog Rewriting Task",
            "",
            "## Original Blog Information",
            f"Title: {title}",
            f"Word Count: {content_analysis.get('metrics', {}).get('word_count', 'Unknown')}",
            f"Estimated Age: {content_analysis.get('content_age', {}).get('months', 'Unknown')} months",
            "",
            "## Rewriting Instructions",
            f"Mode: {user_preferences.get('rewrite_mode', 'standard')}",
            f"Target Tone: {user_preferences.get('tone', 'Professional')}",
            f"Target Word Count: {user_preferences.get('target_word_count', 'Same as original')}",
            f"Focus Keywords: {keyword_text}",
            "",
            "## Special Instructions",
            f"{user_preferences.get('special_instructions', 'No special instructions')}",
            "",
            "## Recent Research Findings",
            research_summary if research_summary else "No research results available.",
            "",
            "## Original Content",
            content_preview,
            "",
            "## Your Task",
            "Please rewrite this blog post according to the instructions above. The rewritten blog should:",
            "",
            "1. Maintain the core message and value of the original content",
            "2. Update any outdated information based on the research findings",
            "3. Adopt the requested tone and style",
            "4. Incorporate the focus keywords naturally",
            "5. Improve readability and engagement",
            "6. Maintain a logical structure with appropriate headings",
            "7. Include a compelling introduction and conclusion",
            "",
            "## Output Format",
            "Please provide your response in the following JSON format:",
            "```json",
            "{",
            '"title": "Rewritten title",',
            '"meta_description": "SEO-optimized meta description (max 160 characters)",',
            '"content": "Full rewritten content with proper markdown formatting",',
            '"suggested_images": [',
            "{",
            '"description": "Brief description of a suggested image",',
            '"caption": "Suggested caption for the image",',
            '"placement": "Where this image should be placed (e.g., \'After introduction\', \'Before conclusion\')"',
            "}",
            "]",
            "}",
            "```",
            "",
            "Ensure the JSON is properly formatted and valid.",
            "",
        ])

        return prompt

    @staticmethod
    def _extract_json_object(response: str) -> Dict[str, Any]:
        """Pull the JSON object out of an LLM reply.

        Tries, in order: the text between the first and *last* code fence
        (so fences inside the rewritten content do not cut the JSON short),
        the whole reply, and the span from the first ``{`` to the last ``}``.

        Raises:
            ValueError: if no candidate parses to a JSON object.
        """
        if not isinstance(response, str):
            raise ValueError("LLM returned no text")

        candidates = []
        fenced = re.search(r'```(?:json)?\s*(.*)```', response, re.DOTALL)
        if fenced:
            candidates.append(fenced.group(1).strip())
        candidates.append(response.strip())
        start, end = response.find("{"), response.rfind("}")
        if 0 <= start < end:
            candidates.append(response[start:end + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:
                continue
            if isinstance(parsed, dict):
                return parsed
        raise ValueError("No JSON object found in LLM response")

    def rewrite_blog(self, original_content: Dict[str, Any],
                     user_preferences: Dict[str, Any],
                     research_results: Dict[str, Any],
                     content_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """
        Rewrite the blog based on original content, user preferences, and research.

        Args:
            original_content: Original blog content
            user_preferences: User preferences for rewriting
            research_results: Research results for updating content
            content_analysis: Analysis of the original content

        Returns:
            Dictionary with ``title``, ``meta_description``, ``content`` and
            ``suggested_images``. If the LLM call or parsing fails, the
            original values are returned together with an ``error`` key.
        """
        logger.info("Rewriting blog content")

        prompt = self.generate_rewrite_prompt(
            original_content, user_preferences, research_results, content_analysis
        )

        try:
            response = llm_text_gen(prompt)
            rewritten_content = self._extract_json_object(response)

            # Any field the model omitted falls back to the original value.
            for field in self._REQUIRED_FIELDS:
                if field not in rewritten_content:
                    rewritten_content[field] = original_content.get(field, "")

            if "suggested_images" not in rewritten_content:
                rewritten_content["suggested_images"] = []

            return rewritten_content

        except Exception as e:
            logger.error(f"Error rewriting blog: {e}")
            return {
                "title": original_content.get("title", ""),
                "meta_description": original_content.get("meta_description", ""),
                "content": original_content.get("content", ""),
                "suggested_images": [],
                "error": str(e)
            }

    # ------------------------------------------------------------------
    # Images
    # ------------------------------------------------------------------

    def generate_image(self, image_prompt: str, style: str = "realistic") -> str:
        """
        Generate an image based on the prompt.

        Args:
            image_prompt: Prompt for image generation
            style: Style of the image

        Returns:
            Path to the generated image, or an empty string on failure
        """
        logger.info(f"Generating image with prompt: {image_prompt}")

        try:
            # Inside a method body this name resolves to the module-level
            # generate_image function, not to this method.
            image_path = generate_image(image_prompt, style=style)
            return image_path
        except Exception as e:
            logger.error(f"Error generating image: {e}")
            return ""
|
|
||||||
|
|
||||||
|
|
||||||
def write_blog_rewriter():
|
def write_blog_rewriter():
|
||||||
"""Main function to display the blog rewriter UI."""
|
"""Main function to display the blog rewriter UI."""
|
||||||
@@ -594,8 +81,7 @@ def write_blog_rewriter():
|
|||||||
else:
|
else:
|
||||||
st.success("Content extracted successfully!")
|
st.success("Content extracted successfully!")
|
||||||
st.session_state.current_step = 2
|
st.session_state.current_step = 2
|
||||||
# Auto-click the next tab
|
st.rerun()
|
||||||
st.experimental_rerun()
|
|
||||||
else:
|
else:
|
||||||
col1, col2 = st.columns([3, 1])
|
col1, col2 = st.columns([3, 1])
|
||||||
|
|
||||||
@@ -642,8 +128,7 @@ def write_blog_rewriter():
|
|||||||
|
|
||||||
st.success("Content imported successfully!")
|
st.success("Content imported successfully!")
|
||||||
st.session_state.current_step = 2
|
st.session_state.current_step = 2
|
||||||
# Auto-click the next tab
|
st.rerun()
|
||||||
st.experimental_rerun()
|
|
||||||
|
|
||||||
# Display the imported content if available
|
# Display the imported content if available
|
||||||
if st.session_state.original_content and "title" in st.session_state.original_content:
|
if st.session_state.original_content and "title" in st.session_state.original_content:
|
||||||
@@ -759,7 +244,7 @@ def write_blog_rewriter():
|
|||||||
if st.session_state.content_analysis and st.session_state.research_results:
|
if st.session_state.content_analysis and st.session_state.research_results:
|
||||||
if st.button("Proceed to Rewrite Settings", type="primary"):
|
if st.button("Proceed to Rewrite Settings", type="primary"):
|
||||||
st.session_state.current_step = 3
|
st.session_state.current_step = 3
|
||||||
st.experimental_rerun()
|
st.rerun()
|
||||||
|
|
||||||
# Tab 3: Rewrite Settings
|
# Tab 3: Rewrite Settings
|
||||||
with tab3:
|
with tab3:
|
||||||
@@ -868,7 +353,7 @@ def write_blog_rewriter():
|
|||||||
else:
|
else:
|
||||||
st.success("Blog rewritten successfully!")
|
st.success("Blog rewritten successfully!")
|
||||||
st.session_state.current_step = 4
|
st.session_state.current_step = 4
|
||||||
st.experimental_rerun()
|
st.rerun()
|
||||||
|
|
||||||
# Tab 4: Results & Export
|
# Tab 4: Results & Export
|
||||||
with tab4:
|
with tab4:
|
||||||
@@ -951,7 +436,7 @@ def write_blog_rewriter():
|
|||||||
}
|
}
|
||||||
|
|
||||||
st.success("Image generated successfully!")
|
st.success("Image generated successfully!")
|
||||||
st.experimental_rerun()
|
st.rerun()
|
||||||
|
|
||||||
# Display the generated image if available
|
# Display the generated image if available
|
||||||
if f"image_{i}" in st.session_state.generated_images:
|
if f"image_{i}" in st.session_state.generated_images:
|
||||||
@@ -998,7 +483,7 @@ def write_blog_rewriter():
|
|||||||
}
|
}
|
||||||
|
|
||||||
st.success("Image generated successfully!")
|
st.success("Image generated successfully!")
|
||||||
st.experimental_rerun()
|
st.rerun()
|
||||||
|
|
||||||
# Display the generated custom image if available
|
# Display the generated custom image if available
|
||||||
if "custom_image" in st.session_state.generated_images:
|
if "custom_image" in st.session_state.generated_images:
|
||||||
@@ -1133,7 +618,7 @@ def write_blog_rewriter():
|
|||||||
if key in st.session_state:
|
if key in st.session_state:
|
||||||
del st.session_state[key]
|
del st.session_state[key]
|
||||||
|
|
||||||
st.experimental_rerun()
|
st.rerun()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
write_blog_rewriter()
|
write_blog_rewriter()
|
||||||
595
lib/ai_writers/blog_rewriter_updater/blog_rewriter_utils.py
Normal file
595
lib/ai_writers/blog_rewriter_updater/blog_rewriter_utils.py
Normal file
@@ -0,0 +1,595 @@
|
|||||||
|
"""
|
||||||
|
Blog Rewriter Utilities Module
|
||||||
|
|
||||||
|
This module contains the core functionality for rewriting and updating blog content,
|
||||||
|
including content extraction, analysis, research, and rewriting capabilities.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
from typing import Dict, List, Tuple, Optional, Any
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Import required modules from the project
|
||||||
|
from ...gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||||
|
from ...gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
|
||||||
|
from ...ai_web_researcher.metaphor_basic_neural_web_search import metaphor_search_articles
|
||||||
|
from ...ai_web_researcher.tavily_ai_search import do_tavily_ai_search
|
||||||
|
|
||||||
|
# Logging: configure the root logger at import time and create this
# module's own named logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Length limits for generated metadata.
# NOTE(review): presumably measured in characters (the rewrite prompt asks
# for a meta description of "max 160 characters") - confirm for the title.
MAX_TITLE_LENGTH = 70
MAX_META_DESCRIPTION_LENGTH = 160

# Supported rewrite modes: mode key -> human-readable description.
REWRITE_MODES = dict(
    [
        ("standard", "Standard rewrite with improved clarity and flow"),
        ("seo_optimization", "Optimize for search engines with targeted keywords"),
        ("simplification", "Simplify complex content for broader audience"),
        ("expansion", "Expand with additional details and examples"),
        ("fact_check", "Focus on fact-checking and updating information"),
        ("tone_shift", "Change the tone while preserving content"),
        ("modernization", "Update outdated content with current information"),
    ]
)

# Tone choices offered for the rewritten text.
TONE_OPTIONS = [
    "Professional",
    "Conversational",
    "Academic",
    "Enthusiastic",
    "Authoritative",
    "Friendly",
    "Technical",
    "Inspirational",
]
|
||||||
|
|
||||||
|
class BlogRewriter:
    """Fetch, analyze, research, and rewrite blog posts with LLM assistance.

    The public methods exchange plain dictionaries and are typically chained:
    ``extract_content_from_url`` -> ``analyze_content`` ->
    ``conduct_research`` -> ``rewrite_blog``, with ``generate_image`` as an
    optional extra step.  Every public method is best-effort: failures are
    logged and reported through the return value instead of being raised.
    """

    # Elements removed from a content container before its text is read.
    # ("comments" is not a standard HTML tag; it is kept from the original list.)
    _NOISE_TAGS = ["nav", "aside", "footer", "comments", "script", "style", "header"]

    # Browser-like request headers sent with the page download.
    _REQUEST_HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Cache-Control': 'max-age=0'
    }

    # strptime patterns tried, in order, for publish dates that are not ISO 8601.
    _DATE_FORMATS = (
        "%Y-%m-%d", "%d-%m-%Y", "%B %d, %Y", "%b %d, %Y",
        "%d %B %Y", "%d %b %Y", "%Y/%m/%d", "%d/%m/%Y",
    )

    def __init__(self):
        """Initialize the BlogRewriter with empty per-run state containers."""
        self.original_content = {}
        self.rewritten_content = {}
        self.research_results = {}
        self.content_analysis = {}
        self.image_suggestions = []

    # ------------------------------------------------------------------
    # Content extraction
    # ------------------------------------------------------------------

    def extract_content_from_url(self, url: str) -> Dict[str, Any]:
        """
        Download a page and extract its blog content and metadata.

        Args:
            url: The URL to extract content from

        Returns:
            Dictionary with the keys ``title``, ``meta_description``,
            ``content``, ``headings``, ``images``, ``publish_date``,
            ``author`` and ``url``.  On any failure the same keys are
            returned with empty values plus an ``error`` message; nothing
            is raised.
        """
        logger.info(f"Extracting content from URL: {url}")

        try:
            response = requests.get(url, headers=self._REQUEST_HEADERS, timeout=15)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # <title> may contain nested markup, in which case ``.string``
            # is None; get_text() always yields a string.
            title = soup.title.get_text(strip=True) if soup.title else ""
            meta_desc = self._meta_content(soup, {"name": "description"}) or ""

            # Read everything that searches the whole document BEFORE the
            # body text: the text strategies decompose() header/nav/footer
            # elements, which would otherwise drop headings, images, dates
            # and bylines depending on which strategy happened to match.
            headings = [
                {"level": int(h.name[1]), "text": h.get_text().strip()}
                for h in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
            ]
            images = self._extract_images(soup, url)
            publish_date = self._extract_publish_date(soup)
            author = self._extract_author(soup)
            content = self._extract_text(soup)

            # Log content extraction results
            logger.info(f"Extracted content length: {len(content)} characters")
            logger.info(f"Found {len(headings)} headings")
            logger.info(f"Found {len(images)} images")
            logger.info(f"Publish date: {publish_date}")
            logger.info(f"Author: {author}")

            return {
                "title": title,
                "meta_description": meta_desc,
                "content": content,
                "headings": headings,
                "images": images,
                "publish_date": publish_date,
                "author": author,
                "url": url
            }

        except Exception as e:
            # Deliberate catch-all boundary: callers inspect the "error" key.
            logger.error(f"Error extracting content from URL: {e}")
            return {
                "title": "",
                "meta_description": "",
                "content": "",
                "headings": [],
                "images": [],
                "publish_date": None,
                "author": None,
                "url": url,
                "error": str(e)
            }

    def _meta_content(self, soup, attrs: Dict[str, str]) -> Optional[str]:
        """Return the ``content`` of the first <meta> matching ``attrs``, else None."""
        tag = soup.find("meta", attrs=attrs)
        if tag and "content" in tag.attrs:
            return tag["content"]
        return None

    def _text_without_noise(self, container) -> str:
        """Strip navigation/boilerplate elements from ``container`` and return its text."""
        for elem in container.find_all(self._NOISE_TAGS):
            elem.decompose()
        return container.get_text(separator="\n\n")

    def _extract_text(self, soup) -> str:
        """Return the main body text, trying progressively broader containers."""
        text = ""

        # Strategy 1: Look for article tag
        article_tag = soup.find("article")
        if article_tag:
            text = article_tag.get_text(separator="\n\n")

        # Strategy 2: Look for main content areas
        if not text:
            main_content = soup.find(
                ["main", "div", "section"],
                class_=re.compile(r"content|article|post|entry|main|body"),
            )
            if main_content:
                text = self._text_without_noise(main_content)

        # Strategy 3: Look for specific content classes
        if not text:
            for class_name in ["post-content", "entry-content", "article-content",
                               "blog-content", "content-area"]:
                content_div = soup.find("div", class_=class_name)
                if content_div:
                    text = self._text_without_noise(content_div)
                    break

        # Strategy 4: Look for content within body
        if not text:
            body = soup.find("body")
            if body:
                text = self._text_without_noise(body)

        # Collapse horizontal whitespace only.  The previous r'\s{2,}' pattern
        # also matched "\n\n" and therefore erased every paragraph break,
        # which analyze_content() depends on for its paragraph count.
        text = re.sub(r'[^\S\n]+', ' ', text)
        text = re.sub(r' ?\n ?', '\n', text)
        text = re.sub(r'\n{3,}', '\n\n', text)
        return text.strip()

    def _extract_images(self, soup, page_url: str) -> List[Dict[str, Any]]:
        """Collect <img> metadata, resolving each ``src`` against ``page_url``."""
        from urllib.parse import urljoin

        images = []
        for img in soup.find_all("img"):
            src = img.get("src")
            # Inline data: URIs carry no fetchable location.
            if not src or src.startswith("data:"):
                continue
            images.append({
                # urljoin handles absolute, root-relative, path-relative and
                # protocol-relative ("//cdn...") sources correctly.
                "url": urljoin(page_url, src),
                "alt_text": img.get("alt", ""),
                "title": img.get("title", ""),
                "class": img.get("class", []),
                "width": img.get("width"),
                "height": img.get("height")
            })
        return images

    def _extract_publish_date(self, soup) -> Optional[str]:
        """Return the raw publish date string from meta tags or date-like elements."""
        for prop in ["article:published_time", "datePublished", "dateCreated", "dateModified"]:
            value = self._meta_content(soup, {"property": prop})
            if value:
                return value

        # Fall back to visible HTML elements whose class looks date-related.
        date_elem = soup.find(
            ["time", "span", "div"],
            class_=re.compile(r"date|time|publish|posted|created"),
        )
        if date_elem:
            return date_elem.get_text().strip() or None
        return None

    def _extract_author(self, soup) -> Optional[str]:
        """Return the author name from meta tags or byline-like elements."""
        candidates = [
            {"name": "author"},
            {"property": "article:author"},
            {"property": "author"},
        ]
        for attrs in candidates:
            value = self._meta_content(soup, attrs)
            if value:
                return value

        # Fall back to visible HTML elements whose class looks like a byline.
        author_elem = soup.find(
            ["a", "span", "div"],
            class_=re.compile(r"author|byline|writer|posted-by"),
        )
        if author_elem:
            return author_elem.get_text().strip() or None
        return None

    # ------------------------------------------------------------------
    # Analysis
    # ------------------------------------------------------------------

    def _parse_publish_date(self, raw: Any) -> Optional[datetime]:
        """Parse a publish date string; return None when no known format fits."""
        if not isinstance(raw, str):
            return None
        text = raw.strip()

        if "T" in text:
            try:
                # "Z" is rewritten because older fromisoformat() rejects it.
                return datetime.fromisoformat(text.replace("Z", "+00:00"))
            except ValueError:
                pass  # not ISO after all; try the explicit formats below

        for fmt in self._DATE_FORMATS:
            try:
                return datetime.strptime(text, fmt)
            except ValueError:
                continue
        return None

    def analyze_content(self, content: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze the extracted content to provide insights.

        Args:
            content: Dictionary containing extracted content (the keys read
                are ``content``, ``headings``, ``publish_date`` and ``images``;
                all are optional)

        Returns:
            Dictionary with ``metrics`` (word/sentence/paragraph counts and
            averages), ``heading_structure`` (heading level -> count),
            ``content_age`` (days/months/years, or an ``error`` message) and
            ``images`` (count and how many have alt text)
        """
        logger.info("Analyzing content")

        analysis = {}

        # Basic metrics.  Empty fragments produced by the splits (trailing
        # punctuation, empty text) are not counted as sentences/paragraphs.
        text_content = content.get("content") or ""
        word_count = len(text_content.split())
        sentence_count = sum(
            1 for part in re.split(r'[.!?]+', text_content) if part.strip()
        )
        paragraph_count = sum(
            1 for part in re.split(r'\n\n+', text_content) if part.strip()
        )

        analysis["metrics"] = {
            "word_count": word_count,
            "sentence_count": sentence_count,
            "paragraph_count": paragraph_count,
            "avg_words_per_sentence": round(word_count / max(sentence_count, 1), 1),
            "avg_sentences_per_paragraph": round(sentence_count / max(paragraph_count, 1), 1)
        }

        # Heading structure analysis
        heading_structure = {}
        for h in content.get("headings") or []:
            level = h["level"]
            heading_structure[level] = heading_structure.get(level, 0) + 1
        analysis["heading_structure"] = heading_structure

        # Content age analysis
        publish_date = content.get("publish_date")
        if publish_date:
            pub_date = self._parse_publish_date(publish_date)
            if pub_date is None:
                logger.warning(f"Could not parse publish date: {publish_date!r}")
                analysis["content_age"] = {"error": "Could not determine content age"}
            else:
                # Use "now" in the same timezone-awareness as the parsed
                # date; subtracting a naive datetime from an aware one
                # raises TypeError.
                age_days = (datetime.now(pub_date.tzinfo) - pub_date).days
                analysis["content_age"] = {
                    "days": age_days,
                    "months": round(age_days / 30, 1),
                    "years": round(age_days / 365, 1)
                }
        else:
            analysis["content_age"] = {"error": "No publish date found"}

        # Image analysis
        images = content.get("images") or []
        analysis["images"] = {
            "count": len(images),
            "with_alt_text": sum(1 for img in images if img.get("alt_text"))
        }

        return analysis

    # ------------------------------------------------------------------
    # Research
    # ------------------------------------------------------------------

    def conduct_research(self, title: str, content: str, research_depth: str = "medium") -> Dict[str, Any]:
        """
        Conduct web research to find updated information related to the blog content.

        Args:
            title: Blog title
            content: Blog content (only the first 1000 characters are sent to the LLM)
            research_depth: Depth of research (low, medium, high); unknown
                values behave like "medium"

        Returns:
            ``{"topics": [{"topic": str, "sources": [...]}, ...]}`` with at
            most three topics.  Search failures are logged and leave the
            affected topic with fewer (possibly zero) sources.
        """
        logger.info(f"Conducting research with depth: {research_depth}")

        # Extract key topics from the content
        prompt = f"""
Extract 3-5 key topics or claims from this blog content that might need fact-checking or updating.
For each topic, provide a concise search query that would help find the most recent information.

Blog title: {title}

First 1000 characters of content:
{(content or "")[:1000]}...

Format your response as a JSON array of objects with 'topic' and 'query' fields.
"""

        topics = []
        try:
            llm_response = llm_text_gen(prompt)
            array_match = re.search(r'\[.*\]', llm_response, re.DOTALL)
            if array_match:
                parsed = json.loads(array_match.group(0))
                # Keep only well-formed items so the search loop below can
                # index "topic"/"query" without raising.
                topics = [
                    item for item in parsed
                    if isinstance(item, dict) and item.get("topic") and item.get("query")
                ]
        except Exception as e:
            logger.error(f"Error extracting topics: {e}")

        if not topics:
            # Generic fallback when the LLM gave nothing usable.
            topics = [
                {"topic": title, "query": title + " latest information"},
                {"topic": "Updates on " + title, "query": title + " recent developments"}
            ]

        # Determine number of results based on research depth
        num_results = {"low": 2, "medium": 3, "high": 5}.get(research_depth, 3)

        research_results = {"topics": []}

        # Conduct research for each topic
        # NOTE(review): both search helpers are assumed to return an iterable
        # of dicts carrying a "url" key - confirm against their implementations.
        for topic in topics[:3]:  # Limit to 3 topics
            query = topic["query"]
            sources = []

            # Try Exa search first
            try:
                exa_results = metaphor_search_articles(query, num_results=num_results)
                if exa_results:
                    sources.extend(exa_results)
            except Exception as e:
                logger.warning(f"Exa search failed: {e}")

            # If Exa didn't return enough results, try Tavily
            if len(sources) < num_results:
                try:
                    tavily_results = do_tavily_ai_search(query, num_results=num_results)
                    if tavily_results:
                        seen_urls = {s.get("url") for s in sources}
                        for result in tavily_results:
                            result_url = result.get("url")
                            if result_url not in seen_urls:
                                sources.append(result)
                                seen_urls.add(result_url)
                except Exception as e:
                    logger.warning(f"Tavily search failed: {e}")

            research_results["topics"].append({"topic": topic["topic"], "sources": sources})

        return research_results

    # ------------------------------------------------------------------
    # Rewriting
    # ------------------------------------------------------------------

    def generate_rewrite_prompt(self, original_content: Dict[str, Any],
                                user_preferences: Dict[str, Any],
                                research_results: Dict[str, Any],
                                content_analysis: Dict[str, Any]) -> str:
        """
        Generate a prompt for the LLM to rewrite the blog.

        Args:
            original_content: Original blog content (``title`` and ``content`` are read)
            user_preferences: User preferences for rewriting (``rewrite_mode``,
                ``tone``, ``target_word_count``, ``keywords``, ``special_instructions``)
            research_results: Research results for updating content
            content_analysis: Analysis of the original content

        Returns:
            Prompt string for the LLM.  Content longer than 6000 characters
            is truncated, and at most three sources per topic are quoted
            (300 characters each).
        """
        logger.info("Generating rewrite prompt")

        research_results = research_results or {}
        content_analysis = content_analysis or {}

        # Extract key information
        title = original_content.get("title", "")
        content = original_content.get("content") or ""

        # Truncate content if it's too long
        max_content_length = 6000
        if len(content) > max_content_length:
            content_preview = content[:max_content_length] + "...\n[Content truncated due to length]"
        else:
            content_preview = content

        # Format research results
        summary_parts = []
        for topic in research_results.get("topics") or []:
            summary_parts.append(f"\n## {topic.get('topic', 'Untitled topic')}\n")
            for i, source in enumerate((topic.get("sources") or [])[:3], start=1):
                # A source may carry content=None; fall back instead of
                # failing on the slice.
                snippet = str(source.get('content') or 'No content')[:300]
                summary_parts.append(
                    f"Source {i}: {source.get('title', 'Untitled')}\n"
                    f"URL: {source.get('url', 'No URL')}\n"
                    f"Content: {snippet}...\n\n"
                )
        research_summary = "".join(summary_parts)

        # keywords may be missing or None; render non-string items safely.
        keywords = ', '.join(str(k) for k in (user_preferences.get('keywords') or []))

        # Build the prompt.  The template is intentionally flush-left so the
        # text sent to the LLM carries no source-code indentation.
        prompt = f"""
# Blog Rewriting Task

## Original Blog Information
Title: {title}
Word Count: {content_analysis.get('metrics', {}).get('word_count', 'Unknown')}
Estimated Age: {content_analysis.get('content_age', {}).get('months', 'Unknown')} months

## Rewriting Instructions
Mode: {user_preferences.get('rewrite_mode', 'standard')}
Target Tone: {user_preferences.get('tone', 'Professional')}
Target Word Count: {user_preferences.get('target_word_count', 'Same as original')}
Focus Keywords: {keywords}

## Special Instructions
{user_preferences.get('special_instructions', 'No special instructions')}

## Recent Research Findings
{research_summary if research_summary else "No research results available."}

## Original Content
{content_preview}

## Your Task
Please rewrite this blog post according to the instructions above. The rewritten blog should:

1. Maintain the core message and value of the original content
2. Update any outdated information based on the research findings
3. Adopt the requested tone and style
4. Incorporate the focus keywords naturally
5. Improve readability and engagement
6. Maintain a logical structure with appropriate headings
7. Include a compelling introduction and conclusion

## Output Format
Please provide your response in the following JSON format:
```json
{{
"title": "Rewritten title",
"meta_description": "SEO-optimized meta description (max 160 characters)",
"content": "Full rewritten content with proper markdown formatting",
"suggested_images": [
{{
"description": "Brief description of a suggested image",
"caption": "Suggested caption for the image",
"placement": "Where this image should be placed (e.g., 'After introduction', 'Before conclusion')"
}}
]
}}
```

Ensure the JSON is properly formatted and valid.
"""

        return prompt

    def _strip_control_chars(self, text: str) -> str:
        """Drop ASCII control characters except newline, carriage return and tab."""
        return ''.join(char for char in text if ord(char) >= 32 or char in '\n\r\t')

    def _parse_rewrite_response(self, response: str) -> Dict[str, Any]:
        """Extract the JSON object from an LLM reply.

        Raises:
            ValueError: if no candidate span decodes to a JSON object.
        """
        cleaned = self._strip_control_chars(response)

        # Candidate spans, most specific first: a ```json fence, the
        # outermost {...} span, then the whole reply with fence markers removed.
        candidates = []
        fenced = re.search(r'```json\s*(.*?)\s*```', cleaned, re.DOTALL)
        if fenced:
            candidates.append(fenced.group(1))
        braces = re.search(r'\{.*\}', cleaned, re.DOTALL)
        if braces:
            candidates.append(braces.group(0))
        candidates.append(re.sub(r'```(json)?', '', cleaned).strip())

        last_error = None
        for candidate in candidates:
            try:
                # strict=False accepts raw newlines/tabs inside string values,
                # the usual flaw in LLM-produced JSON.  The previous textual
                # "repairs" double-escaped valid \n sequences and unescaped
                # quotes, which corrupted otherwise valid documents.
                parsed = json.loads(candidate, strict=False)
            except json.JSONDecodeError as e:
                last_error = e
                continue
            if isinstance(parsed, dict):
                return parsed
            last_error = ValueError("JSON response is not an object")

        raise ValueError(f"Could not parse JSON from LLM response: {last_error}")

    def rewrite_blog(self, original_content: Dict[str, Any],
                     user_preferences: Dict[str, Any],
                     research_results: Dict[str, Any],
                     content_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """
        Rewrite the blog based on original content, user preferences, and research.

        Args:
            original_content: Original blog content
            user_preferences: User preferences for rewriting
            research_results: Research results for updating content
            content_analysis: Analysis of the original content

        Returns:
            Dictionary with ``title``, ``meta_description``, ``content`` and
            ``suggested_images``.  If the LLM call or response parsing fails,
            the original title/description/content are returned together
            with an ``error`` message; nothing is raised for those failures.
        """
        logger.info("Rewriting blog content")

        # Generate the prompt
        prompt = self.generate_rewrite_prompt(
            original_content, user_preferences, research_results, content_analysis
        )

        # Call the LLM to rewrite the content
        try:
            response = llm_text_gen(prompt)
            if not response:
                raise ValueError("LLM returned an empty response")

            rewritten_content = self._parse_rewrite_response(response)

            # Validate the response structure
            for field in ["title", "meta_description", "content"]:
                if field not in rewritten_content:
                    logger.warning(f"Missing required field '{field}' in rewritten content")
                    rewritten_content[field] = original_content.get(field, "")

            # Ensure suggested_images exists and is a list
            if not isinstance(rewritten_content.get("suggested_images"), list):
                rewritten_content["suggested_images"] = []

            # Clean up the content field
            body = rewritten_content["content"]
            if isinstance(body, str):
                body = self._strip_control_chars(body).replace('\r\n', '\n')
                # Only squeeze runs of blank lines.  Collapsing all
                # whitespace (the old r'\s+' -> ' ') flattened the Markdown
                # headings, lists and paragraphs the prompt asks for.
                body = re.sub(r'\n{3,}', '\n\n', body)
                rewritten_content["content"] = body.strip()

            return rewritten_content

        except Exception as e:
            # Deliberate catch-all boundary: fall back to the original text.
            logger.error(f"Error rewriting blog: {e}")
            return {
                "title": original_content.get("title", ""),
                "meta_description": original_content.get("meta_description", ""),
                "content": original_content.get("content", ""),
                "suggested_images": [],
                "error": str(e)
            }

    # ------------------------------------------------------------------
    # Images
    # ------------------------------------------------------------------

    def generate_image(self, image_prompt: str, style: str = "realistic") -> str:
        """
        Generate an image based on the prompt.

        Delegates to the module-level ``generate_image`` function (inside a
        method body the bare name resolves to the module global, not to this
        method).

        Args:
            image_prompt: Prompt for image generation
            style: Style of the image

        Returns:
            Path to the generated image, or an empty string when generation
            fails or yields no path
        """
        logger.info(f"Generating image with prompt: {image_prompt}")

        try:
            image_path = generate_image(image_prompt, style=style)
        except Exception as e:
            logger.error(f"Error generating image: {e}")
            return ""
        # Honour the declared ``-> str`` contract if the generator returns None.
        return image_path or ""
|
||||||
@@ -335,7 +335,7 @@ def do_web_research():
|
|||||||
# Define the research options dialog function
|
# Define the research options dialog function
|
||||||
@st.dialog("🔍 Research Options", width="large")
|
@st.dialog("🔍 Research Options", width="large")
|
||||||
def show_research_options():
|
def show_research_options():
|
||||||
tab1, tab2, tab3 = st.tabs(["Basic", "Advanced", "Technical"])
|
tab1, tab2 = st.tabs(["Basic", "Advanced"])
|
||||||
|
|
||||||
with tab1:
|
with tab1:
|
||||||
st.session_state.research_options["related_keywords"] = st.text_input(
|
st.session_state.research_options["related_keywords"] = st.text_input(
|
||||||
@@ -400,7 +400,10 @@ def do_web_research():
|
|||||||
help="Time period for research results"
|
help="Time period for research results"
|
||||||
)
|
)
|
||||||
|
|
||||||
with tab3:
|
# Add the technical options to the Advanced tab
|
||||||
|
st.markdown("---")
|
||||||
|
st.markdown("### Advanced Search Parameters")
|
||||||
|
|
||||||
st.session_state.research_options["include_domains"] = st.text_input(
|
st.session_state.research_options["include_domains"] = st.text_input(
|
||||||
"Include Domains",
|
"Include Domains",
|
||||||
value=st.session_state.research_options["include_domains"],
|
value=st.session_state.research_options["include_domains"],
|
||||||
@@ -415,31 +418,6 @@ def do_web_research():
|
|||||||
help="Find content similar to this URL"
|
help="Find content similar to this URL"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Research method selection
|
|
||||||
st.markdown("### Select Research Method")
|
|
||||||
search_options = [
|
|
||||||
("google", "🔍 Google Search", "Traditional web research with AI analysis", bool(api_keys['SERPER_API_KEY'])),
|
|
||||||
("ai", "🤖 AI Search", "Neural search with semantic analysis", bool(api_keys['METAPHOR_API_KEY'] and api_keys['TAVILY_API_KEY'])),
|
|
||||||
("deep", "🔬 Deep Search (Beta)", "Advanced deep web analysis", bool(all(api_keys.values())))
|
|
||||||
]
|
|
||||||
|
|
||||||
enabled_options = [opt[1] for opt in search_options if opt[3]]
|
|
||||||
if enabled_options:
|
|
||||||
selected_option = st.radio(
|
|
||||||
"Search Method",
|
|
||||||
options=enabled_options,
|
|
||||||
horizontal=True,
|
|
||||||
help="Choose your preferred research method"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Map the selected option to the search_mode value
|
|
||||||
for mode, label, _, _ in search_options:
|
|
||||||
if label == selected_option:
|
|
||||||
st.session_state.research_options["search_mode"] = mode
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
st.warning("No search methods available. Please configure API keys.")
|
|
||||||
|
|
||||||
col1, col2 = st.columns([1, 1])
|
col1, col2 = st.columns([1, 1])
|
||||||
with col1:
|
with col1:
|
||||||
if st.button("Apply", use_container_width=True, type="primary"):
|
if st.button("Apply", use_container_width=True, type="primary"):
|
||||||
@@ -477,6 +455,31 @@ def do_web_research():
|
|||||||
if st.button("Research Options", use_container_width=True):
|
if st.button("Research Options", use_container_width=True):
|
||||||
show_research_options()
|
show_research_options()
|
||||||
|
|
||||||
|
# Research method selection in main container
|
||||||
|
st.markdown("### Select Research Method")
|
||||||
|
search_options = [
|
||||||
|
("google", "🔍 Google Search", "Traditional web research with AI analysis", bool(api_keys['SERPER_API_KEY'])),
|
||||||
|
("ai", "🤖 AI Search", "Neural search with semantic analysis", bool(api_keys['METAPHOR_API_KEY'] and api_keys['TAVILY_API_KEY'])),
|
||||||
|
("deep", "🔬 Deep Search (Beta)", "Advanced deep web analysis", bool(all(api_keys.values())))
|
||||||
|
]
|
||||||
|
|
||||||
|
enabled_options = [opt[1] for opt in search_options if opt[3]]
|
||||||
|
if enabled_options:
|
||||||
|
selected_option = st.radio(
|
||||||
|
"Search Method",
|
||||||
|
options=enabled_options,
|
||||||
|
horizontal=True,
|
||||||
|
help="Choose your preferred research method"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Map the selected option to the search_mode value
|
||||||
|
for mode, label, _, _ in search_options:
|
||||||
|
if label == selected_option:
|
||||||
|
st.session_state.research_options["search_mode"] = mode
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
st.warning("No search methods available. Please configure API keys.")
|
||||||
|
|
||||||
# Execute search button
|
# Execute search button
|
||||||
if st.button("🔍 Start Research", type="primary", use_container_width=True):
|
if st.button("🔍 Start Research", type="primary", use_container_width=True):
|
||||||
if not st.session_state.research_options["primary_keywords"]:
|
if not st.session_state.research_options["primary_keywords"]:
|
||||||
|
|||||||
@@ -243,4 +243,4 @@ Ensure the response is valid JSON."""
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"[AsyncWebCrawlerService.analyze_content_with_llm] Error analyzing content with LLM: {str(e)}")
|
logger.error(f"[AsyncWebCrawlerService.analyze_content_with_llm] Error analyzing content with LLM: {str(e)}")
|
||||||
return {}
|
return {}
|
||||||
Reference in New Issue
Block a user