# File: ALwrity/lib/ai_writers/ai_blog_rewriter.py
# Last modified: 2025-04-30 16:06:33 +05:30 (1139 lines, 50 KiB, Python)
#
# NOTE: The repository viewer flagged this file as containing invisible
# Unicode characters that are indistinguishable to humans but may be
# processed differently by a computer. If that is not intentional, reveal
# and review them before relying on this copy.
"""
AI Blog Rewriter Module
This module provides functionality to rewrite and update existing blog content
with improved quality, factual accuracy, and SEO optimization.
"""
import html
import json
import logging
import os
import re
import time
from datetime import datetime
from typing import Dict, List, Tuple, Optional, Any
from urllib.parse import urljoin

import requests
import streamlit as st
from bs4 import BeautifulSoup

# Import required modules from the project
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
from ..gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
from ..web_research.exa_search import exa_search
from ..web_research.tavily_search import tavily_search
# Module-wide logging: INFO level, timestamped records tagged with the
# logger name and severity.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Length limits (in characters) for SEO fields. The meta-description limit
# is enforced by the manual-import text area in the UI below.
MAX_TITLE_LENGTH = 70
MAX_META_DESCRIPTION_LENGTH = 160

# Available rewrite modes. Keys are the values offered in the "Rewrite Mode"
# select box; each value is the description shown beneath it.
REWRITE_MODES = {
    "standard": "Standard rewrite with improved clarity and flow",
    "seo_optimization": "Optimize for search engines with targeted keywords",
    "simplification": "Simplify complex content for broader audience",
    "expansion": "Expand with additional details and examples",
    "fact_check": "Focus on fact-checking and updating information",
    "tone_shift": "Change the tone while preserving content",
    "modernization": "Update outdated content with current information",
}

# Tones offered in the "Target Tone" select box, in display order.
TONE_OPTIONS = [
    "Professional",
    "Conversational",
    "Academic",
    "Enthusiastic",
    "Authoritative",
    "Friendly",
    "Technical",
    "Inspirational",
]
class BlogRewriter:
    """Fetch, analyze, research and rewrite an existing blog post.

    The public methods are independent steps of one workflow: scrape a page
    (``extract_content_from_url``), compute simple metrics
    (``analyze_content``), look up fresh sources (``conduct_research``), and
    ask the LLM for a rewrite (``rewrite_blog``). Each step receives its
    inputs as arguments and returns plain dictionaries. Failures of external
    calls are logged and turned into fallback values (or an ``"error"`` key)
    instead of being raised.
    """

    # Tags removed from the chosen content container before its text is read.
    _NOISE_TAGS = ["nav", "aside", "footer", "comments", "script", "style"]

    # strptime formats tried, in order, for publish dates that are not ISO 8601.
    _DATE_FORMATS = (
        "%Y-%m-%d", "%d-%m-%Y", "%B %d, %Y", "%b %d, %Y",
        "%d %B %Y", "%d %b %Y", "%Y/%m/%d", "%d/%m/%Y",
    )

    def __init__(self):
        """Initialize empty result holders.

        The methods of this class do not read these attributes; they are
        plain containers available to callers.
        """
        self.original_content = {}
        self.rewritten_content = {}
        self.research_results = {}
        self.content_analysis = {}
        self.image_suggestions = []

    @staticmethod
    def _clean_text(text: str) -> str:
        """Normalize whitespace while keeping paragraph breaks intact."""
        # Collapse runs of horizontal whitespace only; newlines carry the
        # paragraph structure and must survive this step.
        text = re.sub(r'[^\S\n]+', ' ', text)
        text = re.sub(r' ?\n ?', '\n', text)
        # At most one blank line between paragraphs.
        text = re.sub(r'\n{3,}', '\n\n', text)
        return text.strip()

    @staticmethod
    def _fallback_topics(title: str) -> List[Dict[str, str]]:
        """Return generic research topics derived from the title alone."""
        return [
            {"topic": title, "query": title + " latest information"},
            {"topic": "Updates on " + title, "query": title + " recent developments"},
        ]

    @classmethod
    def _parse_publish_date(cls, raw: Any) -> Optional[datetime]:
        """Parse a publish date string, returning None when no format fits."""
        text = str(raw).strip()
        if "T" in text:
            try:
                # fromisoformat() only accepts a trailing "Z" from Python 3.11.
                return datetime.fromisoformat(text.replace("Z", "+00:00"))
            except ValueError:
                pass  # Not ISO after all; try the explicit formats below.
        for fmt in cls._DATE_FORMATS:
            try:
                return datetime.strptime(text, fmt)
            except ValueError:
                continue
        return None

    @staticmethod
    def _parse_llm_json(response: str) -> Dict[str, Any]:
        """Extract the JSON object from an LLM reply.

        Accepts a fenced ```json block, bare JSON, or JSON surrounded by
        prose. Raises ``ValueError`` (``json.JSONDecodeError`` is a subclass)
        when no JSON object can be recovered.
        """
        fenced = re.search(r'```json\s*(.*?)\s*```', response, re.DOTALL)
        candidate = fenced.group(1) if fenced else response
        candidate = re.sub(r'```(json)?', '', candidate).strip()
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            # Models often wrap the object in explanatory text; retry on the
            # outermost {...} span of the whole reply.
            start, end = response.find("{"), response.rfind("}")
            if start == -1 or end < start:
                raise
            parsed = json.loads(response[start:end + 1])
        if not isinstance(parsed, dict):
            raise ValueError("LLM response JSON is not an object")
        return parsed

    def extract_content_from_url(self, url: str) -> Dict[str, Any]:
        """
        Extract content from a given URL.

        Downloads the page, then pulls out the title, meta description, main
        text, headings, images, publish date and author.

        Args:
            url: The URL to extract content from
        Returns:
            Dictionary with the keys ``title``, ``meta_description``,
            ``content``, ``headings``, ``images``, ``publish_date``,
            ``author`` and ``url``. On any failure the same keys are returned
            with empty values plus an ``error`` key holding the message.
        """
        logger.info(f"Extracting content from URL: {url}")
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            response = requests.get(url, headers=headers, timeout=15)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            # <title> may contain nested tags, in which case .string is None;
            # get_text() always yields a string.
            title = soup.title.get_text(strip=True) if soup.title else ""

            meta_desc = ""
            meta_tag = soup.find("meta", attrs={"name": "description"})
            if meta_tag and "content" in meta_tag.attrs:
                meta_desc = meta_tag["content"]

            # Main content - a simplified approach: prefer <article>, then a
            # container whose class looks like content, then the whole body.
            container = soup.find("article")
            if container is None:
                container = soup.find(
                    ["main", "div", "section"],
                    class_=re.compile(r"content|article|post|entry"),
                )
            if container is None:
                container = soup.find("body")

            content = ""
            if container is not None:
                # Drop navigation, sidebars, scripts, etc. so their text does
                # not leak into the extracted article.
                for elem in container.find_all(self._NOISE_TAGS):
                    elem.decompose()
                content = self._clean_text(container.get_text(separator="\n\n"))

            # Headings, for structure analysis.
            headings = [
                {"level": int(h.name[1]), "text": h.get_text().strip()}
                for h in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
            ]

            # Images; inline data: URIs are skipped.
            images = []
            for img in soup.find_all("img"):
                src = img.get("src")
                if not src or src.startswith("data:"):
                    continue
                images.append({
                    # urljoin resolves root-relative, path-relative and
                    # protocol-relative URLs against the page URL.
                    "url": urljoin(url, src),
                    "alt_text": img.get("alt", ""),
                })

            # Publish date: Open Graph meta tag first, then date-like elements.
            publish_date = None
            date_meta = soup.find("meta", attrs={"property": "article:published_time"})
            if date_meta and "content" in date_meta.attrs:
                publish_date = date_meta["content"]
            else:
                date_elem = soup.find(["time", "span", "div"], class_=re.compile(r"date|time|publish"))
                if date_elem and date_elem.get_text():
                    publish_date = date_elem.get_text().strip()

            # Author: meta tag first, then author/byline elements.
            author = None
            author_meta = soup.find("meta", attrs={"name": "author"})
            if author_meta and "content" in author_meta.attrs:
                author = author_meta["content"]
            else:
                author_elem = soup.find(["a", "span", "div"], class_=re.compile(r"author|byline"))
                if author_elem and author_elem.get_text():
                    author = author_elem.get_text().strip()

            return {
                "title": title,
                "meta_description": meta_desc,
                "content": content,
                "headings": headings,
                "images": images,
                "publish_date": publish_date,
                "author": author,
                "url": url
            }
        except Exception as e:
            # Boundary handler: callers check for the "error" key instead of
            # catching exceptions.
            logger.error(f"Error extracting content from URL: {e}")
            return {
                "title": "",
                "meta_description": "",
                "content": "",
                "headings": [],
                "images": [],
                "publish_date": None,
                "author": None,
                "url": url,
                "error": str(e)
            }

    def analyze_content(self, content: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze the extracted content to provide insights.

        Args:
            content: Dictionary containing extracted content (the keys
                ``content``, ``headings``, ``publish_date`` and ``images``
                are read; all are optional)
        Returns:
            Dictionary with ``metrics`` (word/sentence/paragraph counts and
            averages), ``heading_structure`` (heading level -> count),
            ``content_age`` (``days``/``months``/``years``, or an ``error``
            message) and ``images`` (``count`` and ``with_alt_text``).
        """
        logger.info("Analyzing content")
        analysis = {}

        # Basic metrics. Empty fragments produced by the splits are ignored so
        # that trailing punctuation or blank lines do not inflate the counts.
        text_content = content.get("content", "") or ""
        word_count = len(text_content.split())
        sentence_count = sum(
            1 for part in re.split(r'[.!?]+', text_content) if part.strip()
        )
        paragraph_count = sum(
            1 for part in re.split(r'\n\s*\n', text_content) if part.strip()
        )
        analysis["metrics"] = {
            "word_count": word_count,
            "sentence_count": sentence_count,
            "paragraph_count": paragraph_count,
            "avg_words_per_sentence": round(word_count / max(sentence_count, 1), 1),
            "avg_sentences_per_paragraph": round(sentence_count / max(paragraph_count, 1), 1)
        }

        # Heading structure: number of headings per level.
        heading_structure = {}
        for heading in content.get("headings", []):
            level = heading["level"]
            heading_structure[level] = heading_structure.get(level, 0) + 1
        analysis["heading_structure"] = heading_structure

        # Content age.
        publish_date = content.get("publish_date")
        if not publish_date:
            analysis["content_age"] = {"error": "No publish date found"}
        else:
            pub_date = self._parse_publish_date(publish_date)
            if pub_date is None:
                logger.warning(f"Could not parse publish date: {publish_date}")
                analysis["content_age"] = {"error": "Could not determine content age"}
            else:
                # Use "now" in the same timezone awareness as the parsed date;
                # subtracting naive from aware datetimes raises TypeError.
                age_days = (datetime.now(tz=pub_date.tzinfo) - pub_date).days
                analysis["content_age"] = {
                    "days": age_days,
                    "months": round(age_days / 30, 1),
                    "years": round(age_days / 365, 1)
                }

        # Image analysis.
        images = content.get("images", [])
        analysis["images"] = {
            "count": len(images),
            "with_alt_text": sum(1 for img in images if img.get("alt_text"))
        }
        return analysis

    def conduct_research(self, title: str, content: str, research_depth: str = "medium") -> Dict[str, Any]:
        """
        Conduct web research to find updated information related to the blog content.

        The LLM is asked for a few topics with search queries; each of the
        first three topics is searched with Exa and, when that returns fewer
        results than requested, with Tavily as well.

        Args:
            title: Blog title
            content: Blog content (only the first 1000 characters are sent)
            research_depth: Depth of research (low, medium, high); controls
                the number of results requested per search (2, 3 or 5).
                Unknown values behave like "medium".
        Returns:
            Dictionary ``{"topics": [{"topic": str, "sources": [...]}, ...]}``
        """
        logger.info(f"Conducting research with depth: {research_depth}")
        content = content or ""

        # Extract key topics from the content
        prompt = f"""
Extract 3-5 key topics or claims from this blog content that might need fact-checking or updating.
For each topic, provide a concise search query that would help find the most recent information.
Blog title: {title}
First 1000 characters of content:
{content[:1000]}...
Format your response as a JSON array of objects with 'topic' and 'query' fields.
"""
        # Title-based topics are used whenever the LLM reply is unusable.
        topics = self._fallback_topics(title)
        try:
            llm_reply = llm_text_gen(prompt)
            array_match = re.search(r'\[.*\]', llm_reply, re.DOTALL)
            if array_match:
                parsed = json.loads(array_match.group(0))
                # Keep only well-formed entries so the loop below cannot hit
                # a missing key.
                usable = [
                    item for item in parsed
                    if isinstance(item, dict) and item.get("topic") and item.get("query")
                ]
                if usable:
                    topics = usable
        except Exception as e:
            logger.error(f"Error extracting topics: {e}")

        # Determine number of results based on research depth
        num_results = {"low": 2, "medium": 3, "high": 5}.get(research_depth, 3)
        research_results = {"topics": []}

        for topic in topics[:3]:  # Limit to 3 topics to avoid excessive API calls
            topic_results = {"topic": topic["topic"], "sources": []}

            # Try Exa search first
            try:
                exa_results = exa_search(topic["query"], num_results=num_results)
                if exa_results:
                    topic_results["sources"].extend(exa_results)
            except Exception as e:
                logger.warning(f"Exa search failed: {e}")

            # If Exa didn't return enough results, try Tavily
            if len(topic_results["sources"]) < num_results:
                try:
                    tavily_results = tavily_search(topic["query"], num_results=num_results)
                    if tavily_results:
                        # Avoid duplicates (by URL) of what Exa already found.
                        seen_urls = {
                            source.get("url")
                            for source in topic_results["sources"]
                            if isinstance(source, dict)
                        }
                        for result in tavily_results:
                            result_url = result.get("url")
                            if result_url is None or result_url not in seen_urls:
                                topic_results["sources"].append(result)
                                seen_urls.add(result_url)
                except Exception as e:
                    logger.warning(f"Tavily search failed: {e}")

            research_results["topics"].append(topic_results)
        return research_results

    def generate_rewrite_prompt(self, original_content: Dict[str, Any],
                                user_preferences: Dict[str, Any],
                                research_results: Dict[str, Any],
                                content_analysis: Dict[str, Any]) -> str:
        """
        Generate a prompt for the LLM to rewrite the blog.

        Args:
            original_content: Original blog content (``title`` and ``content``
                are read; content beyond 6000 characters is truncated)
            user_preferences: User preferences for rewriting (``rewrite_mode``,
                ``tone``, ``target_word_count``, ``keywords``,
                ``special_instructions``; all optional)
            research_results: Research results for updating content (at most
                three sources per topic, 300 characters each, are included)
            content_analysis: Analysis of the original content
        Returns:
            Prompt string for the LLM
        """
        logger.info("Generating rewrite prompt")

        title = original_content.get("title", "")
        content = original_content.get("content", "")

        # Truncate content if it's too long
        max_content_length = 6000  # Adjust based on your LLM's context window
        if len(content) > max_content_length:
            content_preview = content[:max_content_length] + "...\n[Content truncated due to length]"
        else:
            content_preview = content

        # Format research results as a small markdown section per topic.
        research_parts = []
        for topic in research_results.get("topics", []):
            research_parts.append(f"\n## {topic['topic']}\n")
            for i, source in enumerate(topic.get("sources", [])[:3]):  # Limit to 3 sources per topic
                # "content" may be present but None; fall back to the
                # placeholder before slicing.
                snippet = (source.get('content') or 'No content')[:300]
                research_parts.append(f"Source {i+1}: {source.get('title', 'Untitled')}\n")
                research_parts.append(f"URL: {source.get('url', 'No URL')}\n")
                research_parts.append(f"Content: {snippet}...\n\n")
        research_summary = "".join(research_parts) or "No research results available."

        # Build the prompt
        prompt = f"""
# Blog Rewriting Task
## Original Blog Information
Title: {title}
Word Count: {content_analysis.get('metrics', {}).get('word_count', 'Unknown')}
Estimated Age: {content_analysis.get('content_age', {}).get('months', 'Unknown')} months
## Rewriting Instructions
Mode: {user_preferences.get('rewrite_mode', 'standard')}
Target Tone: {user_preferences.get('tone', 'Professional')}
Target Word Count: {user_preferences.get('target_word_count', 'Same as original')}
Focus Keywords: {', '.join(user_preferences.get('keywords', []))}
## Special Instructions
{user_preferences.get('special_instructions', 'No special instructions')}
## Recent Research Findings
{research_summary}
## Original Content
{content_preview}
## Your Task
Please rewrite this blog post according to the instructions above. The rewritten blog should:
1. Maintain the core message and value of the original content
2. Update any outdated information based on the research findings
3. Adopt the requested tone and style
4. Incorporate the focus keywords naturally
5. Improve readability and engagement
6. Maintain a logical structure with appropriate headings
7. Include a compelling introduction and conclusion
## Output Format
Please provide your response in the following JSON format:
```json
{{
"title": "Rewritten title",
"meta_description": "SEO-optimized meta description (max 160 characters)",
"content": "Full rewritten content with proper markdown formatting",
"suggested_images": [
{{
"description": "Brief description of a suggested image",
"caption": "Suggested caption for the image",
"placement": "Where this image should be placed (e.g., 'After introduction', 'Before conclusion')"
}}
]
}}
```
Ensure the JSON is properly formatted and valid.
"""
        return prompt

    def rewrite_blog(self, original_content: Dict[str, Any],
                     user_preferences: Dict[str, Any],
                     research_results: Dict[str, Any],
                     content_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """
        Rewrite the blog based on original content, user preferences, and research.

        Args:
            original_content: Original blog content
            user_preferences: User preferences for rewriting
            research_results: Research results for updating content
            content_analysis: Analysis of the original content
        Returns:
            Dictionary with ``title``, ``meta_description``, ``content`` and
            ``suggested_images``. Fields missing from the LLM reply are filled
            from the original. If the call or parsing fails, the original
            values are returned together with an ``error`` key.
        """
        logger.info("Rewriting blog content")
        prompt = self.generate_rewrite_prompt(
            original_content, user_preferences, research_results, content_analysis
        )
        try:
            response = llm_text_gen(prompt)
            rewritten_content = self._parse_llm_json(response)

            # Fill any required field the model left out from the original.
            for field in ("title", "meta_description", "content"):
                if field not in rewritten_content:
                    rewritten_content[field] = original_content.get(field, "")
            if "suggested_images" not in rewritten_content:
                rewritten_content["suggested_images"] = []
            return rewritten_content
        except Exception as e:
            logger.error(f"Error rewriting blog: {e}")
            return {
                "title": original_content.get("title", ""),
                "meta_description": original_content.get("meta_description", ""),
                "content": original_content.get("content", ""),
                "suggested_images": [],
                "error": str(e)
            }

    def generate_image(self, image_prompt: str, style: str = "realistic") -> str:
        """
        Generate an image based on the prompt.

        Args:
            image_prompt: Prompt for image generation
            style: Style of the image
        Returns:
            Whatever the project's image generator returns (treated by the
            UI as a path to the generated image), or an empty string on
            failure
        """
        logger.info(f"Generating image with prompt: {image_prompt}")
        try:
            # Inside the method body this name resolves to the module-level
            # generate_image import, not to this method.
            return generate_image(image_prompt, style=style)
        except Exception as e:
            logger.error(f"Error generating image: {e}")
            return ""
# Session-state keys that hold per-post workflow data; cleared by "Start Over".
_WORKFLOW_STATE_KEYS = (
    "original_content", "content_analysis", "research_results",
    "rewritten_content", "generated_images", "current_step",
)

_IMAGE_STYLES = ["realistic", "artistic", "cartoon", "3d_render"]


def _rerun() -> None:
    """Restart the script run with whichever rerun API this Streamlit has."""
    # st.rerun replaced st.experimental_rerun in newer Streamlit releases;
    # fall back to the old name on older installations.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()


def _safe_filename(title: Any, extension: str) -> str:
    """Build a download file name from a title, keeping only safe characters."""
    stem = re.sub(r'[^\w\-]+', '_', str(title or "")).strip('_')
    return f"{stem or 'rewritten_blog'}.{extension}"


def _store_imported_content(content: Dict[str, Any]) -> None:
    """Save newly imported content and drop results derived from older content."""
    st.session_state.original_content = content
    st.session_state.content_analysis = {}
    st.session_state.research_results = {}
    st.session_state.rewritten_content = {}
    st.session_state.generated_images = {}
    st.session_state.current_step = 2


def _build_html_export(title: str, meta_description: str, content: str) -> str:
    """Wrap the rewritten post in a minimal standalone HTML page."""
    # Title and meta description land in markup/attribute positions, so they
    # are escaped. The body is the LLM's markdown inserted as-is, exactly as
    # before; it is NOT converted to HTML here.
    safe_title = html.escape(str(title))
    safe_meta = html.escape(str(meta_description), quote=True)
    return f"""
<!DOCTYPE html>
<html>
<head>
<title>{safe_title}</title>
<meta name="description" content="{safe_meta}">
<style>
body {{ font-family: Arial, sans-serif; line-height: 1.6; max-width: 800px; margin: 0 auto; padding: 20px; }}
h1, h2, h3, h4, h5, h6 {{ color: #333; }}
img {{ max-width: 100%; height: auto; }}
pre {{ background-color: #f5f5f5; padding: 15px; border-radius: 5px; overflow-x: auto; }}
blockquote {{ border-left: 5px solid #eee; padding-left: 15px; margin-left: 0; }}
</style>
</head>
<body>
<h1>{safe_title}</h1>
{content}
</body>
</html>
"""


def _render_import_tab() -> None:
    """Render tab 1: import a post from a URL or from pasted text."""
    st.header("Import Your Blog Content")
    import_method = st.radio(
        "Choose import method:",
        ["Import from URL", "Paste content manually"],
        horizontal=True
    )

    if import_method == "Import from URL":
        url = st.text_input(
            "Enter blog URL:",
            placeholder="https://example.com/blog-post",
            help="Enter the full URL of the blog post you want to rewrite"
        )
        if st.button("Import Content", type="primary"):
            url = url.strip()
            if not url:
                st.error("Please enter a valid URL")
            else:
                with st.spinner("Extracting content from URL..."):
                    extracted = st.session_state.blog_rewriter.extract_content_from_url(url)
                if "error" in extracted:
                    # Do not keep a failed extraction: its empty fields would
                    # otherwise be treated as imported content by later tabs.
                    st.error(f"Error extracting content: {extracted['error']}")
                else:
                    _store_imported_content(extracted)
                    st.success("Content extracted successfully!")
                    _rerun()
    else:
        col1, col2 = st.columns([3, 1])
        with col1:
            title = st.text_input(
                "Blog Title:",
                placeholder="Enter the title of your blog post"
            )
        with col2:
            author = st.text_input(
                "Author (optional):",
                placeholder="Author name"
            )
        meta_description = st.text_area(
            "Meta Description (optional):",
            placeholder="Enter the meta description of your blog post",
            max_chars=MAX_META_DESCRIPTION_LENGTH,
            height=80
        )
        content = st.text_area(
            "Blog Content:",
            placeholder="Paste your blog content here...",
            height=300
        )
        if st.button("Import Content", type="primary"):
            if not title or not content:
                st.error("Please enter both title and content")
            else:
                _store_imported_content({
                    "title": title,
                    "meta_description": meta_description,
                    "content": content,
                    "author": author,
                    "headings": [],
                    "images": [],
                    "publish_date": None,
                    "url": None
                })
                st.success("Content imported successfully!")
                _rerun()

    # Display the imported content if available
    original = st.session_state.original_content
    if original and "title" in original:
        with st.expander("View Imported Content", expanded=False):
            st.subheader(original["title"])
            if original.get("meta_description"):
                st.markdown(f"**Meta Description:** {original['meta_description']}")
            if original.get("author"):
                st.markdown(f"**Author:** {original['author']}")
            if original.get("publish_date"):
                st.markdown(f"**Published:** {original['publish_date']}")
            st.markdown("**Content Preview:**")
            content_preview = original["content"]
            if len(content_preview) > 1000:
                content_preview = content_preview[:1000] + "..."
            st.text_area(
                "Content Preview",
                content_preview,
                height=200,
                disabled=True,
                label_visibility="collapsed"
            )
            if original.get("images"):
                st.markdown(f"**Images:** {len(original['images'])} images found")


def _render_analysis_tab() -> None:
    """Render tab 2: run content analysis and web research, show the results."""
    st.header("Analyze & Research")
    original = st.session_state.original_content
    if not original or "title" not in original:
        st.info("Please import your blog content first")
        return

    col1, col2 = st.columns(2)
    with col1:
        if st.button("Analyze Content", type="primary"):
            with st.spinner("Analyzing content..."):
                st.session_state.content_analysis = st.session_state.blog_rewriter.analyze_content(
                    original
                )
            st.success("Content analysis complete!")
    with col2:
        research_depth = st.selectbox(
            "Research Depth:",
            ["low", "medium", "high"],
            index=1,
            format_func=lambda x: {"low": "Basic", "medium": "Standard", "high": "Comprehensive"}[x],
            help="Choose the depth of research to update your content"
        )
        if st.button("Conduct Research", type="primary"):
            with st.spinner("Researching latest information..."):
                st.session_state.research_results = st.session_state.blog_rewriter.conduct_research(
                    original["title"],
                    original["content"],
                    research_depth
                )
            st.success("Research complete!")

    # Display content analysis if available
    analysis = st.session_state.content_analysis
    if analysis:
        st.subheader("Content Analysis")
        metrics = analysis.get("metrics", {})
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Word Count", metrics.get("word_count", 0))
        with col2:
            st.metric("Paragraphs", metrics.get("paragraph_count", 0))
        with col3:
            st.metric("Sentences", metrics.get("sentence_count", 0))
        with col4:
            content_age = analysis.get("content_age", {})
            if "months" in content_age:
                st.metric("Content Age", f"{content_age['months']} months")
            elif "error" in content_age:
                st.metric("Content Age", "Unknown")

        heading_structure = analysis.get("heading_structure", {})
        if heading_structure:
            st.markdown("**Heading Structure:**")
            for level, count in sorted(heading_structure.items()):
                st.markdown(f"H{level}: {count} headings")

        images = analysis.get("images", {})
        if images:
            st.markdown(f"**Images:** {images.get('count', 0)} images found, {images.get('with_alt_text', 0)} with alt text")

    # Display research results if available
    research = st.session_state.research_results
    if research:
        st.subheader("Research Results")
        topics = research.get("topics", [])
        if topics:
            for topic in topics:
                with st.expander(f"Topic: {topic['topic']}", expanded=False):
                    for i, source in enumerate(topic.get("sources", [])):
                        # "content" may be present but None.
                        snippet = (source.get('content') or 'No content')[:200]
                        st.markdown(f"**Source {i+1}:** {source.get('title', 'Untitled')}")
                        st.markdown(f"**URL:** {source.get('url', 'No URL')}")
                        st.markdown(f"**Content Preview:** {snippet}...")
                        st.markdown("---")
        else:
            st.info("No research results available")

    # Enable proceeding to the next step if both analysis and research are done
    if analysis and research:
        if st.button("Proceed to Rewrite Settings", type="primary"):
            st.session_state.current_step = 3
            _rerun()


def _render_settings_tab() -> None:
    """Render tab 3: collect rewrite preferences in a form and run the rewrite."""
    st.header("Rewrite Settings")
    original = st.session_state.original_content
    if not original or "title" not in original:
        st.info("Please import your blog content first")
        return
    if not st.session_state.content_analysis or not st.session_state.research_results:
        st.info("Please complete content analysis and research first")
        return

    with st.form("rewrite_settings_form"):
        st.subheader("Content Transformation")
        col1, col2 = st.columns(2)
        with col1:
            rewrite_mode = st.selectbox(
                "Rewrite Mode:",
                list(REWRITE_MODES.keys()),
                format_func=lambda x: x.replace("_", " ").title(),
                help="Choose how you want to transform your content"
            )
            st.info(REWRITE_MODES[rewrite_mode])
        with col2:
            tone = st.selectbox(
                "Target Tone:",
                TONE_OPTIONS,
                index=0,
                help="Choose the tone for your rewritten content"
            )

        st.subheader("Content Length")
        original_word_count = st.session_state.content_analysis.get("metrics", {}).get("word_count", 0)
        length_targets = {
            "same": original_word_count,
            "shorter": int(original_word_count * 0.7),
            "longer": int(original_word_count * 1.3),
        }
        length_labels = {
            "same": f"Same as original ({length_targets['same']} words)",
            "shorter": f"Shorter (about {length_targets['shorter']} words)",
            "longer": f"Longer (about {length_targets['longer']} words)",
            "custom": "Custom word count",
        }
        length_option = st.radio(
            "Target Length:",
            ["same", "shorter", "longer", "custom"],
            format_func=lambda x: length_labels[x],
            horizontal=True
        )
        # Widgets inside a form do not trigger a rerun, so a field shown only
        # after "custom" is picked would never appear before submission; it is
        # therefore always rendered. The default is clamped into the allowed
        # range because number_input rejects an out-of-range initial value.
        custom_word_count = st.number_input(
            "Custom Word Count:",
            min_value=100,
            max_value=10000,
            value=min(max(int(original_word_count), 100), 10000),
            step=100,
            help="Only used when 'Custom word count' is selected above"
        )

        st.subheader("SEO Optimization")
        keywords = st.text_input(
            "Focus Keywords (comma-separated):",
            placeholder="e.g., digital marketing, SEO, content strategy",
            help="Enter keywords to optimize your content for"
        )

        st.subheader("Additional Instructions")
        special_instructions = st.text_area(
            "Special Instructions (optional):",
            placeholder="Add any specific instructions for rewriting your content...",
            help="Provide any additional instructions for the AI"
        )

        submitted = st.form_submit_button("Rewrite Blog", type="primary")

    if not submitted:
        return

    if length_option == "custom":
        target_word_count = custom_word_count
    else:
        target_word_count = length_targets[length_option]

    user_preferences = {
        "rewrite_mode": rewrite_mode,
        "tone": tone,
        "target_word_count": target_word_count,
        # Drop empty entries produced by stray commas.
        "keywords": [k.strip() for k in keywords.split(",") if k.strip()],
        "special_instructions": special_instructions
    }
    with st.spinner("Rewriting your blog..."):
        st.session_state.rewritten_content = st.session_state.blog_rewriter.rewrite_blog(
            original,
            user_preferences,
            st.session_state.research_results,
            st.session_state.content_analysis
        )
    if "error" in st.session_state.rewritten_content:
        st.error(f"Error rewriting blog: {st.session_state.rewritten_content['error']}")
    else:
        st.success("Blog rewritten successfully!")
        st.session_state.current_step = 4
        _rerun()


def _render_image_section(rewritten: Dict[str, Any]) -> None:
    """Render the suggested-image and custom-image generators of tab 4."""
    st.subheader("Generate Images")
    # Ignore malformed suggestions (anything that is not a JSON object).
    suggested_images = [
        img for img in rewritten.get("suggested_images", []) if isinstance(img, dict)
    ]
    if suggested_images:
        st.markdown("**Suggested Images:**")
        for i, img in enumerate(suggested_images):
            image_key = f"image_{i}"
            with st.expander(f"Image {i+1}: {img.get('description', 'No description')}", expanded=False):
                st.markdown(f"**Description:** {img.get('description', 'No description')}")
                st.markdown(f"**Caption:** {img.get('caption', 'No caption')}")
                st.markdown(f"**Placement:** {img.get('placement', 'No placement specified')}")
                col1, col2 = st.columns([3, 1])
                with col1:
                    image_prompt = st.text_area(
                        "Image Prompt:",
                        value=img.get('description', ''),
                        key=f"image_prompt_{i}"
                    )
                with col2:
                    style = st.selectbox(
                        "Style:",
                        _IMAGE_STYLES,
                        key=f"style_{i}"
                    )
                if st.button("Generate Image", key=f"gen_img_{i}"):
                    with st.spinner("Generating image..."):
                        image_path = st.session_state.blog_rewriter.generate_image(image_prompt, style)
                    if image_path:
                        st.session_state.generated_images[image_key] = {
                            "path": image_path,
                            "caption": img.get('caption', ''),
                            "placement": img.get('placement', '')
                        }
                        st.success("Image generated successfully!")
                        _rerun()
                if image_key in st.session_state.generated_images:
                    generated = st.session_state.generated_images[image_key]
                    st.image(
                        generated["path"],
                        caption=generated["caption"],
                        use_column_width=True
                    )
    else:
        st.info("No image suggestions available")

    with st.expander("Generate Custom Image", expanded=True):
        col1, col2 = st.columns([3, 1])
        with col1:
            custom_image_prompt = st.text_area(
                "Image Prompt:",
                placeholder="Describe the image you want to generate..."
            )
        with col2:
            custom_style = st.selectbox(
                "Style:",
                _IMAGE_STYLES
            )
        if st.button("Generate Custom Image"):
            if not custom_image_prompt:
                st.error("Please enter an image prompt")
            else:
                with st.spinner("Generating image..."):
                    image_path = st.session_state.blog_rewriter.generate_image(custom_image_prompt, custom_style)
                if image_path:
                    st.session_state.generated_images["custom_image"] = {
                        "path": image_path,
                        "caption": "Custom generated image",
                        "placement": "Custom placement"
                    }
                    st.success("Image generated successfully!")
                    _rerun()
        if "custom_image" in st.session_state.generated_images:
            generated = st.session_state.generated_images["custom_image"]
            st.image(
                generated["path"],
                caption=generated["caption"],
                use_column_width=True
            )


def _render_export_section(rewritten: Dict[str, Any], original: Dict[str, Any]) -> None:
    """Render the Markdown / HTML / JSON download buttons of tab 4."""
    st.subheader("Export Options")
    title = rewritten["title"]
    col1, col2, col3 = st.columns(3)
    with col1:
        st.download_button(
            "Download as Markdown",
            data=rewritten["content"],
            file_name=_safe_filename(title, "md"),
            mime="text/markdown"
        )
    with col2:
        st.download_button(
            "Download as HTML",
            data=_build_html_export(
                title, rewritten.get("meta_description", ""), rewritten["content"]
            ),
            file_name=_safe_filename(title, "html"),
            mime="text/html"
        )
    with col3:
        # JSON version with all content and metadata
        json_content = {
            "title": title,
            "meta_description": rewritten.get("meta_description", ""),
            "content": rewritten["content"],
            "suggested_images": rewritten.get("suggested_images", []),
            "generated_images": [
                {
                    "caption": img_data["caption"],
                    "placement": img_data["placement"],
                    "path": img_data["path"]
                }
                for img_data in st.session_state.generated_images.values()
            ],
            "original_title": original.get("title", ""),
            "original_url": original.get("url", ""),
            "rewrite_date": datetime.now().isoformat()
        }
        st.download_button(
            "Download as JSON",
            data=json.dumps(json_content, indent=2),
            file_name=_safe_filename(title, "json"),
            mime="application/json"
        )


def _render_results_tab() -> None:
    """Render tab 4: rewritten post, image generation, export and comparison."""
    st.header("Results & Export")
    rewritten = st.session_state.rewritten_content
    original = st.session_state.original_content
    if not rewritten or "title" not in rewritten:
        st.info("Please complete the rewriting process first")
        return

    st.subheader("Rewritten Blog")
    st.markdown(f"## {rewritten['title']}")
    if rewritten.get("meta_description"):
        with st.expander("Meta Description", expanded=True):
            st.text_area(
                "Meta Description",
                rewritten["meta_description"],
                height=80,
                disabled=True,
                label_visibility="collapsed"
            )

    content_tab1, content_tab2 = st.tabs(["Preview", "Markdown"])
    with content_tab1:
        st.markdown(rewritten["content"])
    with content_tab2:
        st.text_area(
            "Markdown",
            rewritten["content"],
            height=400,
            label_visibility="collapsed"
        )

    _render_image_section(rewritten)
    _render_export_section(rewritten, original)

    # Nothing here writes to the clipboard; each button reveals the text in a
    # code block, which offers its own copy control.
    st.subheader("Quick Copy")
    copy_hint = "Use the copy icon on the block above to copy it."
    col1, col2, col3 = st.columns(3)
    with col1:
        if st.button("Copy Title", key="copy_title"):
            st.code(rewritten["title"])
            st.info(copy_hint)
    with col2:
        if st.button("Copy Meta Description", key="copy_meta"):
            st.code(rewritten.get("meta_description", ""))
            st.info(copy_hint)
    with col3:
        if st.button("Copy Full Content", key="copy_content"):
            st.code(rewritten["content"])
            st.info(copy_hint)

    with st.expander("Compare with Original", expanded=False):
        comp_col1, comp_col2 = st.columns(2)
        with comp_col1:
            st.subheader("Original")
            st.markdown(f"**Title:** {original.get('title', '')}")
            if original.get("meta_description"):
                st.markdown(f"**Meta Description:** {original['meta_description']}")
            st.text_area(
                "Original Content",
                original.get("content", ""),
                height=300,
                disabled=True
            )
        with comp_col2:
            st.subheader("Rewritten")
            st.markdown(f"**Title:** {rewritten['title']}")
            if rewritten.get("meta_description"):
                st.markdown(f"**Meta Description:** {rewritten['meta_description']}")
            st.text_area(
                "Rewritten Content",
                rewritten["content"],
                height=300,
                disabled=True
            )

    if st.button("Start Over", type="primary"):
        # Reset the workflow; the defaults are recreated on the next run.
        for key in _WORKFLOW_STATE_KEYS:
            if key in st.session_state:
                del st.session_state[key]
        _rerun()


def write_blog_rewriter():
    """Main function to display the blog rewriter UI.

    Builds a four-tab Streamlit page (import, analyze & research, rewrite
    settings, results & export). All workflow data lives in
    ``st.session_state`` so it survives Streamlit's script reruns.
    """
    st.title("AI Blog Rewriter & Updater")
    with st.container():
        st.markdown("""
<div style="background-color: #f8f9fa; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
<h3 style="margin-top: 0;">Revitalize Your Content</h3>
<p>Update, fact-check, and enhance your existing blog posts with AI assistance.
Our tool analyzes your content, researches the latest information, and rewrites your blog
to be more engaging, accurate, and SEO-friendly.</p>
</div>
""", unsafe_allow_html=True)

    # One BlogRewriter per browser session.
    if "blog_rewriter" not in st.session_state:
        st.session_state.blog_rewriter = BlogRewriter()

    # Initialize session state variables (fresh objects on every call so no
    # mutable default is shared between sessions).
    session_defaults = {
        "original_content": {},
        "content_analysis": {},
        "research_results": {},
        "rewritten_content": {},
        "generated_images": {},
        "current_step": 1,
    }
    for key, default in session_defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default

    tab1, tab2, tab3, tab4 = st.tabs([
        "1⃣ Import Content",
        "2⃣ Analyze & Research",
        "3⃣ Rewrite Settings",
        "4⃣ Results & Export"
    ])
    with tab1:
        _render_import_tab()
    with tab2:
        _render_analysis_tab()
    with tab3:
        _render_settings_tab()
    with tab4:
        _render_results_tab()


if __name__ == "__main__":
    write_blog_rewriter()