ALwrity/lib/ai_writers/ai_blog_rewriter.py

"""
AI Blog Rewriter Module

This module provides functionality to rewrite and update existing blog content
with improved quality, factual accuracy, and SEO optimization.
"""

import streamlit as st
import requests
from bs4 import BeautifulSoup
import re
import time
import logging
from typing import Dict, List, Tuple, Optional, Any
import json
import os
from datetime import datetime

# Import required modules from the project
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
from ..gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
from ..web_research.exa_search import exa_search
from ..web_research.tavily_search import tavily_search

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Define constants
MAX_TITLE_LENGTH = 70
MAX_META_DESCRIPTION_LENGTH = 160
REWRITE_MODES = {
    "standard": "Standard rewrite with improved clarity and flow",
    "seo_optimization": "Optimize for search engines with targeted keywords",
    "simplification": "Simplify complex content for broader audience",
    "expansion": "Expand with additional details and examples",
    "fact_check": "Focus on fact-checking and updating information",
    "tone_shift": "Change the tone while preserving content",
    "modernization": "Update outdated content with current information"
}

# Define tone options
TONE_OPTIONS = [
    "Professional", "Conversational", "Academic", "Enthusiastic",
    "Authoritative", "Friendly", "Technical", "Inspirational"
]

class BlogRewriter:
    """Class to handle blog rewriting functionality."""

    def __init__(self):
        """Initialize the BlogRewriter class."""
        self.original_content = {}
        self.rewritten_content = {}
        self.research_results = {}
        self.content_analysis = {}
        self.image_suggestions = []

    def extract_content_from_url(self, url: str) -> Dict[str, Any]:
        """
        Extract content from a given URL.

        Args:
            url: The URL to extract content from

        Returns:
            Dictionary containing extracted content
        """
        logger.info(f"Extracting content from URL: {url}")

        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            response = requests.get(url, headers=headers, timeout=15)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract title
            title = soup.title.string if soup.title else ""

            # Extract meta description
            meta_desc = ""
            meta_tag = soup.find("meta", attrs={"name": "description"})
            if meta_tag and "content" in meta_tag.attrs:
                meta_desc = meta_tag["content"]

            # Extract main content - this is a simplified approach
            # In a real implementation, you'd want more sophisticated content extraction
            content = ""
            article_tag = soup.find("article")
            if article_tag:
                content = article_tag.get_text(separator="\\n\\n")
            else:
                # Try to find main content by looking for common content containers
                main_content = soup.find(["main", "div", "section"], class_=re.compile(r"content|article|post|entry"))
                if main_content:
                    # Remove navigation, sidebars, comments, etc.
                    for elem in main_content.find_all(["nav", "aside", "footer", "comments", "script", "style"]):
                        elem.decompose()
                    content = main_content.get_text(separator="\\n\\n")
                else:
                    # Fallback to body content
                    body = soup.find("body")
                    if body:
                        content = body.get_text(separator="\\n\\n")

            # Clean up the content
            content = re.sub(r'\\n{3,}', '\\n\\n', content)  # Remove excessive newlines
            content = re.sub(r'\s{2,}', ' ', content)     # Remove excessive spaces

            # Extract headings for structure analysis
            headings = []
            for h in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
                headings.append({
                    "level": int(h.name[1]),
                    "text": h.get_text().strip()
                })

            # Extract images
            images = []
            for img in soup.find_all("img"):
                if img.get("src") and not img.get("src").startswith("data:"):
                    image_url = img.get("src")
                    if not image_url.startswith(("http://", "https://")):
                        # Convert relative URL to absolute
                        base_url = "/".join(url.split("/")[:3])  # Get domain
                        image_url = f"{base_url}/{image_url.lstrip('/')}"

                    alt_text = img.get("alt", "")
                    images.append({
                        "url": image_url,
                        "alt_text": alt_text
                    })

            # Extract publish date if available
            publish_date = None
            date_meta = soup.find("meta", attrs={"property": "article:published_time"})
            if date_meta and "content" in date_meta.attrs:
                publish_date = date_meta["content"]
            else:
                # Try common date patterns in the HTML
                date_elem = soup.find(["time", "span", "div"], class_=re.compile(r"date|time|publish"))
                if date_elem and date_elem.get_text():
                    publish_date = date_elem.get_text().strip()

            # Extract author if available
            author = None
            author_meta = soup.find("meta", attrs={"name": "author"})
            if author_meta and "content" in author_meta.attrs:
                author = author_meta["content"]
            else:
                # Try common author patterns in the HTML
                author_elem = soup.find(["a", "span", "div"], class_=re.compile(r"author|byline"))
                if author_elem and author_elem.get_text():
                    author = author_elem.get_text().strip()

            return {
                "title": title,
                "meta_description": meta_desc,
                "content": content,
                "headings": headings,
                "images": images,
                "publish_date": publish_date,
                "author": author,
                "url": url
            }

        except Exception as e:
            logger.error(f"Error extracting content from URL: {e}")
            return {
                "title": "",
                "meta_description": "",
                "content": "",
                "headings": [],
                "images": [],
                "publish_date": None,
                "author": None,
                "url": url,
                "error": str(e)
            }

    def analyze_content(self, content: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze the extracted content to provide insights.

        Args:
            content: Dictionary containing extracted content

        Returns:
            Dictionary containing content analysis
        """
        logger.info("Analyzing content")

        analysis = {}

        # Basic metrics
        text_content = content.get("content", "")
        word_count = len(text_content.split())
        sentence_count = len(re.split(r'[.!?]+', text_content))
        paragraph_count = len(re.split(r'\\n\\n+', text_content))

        analysis["metrics"] = {
            "word_count": word_count,
            "sentence_count": sentence_count,
            "paragraph_count": paragraph_count,
            "avg_words_per_sentence": round(word_count / max(sentence_count, 1), 1),
            "avg_sentences_per_paragraph": round(sentence_count / max(paragraph_count, 1), 1)
        }

        # Heading structure analysis
        headings = content.get("headings", [])
        heading_structure = {}
        for h in headings:
            level = h["level"]
            if level not in heading_structure:
                heading_structure[level] = 0
            heading_structure[level] += 1

        analysis["heading_structure"] = heading_structure

        # Content age analysis
        publish_date = content.get("publish_date")
        if publish_date:
            try:
                # Try to parse the date in various formats
                if "T" in publish_date:
                    # ISO format
                    pub_date = datetime.fromisoformat(publish_date.replace("Z", "+00:00"))
                else:
                    # Try common date formats
                    date_formats = [
                        "%Y-%m-%d", "%d-%m-%Y", "%B %d, %Y", "%b %d, %Y",
                        "%d %B %Y", "%d %b %Y", "%Y/%m/%d", "%d/%m/%Y"
                    ]
                    for fmt in date_formats:
                        try:
                            pub_date = datetime.strptime(publish_date, fmt)
                            break
                        except ValueError:
                            continue

                # Calculate content age
                now = datetime.now()
                age_days = (now - pub_date).days
                analysis["content_age"] = {
                    "days": age_days,
                    "months": round(age_days / 30, 1),
                    "years": round(age_days / 365, 1)
                }
            except Exception as e:
                logger.warning(f"Could not parse publish date: {e}")
                analysis["content_age"] = {"error": "Could not determine content age"}
        else:
            analysis["content_age"] = {"error": "No publish date found"}

        # Image analysis
        images = content.get("images", [])
        analysis["images"] = {
            "count": len(images),
            "with_alt_text": sum(1 for img in images if img.get("alt_text"))
        }

        return analysis

    def conduct_research(self, title: str, content: str, research_depth: str = "medium") -> Dict[str, Any]:
        """
        Conduct web research to find updated information related to the blog content.

        Args:
            title: Blog title
            content: Blog content
            research_depth: Depth of research (low, medium, high)

        Returns:
            Dictionary containing research results
        """
        logger.info(f"Conducting research with depth: {research_depth}")

        # Extract key topics from the content
        prompt = f"""
        Extract 3-5 key topics or claims from this blog content that might need fact-checking or updating.
        For each topic, provide a concise search query that would help find the most recent information.

        Blog title: {title}

        First 1000 characters of content:
        {content[:1000]}...

        Format your response as a JSON array of objects with 'topic' and 'query' fields.
        """

        try:
            topics_json = llm_text_gen(prompt)
            # Extract JSON from the response
            topics_json = re.search(r'\[.*\]', topics_json, re.DOTALL)
            if topics_json:
                topics = json.loads(topics_json.group(0))
            else:
                # Fallback if JSON extraction fails
                topics = [
                    {"topic": title, "query": title + " latest information"},
                    {"topic": "Updates on " + title, "query": title + " recent developments"}
                ]
        except Exception as e:
            logger.error(f"Error extracting topics: {e}")
            topics = [
                {"topic": title, "query": title + " latest information"},
                {"topic": "Updates on " + title, "query": title + " recent developments"}
            ]

        # Determine number of results based on research depth
        num_results = {"low": 2, "medium": 3, "high": 5}.get(research_depth, 3)

        research_results = {"topics": []}

        # Conduct research for each topic
        for topic in topics[:3]:  # Limit to 3 topics to avoid excessive API calls
            topic_results = {"topic": topic["topic"], "sources": []}

            # Try Exa search first
            try:
                exa_results = exa_search(topic["query"], num_results=num_results)
                if exa_results:
                    topic_results["sources"].extend(exa_results)
            except Exception as e:
                logger.warning(f"Exa search failed: {e}")

            # If Exa didn't return enough results, try Tavily
            if len(topic_results["sources"]) < num_results:
                try:
                    tavily_results = tavily_search(topic["query"], num_results=num_results)
                    if tavily_results:
                        # Avoid duplicates
                        existing_urls = [s["url"] for s in topic_results["sources"]]
                        for result in tavily_results:
                            if result["url"] not in existing_urls:
                                topic_results["sources"].append(result)
                                existing_urls.append(result["url"])
                except Exception as e:
                    logger.warning(f"Tavily search failed: {e}")

            research_results["topics"].append(topic_results)

        return research_results

    def generate_rewrite_prompt(self, original_content: Dict[str, Any],
                               user_preferences: Dict[str, Any],
                               research_results: Dict[str, Any],
                               content_analysis: Dict[str, Any]) -> str:
        """
        Generate a prompt for the LLM to rewrite the blog.

        Args:
            original_content: Original blog content
            user_preferences: User preferences for rewriting
            research_results: Research results for updating content
            content_analysis: Analysis of the original content

        Returns:
            Prompt string for the LLM
        """
        logger.info("Generating rewrite prompt")

        # Extract key information
        title = original_content.get("title", "")
        content = original_content.get("content", "")

        # Truncate content if it's too long
        max_content_length = 6000  # Adjust based on your LLM's context window
        if len(content) > max_content_length:
            content_preview = content[:max_content_length] + "...\\n[Content truncated due to length]"
        else:
            content_preview = content

        # Format research results
        research_summary = ""
        for topic in research_results.get("topics", []):
            research_summary += f"\\n## {topic['topic']}\\n"
            for i, source in enumerate(topic.get("sources", [])[:3]):  # Limit to 3 sources per topic
                research_summary += f"Source {i+1}: {source.get('title', 'Untitled')}\\n"
                research_summary += f"URL: {source.get('url', 'No URL')}\\n"
                research_summary += f"Content: {source.get('content', 'No content')[:300]}...\\n\\n"

        # Build the prompt
        prompt = f"""
        # Blog Rewriting Task

        ## Original Blog Information
        Title: {title}
        Word Count: {content_analysis.get('metrics', {}).get('word_count', 'Unknown')}
        Estimated Age: {content_analysis.get('content_age', {}).get('months', 'Unknown')} months

        ## Rewriting Instructions
        Mode: {user_preferences.get('rewrite_mode', 'standard')}
        Target Tone: {user_preferences.get('tone', 'Professional')}
        Target Word Count: {user_preferences.get('target_word_count', 'Same as original')}
        Focus Keywords: {', '.join(user_preferences.get('keywords', []))}

        ## Special Instructions
        {user_preferences.get('special_instructions', 'No special instructions')}

        ## Recent Research Findings
        {research_summary if research_summary else "No research results available."}

        ## Original Content
        {content_preview}

        ## Your Task
        Please rewrite this blog post according to the instructions above. The rewritten blog should:

        1. Maintain the core message and value of the original content
        2. Update any outdated information based on the research findings
        3. Adopt the requested tone and style
        4. Incorporate the focus keywords naturally
        5. Improve readability and engagement
        6. Maintain a logical structure with appropriate headings
        7. Include a compelling introduction and conclusion

        ## Output Format
        Please provide your response in the following JSON format:
        ```json
        {{
            "title": "Rewritten title",
            "meta_description": "SEO-optimized meta description (max 160 characters)",
            "content": "Full rewritten content with proper markdown formatting",
            "suggested_images": [
                {{
                    "description": "Brief description of a suggested image",
                    "caption": "Suggested caption for the image",
                    "placement": "Where this image should be placed (e.g., 'After introduction', 'Before conclusion')"
                }}
            ]
        }}
        ```

        Ensure the JSON is properly formatted and valid.
        """

        return prompt

    def rewrite_blog(self, original_content: Dict[str, Any],
                    user_preferences: Dict[str, Any],
                    research_results: Dict[str, Any],
                    content_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """
        Rewrite the blog based on original content, user preferences, and research.

        Args:
            original_content: Original blog content
            user_preferences: User preferences for rewriting
            research_results: Research results for updating content
            content_analysis: Analysis of the original content

        Returns:
            Dictionary containing rewritten content
        """
        logger.info("Rewriting blog content")

        # Generate the prompt
        prompt = self.generate_rewrite_prompt(
            original_content, user_preferences, research_results, content_analysis
        )

        # Call the LLM to rewrite the content
        try:
            response = llm_text_gen(prompt)

            # Extract JSON from the response
            json_match = re.search(r'```json\s*(.*?)\s*```', response, re.DOTALL)
            if json_match:
                json_str = json_match.group(1)
            else:
                json_str = response

            # Clean up the JSON string
            json_str = re.sub(r'```(json)?', '', json_str).strip()

            # Parse the JSON
            rewritten_content = json.loads(json_str)

            # Validate the response structure
            required_fields = ["title", "meta_description", "content"]
            for field in required_fields:
                if field not in rewritten_content:
                    rewritten_content[field] = original_content.get(field, "")

            # Ensure suggested_images exists
            if "suggested_images" not in rewritten_content:
                rewritten_content["suggested_images"] = []

            return rewritten_content

        except Exception as e:
            logger.error(f"Error rewriting blog: {e}")
            return {
                "title": original_content.get("title", ""),
                "meta_description": original_content.get("meta_description", ""),
                "content": original_content.get("content", ""),
                "suggested_images": [],
                "error": str(e)
            }

    def generate_image(self, image_prompt: str, style: str = "realistic") -> str:
        """
        Generate an image based on the prompt.

        Args:
            image_prompt: Prompt for image generation
            style: Style of the image

        Returns:
            Path to the generated image
        """
        logger.info(f"Generating image with prompt: {image_prompt}")

        try:
            image_path = generate_image(image_prompt, style=style)
            return image_path
        except Exception as e:
            logger.error(f"Error generating image: {e}")
            return ""


def write_blog_rewriter():
    """Main function to display the blog rewriter UI."""
    st.title("AI Blog Rewriter & Updater")

    # Create a container for the header section
    with st.container():
        st.markdown("""
        <div style="background-color: #f8f9fa; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
            <h3 style="margin-top: 0;">Revitalize Your Content</h3>
            <p>Update, fact-check, and enhance your existing blog posts with AI assistance.
            Our tool analyzes your content, researches the latest information, and rewrites your blog
            to be more engaging, accurate, and SEO-friendly.</p>
        </div>
        """, unsafe_allow_html=True)

    # Initialize the BlogRewriter class
    if "blog_rewriter" not in st.session_state:
        st.session_state.blog_rewriter = BlogRewriter()

    # Initialize session state variables
    if "original_content" not in st.session_state:
        st.session_state.original_content = {}
    if "content_analysis" not in st.session_state:
        st.session_state.content_analysis = {}
    if "research_results" not in st.session_state:
        st.session_state.research_results = {}
    if "rewritten_content" not in st.session_state:
        st.session_state.rewritten_content = {}
    if "generated_images" not in st.session_state:
        st.session_state.generated_images = {}
    if "current_step" not in st.session_state:
        st.session_state.current_step = 1

    # Create tabs for the workflow
    tab1, tab2, tab3, tab4 = st.tabs([
        "1️⃣ Import Content",
        "2️⃣ Analyze & Research",
        "3️⃣ Rewrite Settings",
        "4️⃣ Results & Export"
    ])

    # Tab 1: Import Content
    with tab1:
        st.header("Import Your Blog Content")

        import_method = st.radio(
            "Choose import method:",
            ["Import from URL", "Paste content manually"],
            horizontal=True
        )

        if import_method == "Import from URL":
            url = st.text_input(
                "Enter blog URL:",
                placeholder="https://example.com/blog-post",
                help="Enter the full URL of the blog post you want to rewrite"
            )

            if st.button("Import Content", type="primary"):
                if not url:
                    st.error("Please enter a valid URL")
                else:
                    with st.spinner("Extracting content from URL..."):
                        # Extract content from URL
                        st.session_state.original_content = st.session_state.blog_rewriter.extract_content_from_url(url)

                        if "error" in st.session_state.original_content:
                            st.error(f"Error extracting content: {st.session_state.original_content['error']}")
                        else:
                            st.success("Content extracted successfully!")
                            st.session_state.current_step = 2
                            # Auto-click the next tab
                            st.experimental_rerun()
        else:
            col1, col2 = st.columns([3, 1])

            with col1:
                title = st.text_input(
                    "Blog Title:",
                    placeholder="Enter the title of your blog post"
                )

            with col2:
                author = st.text_input(
                    "Author (optional):",
                    placeholder="Author name"
                )

            meta_description = st.text_area(
                "Meta Description (optional):",
                placeholder="Enter the meta description of your blog post",
                max_chars=MAX_META_DESCRIPTION_LENGTH,
                height=80
            )

            content = st.text_area(
                "Blog Content:",
                placeholder="Paste your blog content here...",
                height=300
            )

            if st.button("Import Content", type="primary"):
                if not title or not content:
                    st.error("Please enter both title and content")
                else:
                    # Store the manually entered content
                    st.session_state.original_content = {
                        "title": title,
                        "meta_description": meta_description,
                        "content": content,
                        "author": author,
                        "headings": [],
                        "images": [],
                        "publish_date": None,
                        "url": None
                    }

                    st.success("Content imported successfully!")
                    st.session_state.current_step = 2
                    # Auto-click the next tab
                    st.experimental_rerun()

        # Display the imported content if available
        if st.session_state.original_content and "title" in st.session_state.original_content:
            with st.expander("View Imported Content", expanded=False):
                st.subheader(st.session_state.original_content["title"])

                if st.session_state.original_content.get("meta_description"):
                    st.markdown(f"**Meta Description:** {st.session_state.original_content['meta_description']}")

                if st.session_state.original_content.get("author"):
                    st.markdown(f"**Author:** {st.session_state.original_content['author']}")

                if st.session_state.original_content.get("publish_date"):
                    st.markdown(f"**Published:** {st.session_state.original_content['publish_date']}")

                st.markdown("**Content Preview:**")
                content_preview = st.session_state.original_content["content"]
                if len(content_preview) > 1000:
                    content_preview = content_preview[:1000] + "..."
                st.text_area("", content_preview, height=200, disabled=True)

                # Display images if available
                if st.session_state.original_content.get("images"):
                    st.markdown(f"**Images:** {len(st.session_state.original_content['images'])} images found")

    # Tab 2: Analyze & Research
    with tab2:
        st.header("Analyze & Research")

        if not st.session_state.original_content or "title" not in st.session_state.original_content:
            st.info("Please import your blog content first")
        else:
            col1, col2 = st.columns(2)

            with col1:
                if st.button("Analyze Content", type="primary"):
                    with st.spinner("Analyzing content..."):
                        # Analyze the content
                        st.session_state.content_analysis = st.session_state.blog_rewriter.analyze_content(
                            st.session_state.original_content
                        )
                        st.success("Content analysis complete!")

            with col2:
                research_depth = st.selectbox(
                    "Research Depth:",
                    ["low", "medium", "high"],
                    index=1,
                    format_func=lambda x: {"low": "Basic", "medium": "Standard", "high": "Comprehensive"}[x],
                    help="Choose the depth of research to update your content"
                )

                if st.button("Conduct Research", type="primary"):
                    with st.spinner("Researching latest information..."):
                        # Conduct research
                        st.session_state.research_results = st.session_state.blog_rewriter.conduct_research(
                            st.session_state.original_content["title"],
                            st.session_state.original_content["content"],
                            research_depth
                        )
                        st.success("Research complete!")

            # Display content analysis if available
            if st.session_state.content_analysis:
                st.subheader("Content Analysis")

                metrics = st.session_state.content_analysis.get("metrics", {})

                # Create metrics display
                col1, col2, col3, col4 = st.columns(4)
                with col1:
                    st.metric("Word Count", metrics.get("word_count", 0))
                with col2:
                    st.metric("Paragraphs", metrics.get("paragraph_count", 0))
                with col3:
                    st.metric("Sentences", metrics.get("sentence_count", 0))
                with col4:
                    content_age = st.session_state.content_analysis.get("content_age", {})
                    if "months" in content_age:
                        st.metric("Content Age", f"{content_age['months']} months")
                    elif "error" in content_age:
                        st.metric("Content Age", "Unknown")

                # Heading structure
                heading_structure = st.session_state.content_analysis.get("heading_structure", {})
                if heading_structure:
                    st.markdown("**Heading Structure:**")
                    for level, count in sorted(heading_structure.items()):
                        st.markdown(f"H{level}: {count} headings")

                # Image analysis
                images = st.session_state.content_analysis.get("images", {})
                if images:
                    st.markdown(f"**Images:** {images.get('count', 0)} images found, {images.get('with_alt_text', 0)} with alt text")

            # Display research results if available
            if st.session_state.research_results:
                st.subheader("Research Results")

                topics = st.session_state.research_results.get("topics", [])
                if topics:
                    for topic in topics:
                        with st.expander(f"Topic: {topic['topic']}", expanded=False):
                            for i, source in enumerate(topic.get("sources", [])):
                                st.markdown(f"**Source {i+1}:** {source.get('title', 'Untitled')}")
                                st.markdown(f"**URL:** {source.get('url', 'No URL')}")
                                st.markdown(f"**Content Preview:** {source.get('content', 'No content')[:200]}...")
                                st.markdown("---")
                else:
                    st.info("No research results available")

            # Enable proceeding to the next step if both analysis and research are done
            if st.session_state.content_analysis and st.session_state.research_results:
                if st.button("Proceed to Rewrite Settings", type="primary"):
                    st.session_state.current_step = 3
                    st.experimental_rerun()

    # Tab 3: Rewrite Settings
    with tab3:
        st.header("Rewrite Settings")

        if not st.session_state.original_content or "title" not in st.session_state.original_content:
            st.info("Please import your blog content first")
        elif not st.session_state.content_analysis or not st.session_state.research_results:
            st.info("Please complete content analysis and research first")
        else:
            # Create a form for rewrite settings
            with st.form("rewrite_settings_form"):
                st.subheader("Content Transformation")

                col1, col2 = st.columns(2)

                with col1:
                    rewrite_mode = st.selectbox(
                        "Rewrite Mode:",
                        list(REWRITE_MODES.keys()),
                        format_func=lambda x: x.replace("_", " ").title(),
                        help="Choose how you want to transform your content"
                    )

                    st.info(REWRITE_MODES[rewrite_mode])

                with col2:
                    tone = st.selectbox(
                        "Target Tone:",
                        TONE_OPTIONS,
                        index=0,
                        help="Choose the tone for your rewritten content"
                    )

                st.subheader("Content Length")

                original_word_count = st.session_state.content_analysis.get("metrics", {}).get("word_count", 0)

                length_option = st.radio(
                    "Target Length:",
                    ["same", "shorter", "longer", "custom"],
                    format_func=lambda x: {
                        "same": f"Same as original ({original_word_count} words)",
                        "shorter": f"Shorter (about {int(original_word_count * 0.7)} words)",
                        "longer": f"Longer (about {int(original_word_count * 1.3)} words)",
                        "custom": "Custom word count"
                    }[x],
                    horizontal=True
                )

                if length_option == "custom":
                    target_word_count = st.number_input(
                        "Custom Word Count:",
                        min_value=100,
                        max_value=10000,
                        value=original_word_count,
                        step=100
                    )
                else:
                    target_word_count = {
                        "same": original_word_count,
                        "shorter": int(original_word_count * 0.7),
                        "longer": int(original_word_count * 1.3)
                    }[length_option]

                st.subheader("SEO Optimization")

                keywords = st.text_input(
                    "Focus Keywords (comma-separated):",
                    placeholder="e.g., digital marketing, SEO, content strategy",
                    help="Enter keywords to optimize your content for"
                )

                st.subheader("Additional Instructions")

                special_instructions = st.text_area(
                    "Special Instructions (optional):",
                    placeholder="Add any specific instructions for rewriting your content...",
                    help="Provide any additional instructions for the AI"
                )

                # Submit button
                submitted = st.form_submit_button("Rewrite Blog", type="primary")

                if submitted:
                    # Process the form data
                    user_preferences = {
                        "rewrite_mode": rewrite_mode,
                        "tone": tone,
                        "target_word_count": target_word_count,
                        "keywords": [k.strip() for k in keywords.split(",")] if keywords else [],
                        "special_instructions": special_instructions
                    }

                    with st.spinner("Rewriting your blog..."):
                        # Rewrite the blog
                        st.session_state.rewritten_content = st.session_state.blog_rewriter.rewrite_blog(
                            st.session_state.original_content,
                            user_preferences,
                            st.session_state.research_results,
                            st.session_state.content_analysis
                        )

                        if "error" in st.session_state.rewritten_content:
                            st.error(f"Error rewriting blog: {st.session_state.rewritten_content['error']}")
                        else:
                            st.success("Blog rewritten successfully!")
                            st.session_state.current_step = 4
                            st.experimental_rerun()

    # Tab 4: Results & Export
    with tab4:
        st.header("Results & Export")

        if not st.session_state.rewritten_content or "title" not in st.session_state.rewritten_content:
            st.info("Please complete the rewriting process first")
        else:
            # Display the rewritten content
            st.subheader("Rewritten Blog")

            # Title and meta description
            st.markdown(f"## {st.session_state.rewritten_content['title']}")

            if st.session_state.rewritten_content.get("meta_description"):
                with st.expander("Meta Description", expanded=True):
                    st.text_area(
                        "",
                        st.session_state.rewritten_content["meta_description"],
                        height=80,
                        disabled=True
                    )

            # Create tabs for different views
            content_tab1, content_tab2 = st.tabs(["Preview", "Markdown"])

            with content_tab1:
                st.markdown(st.session_state.rewritten_content["content"])

            with content_tab2:
                st.text_area(
                    "",
                    st.session_state.rewritten_content["content"],
                    height=400
                )

            # Image generation section
            st.subheader("Generate Images")

            suggested_images = st.session_state.rewritten_content.get("suggested_images", [])
            if suggested_images:
                st.markdown("**Suggested Images:**")

                for i, img in enumerate(suggested_images):
                    with st.expander(f"Image {i+1}: {img.get('description', 'No description')}", expanded=False):
                        st.markdown(f"**Description:** {img.get('description', 'No description')}")
                        st.markdown(f"**Caption:** {img.get('caption', 'No caption')}")
                        st.markdown(f"**Placement:** {img.get('placement', 'No placement specified')}")

                        # Generate image button
                        col1, col2 = st.columns([3, 1])

                        with col1:
                            image_prompt = st.text_area(
                                "Image Prompt:",
                                value=img.get('description', ''),
                                key=f"image_prompt_{i}"
                            )

                        with col2:
                            style = st.selectbox(
                                "Style:",
                                ["realistic", "artistic", "cartoon", "3d_render"],
                                key=f"style_{i}"
                            )

                            if st.button("Generate Image", key=f"gen_img_{i}"):
                                with st.spinner("Generating image..."):
                                    image_path = st.session_state.blog_rewriter.generate_image(image_prompt, style)

                                    if image_path:
                                        # Store the generated image
                                        if "generated_images" not in st.session_state:
                                            st.session_state.generated_images = {}

                                        st.session_state.generated_images[f"image_{i}"] = {
                                            "path": image_path,
                                            "caption": img.get('caption', ''),
                                            "placement": img.get('placement', '')
                                        }

                                        st.success("Image generated successfully!")
                                        st.experimental_rerun()

                        # Display the generated image if available
                        if f"image_{i}" in st.session_state.generated_images:
                            st.image(
                                st.session_state.generated_images[f"image_{i}"]["path"],
                                caption=st.session_state.generated_images[f"image_{i}"]["caption"],
                                use_column_width=True
                            )
            else:
                st.info("No image suggestions available")

                # Custom image generation
                with st.expander("Generate Custom Image", expanded=True):
                    col1, col2 = st.columns([3, 1])

                    with col1:
                        custom_image_prompt = st.text_area(
                            "Image Prompt:",
                            placeholder="Describe the image you want to generate..."
                        )

                    with col2:
                        custom_style = st.selectbox(
                            "Style:",
                            ["realistic", "artistic", "cartoon", "3d_render"]
                        )

                        if st.button("Generate Custom Image"):
                            if not custom_image_prompt:
                                st.error("Please enter an image prompt")
                            else:
                                with st.spinner("Generating image..."):
                                    image_path = st.session_state.blog_rewriter.generate_image(custom_image_prompt, custom_style)

                                    if image_path:
                                        # Store the generated image
                                        if "generated_images" not in st.session_state:
                                            st.session_state.generated_images = {}

                                        st.session_state.generated_images["custom_image"] = {
                                            "path": image_path,
                                            "caption": "Custom generated image",
                                            "placement": "Custom placement"
                                        }

                                        st.success("Image generated successfully!")
                                        st.experimental_rerun()

                    # Display the generated custom image if available
                    if "custom_image" in st.session_state.generated_images:
                        st.image(
                            st.session_state.generated_images["custom_image"]["path"],
                            caption=st.session_state.generated_images["custom_image"]["caption"],
                            use_column_width=True
                        )

            # Export options
            st.subheader("Export Options")

            col1, col2, col3 = st.columns(3)

            with col1:
                st.download_button(
                    "Download as Markdown",
                    data=st.session_state.rewritten_content["content"],
                    file_name=f"{st.session_state.rewritten_content['title'].replace(' ', '_')}.md",
                    mime="text/markdown"
                )

            with col2:
                # Create HTML version
                html_content = f"""
                <!DOCTYPE html>
                <html>
                <head>
                    <title>{st.session_state.rewritten_content['title']}</title>
                    <meta name="description" content="{st.session_state.rewritten_content.get('meta_description', '')}">
                    <style>
                        body {{ font-family: Arial, sans-serif; line-height: 1.6; max-width: 800px; margin: 0 auto; padding: 20px; }}
                        h1, h2, h3, h4, h5, h6 {{ color: #333; }}
                        img {{ max-width: 100%; height: auto; }}
                        pre {{ background-color: #f5f5f5; padding: 15px; border-radius: 5px; overflow-x: auto; }}
                        blockquote {{ border-left: 5px solid #eee; padding-left: 15px; margin-left: 0; }}
                    </style>
                </head>
                <body>
                    <h1>{st.session_state.rewritten_content['title']}</h1>
                    {st.session_state.rewritten_content['content']}
                </body>
                </html>
                """

                st.download_button(
                    "Download as HTML",
                    data=html_content,
                    file_name=f"{st.session_state.rewritten_content['title'].replace(' ', '_')}.html",
                    mime="text/html"
                )

            with col3:
                # Create JSON version with all content and metadata
                json_content = {
                    "title": st.session_state.rewritten_content["title"],
                    "meta_description": st.session_state.rewritten_content.get("meta_description", ""),
                    "content": st.session_state.rewritten_content["content"],
                    "suggested_images": st.session_state.rewritten_content.get("suggested_images", []),
                    "generated_images": [
                        {
                            "caption": img_data["caption"],
                            "placement": img_data["placement"],
                            "path": img_data["path"]
                        }
                        for img_key, img_data in st.session_state.generated_images.items()
                    ] if hasattr(st.session_state, "generated_images") else [],
                    "original_title": st.session_state.original_content.get("title", ""),
                    "original_url": st.session_state.original_content.get("url", ""),
                    "rewrite_date": datetime.now().isoformat()
                }

                st.download_button(
                    "Download as JSON",
                    data=json.dumps(json_content, indent=2),
                    file_name=f"{st.session_state.rewritten_content['title'].replace(' ', '_')}.json",
                    mime="application/json"
                )

            # Copy to clipboard buttons
            st.subheader("Quick Copy")

            col1, col2, col3 = st.columns(3)

            with col1:
                if st.button("Copy Title", key="copy_title"):
                    st.code(st.session_state.rewritten_content["title"])
                    st.success("Title copied to clipboard!")

            with col2:
                if st.button("Copy Meta Description", key="copy_meta"):
                    st.code(st.session_state.rewritten_content.get("meta_description", ""))
                    st.success("Meta description copied to clipboard!")

            with col3:
                if st.button("Copy Full Content", key="copy_content"):
                    st.success("Content copied to clipboard!")

            # Comparison with original
            with st.expander("Compare with Original", expanded=False):
                comp_col1, comp_col2 = st.columns(2)

                with comp_col1:
                    st.subheader("Original")
                    st.markdown(f"**Title:** {st.session_state.original_content.get('title', '')}")
                    if st.session_state.original_content.get("meta_description"):
                        st.markdown(f"**Meta Description:** {st.session_state.original_content['meta_description']}")
                    st.text_area(
                        "Original Content",
                        st.session_state.original_content.get("content", ""),
                        height=300,
                        disabled=True
                    )

                with comp_col2:
                    st.subheader("Rewritten")
                    st.markdown(f"**Title:** {st.session_state.rewritten_content['title']}")
                    if st.session_state.rewritten_content.get("meta_description"):
                        st.markdown(f"**Meta Description:** {st.session_state.rewritten_content['meta_description']}")
                    st.text_area(
                        "Rewritten Content",
                        st.session_state.rewritten_content["content"],
                        height=300,
                        disabled=True
                    )

            # Start over button
            if st.button("Start Over", type="primary"):
                # Reset session state
                for key in ["original_content", "content_analysis", "research_results",
                           "rewritten_content", "generated_images", "current_step"]:
                    if key in st.session_state:
                        del st.session_state[key]

                st.experimental_rerun()

if __name__ == "__main__":
    write_blog_rewriter()