Blog writer enhancements & fixes

2025-04-29 08:55:47 +05:30
parent ef462f05f2
commit 9db20db0d1
45 changed files with 3000 additions and 3290 deletions
--- a/lib/utils/alwrity_utils.py
+++ b/lib/utils/alwrity_utils.py
@@ -1,272 +1,25 @@
 import re
 import os
 import PyPDF2
-import tiktoken
 import openai
 import streamlit as st
 import tempfile
 from loguru import logger

-from lib.ai_web_researcher.gpt_online_researcher import gpt_web_researcher
-from lib.ai_writers.keywords_to_blog_streamlit import write_blog_from_keywords
-from lib.ai_writers.speech_to_blog.main_audio_to_blog import generate_audio_blog
-from lib.ai_writers.long_form_ai_writer import long_form_generator
+
 from lib.ai_writers.ai_news_article_writer import ai_news_generation
-#from lib.ai_writers.ai_agents_crew_writer import ai_agents_writers
 from lib.ai_writers.ai_financial_writer import write_basic_ta_report
 from lib.ai_writers.ai_facebook_writer.facebook_ai_writer import facebook_main_menu
 from lib.ai_writers.linkedin_writer.linkedin_ai_writer import linkedin_main_menu
 from lib.ai_writers.twitter_writers.twitter_dashboard import run_dashboard
 from lib.ai_writers.insta_ai_writer import insta_writer
 from lib.ai_writers.youtube_writers.youtube_ai_writer import youtube_main_menu
-from lib.ai_writers.web_url_ai_writer import blog_from_url
-from lib.ai_writers.image_ai_writer import blog_from_image
 from lib.ai_writers.ai_essay_writer import ai_essay_generator
 from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
-from lib.utils.voice_processing import record_voice
 #from lib.content_planning_calender.content_planning_agents_alwrity_crew import ai_agents_content_planner
 from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen


-def is_youtube_link(text):
-    if text is not None:
-        youtube_regex = re.compile(r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})')
-        return youtube_regex.match(text)
-
-
-def is_web_link(text):
-    if text is not None:
-        web_regex = re.compile(r'(https?://)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)')
-        return web_regex.match(text)
-
-
-def process_input(input_text, uploaded_file):
-    if input_text and is_youtube_link(input_text):
-        if input_text.startswith("https://www.youtube.com/") or input_text.startswith("http://www.youtube.com/"):
-            return "youtube_url"
-        else:
-            st.error("Invalid YouTube URL. Please enter a valid URL.")
-            return None
-
-    elif input_text and is_web_link(input_text):
-        return "web_url"
-    
-    elif input_text:
-        return "keywords"
-    
-    if uploaded_file is not None:
-        file_details = {"filename": uploaded_file.name, "filetype": uploaded_file.type}
-        st.write(file_details)
-        if uploaded_file.type.startswith("text/"):
-            content = uploaded_file.read().decode("utf-8")
-            st.text(content)
-
-        elif uploaded_file.type == "application/pdf":
-            return "PDF_file"
-
-        elif uploaded_file.type in ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/msword"]:
-            st.write("Word document uploaded. Add your DOCX processing logic here.")
-        elif uploaded_file.type.startswith("image/"):
-            st.image(uploaded_file)
-            return "image_file"
-        elif uploaded_file.type.startswith("audio/"):
-            st.audio(uploaded_file)
-            return "audio_file"
-        elif uploaded_file.type.startswith("video/"):
-            st.video(uploaded_file)
-            return "video_file"
-    return None
-
-
-def blog_from_keyword():
-    """ Input blog keywords, research and write a factual blog."""
-    st.header("Blog Content Writer")
-    col1, col2, col3 = st.columns([2, 1.5, 0.5])
-    with col1:
-        user_input = st.text_area('**👇Enter Keywords/Title/YouTube Link/Web URLs**',
-                                  help='Provide keywords, titles, YouTube links, or web URLs to generate content.',
-                                  placeholder="""Write Blog From:
-        - Keywords/Blog Title: Provide keywords to web research & write blog.
-        - Attach file: Attach Text, Audio, Video, Image file to blog on.
-        - YouTube Link: Provide a YouTube video link to convert into blog.
-        - Web URLs: Provide web URL to write similar blog on.
-        - Provide Local folder location with your documents to use for content creation.""")
-        
-    with col2:
-        uploaded_file = st.file_uploader("**👇Attach files (Audio, Video, Image, Document)**",
-                                         type=["txt", "pdf", "docx", "jpg", "jpeg", "png", "mp3", "wav", "mp4", "mkv", "avi"],
-                                         help='Attach files such as audio, video, images, or documents.')
-    with col3:
-        audio_input = record_voice()
-        if audio_input:
-            st.info(audio_input)
-
-    # Validate the provided folder path
-    #st.info("🚨 Currently supported file formats are: PDF, plain text, CSV, Excel, Markdown, PowerPoint, and Word documents.")
-
-    temp_file_path = None
-    if uploaded_file is not None:
-        # Save the uploaded file to a temporary file
-        with tempfile.NamedTemporaryFile(delete=False, suffix=uploaded_file.name) as temp_file:
-            temp_file.write(uploaded_file.read())
-            temp_file_path = temp_file.name
-
-    content_type = st.radio("**👇Select content type:**", ["Normal-length content", "Long-form content", "Experimental - AI Agents team"])
-
-    # Add an expandable section for advanced writing options
-    with st.expander("Advanced Writing Options", expanded=False):
-        # Option 1: Select content type
-        content_type = st.radio("**👇 Select content type:**", 
-                            ["Normal-length content", "Long-form content", "Experimental - AI Agents team"])
-
-        # Option 2: Checkbox for 'Create SEO tags' (Checked by default)
-        create_seo_tags = st.checkbox('Create SEO tags', value=True, 
-                                  help='Generate json-ld schema, Twitter, and Facebook tags.')
-
-        # Option 3: Checkbox for 'Generate Social Media content' (Unchecked by default)
-        generate_social_media = st.checkbox('Generate Social Media content', value=False,
-                                help="Write Facebook, Instagram posts & tweets for generated blog. Needed for marketing your blogs.")
-
-        # Option 4: Checkbox for 'Do Content Analysis & Critique' (Unchecked by default)
-        content_analysis = st.checkbox('Do Content Analysis & Critique', value=False,
-                                       help="Blog Proof reading, Critique generated blog. Provide actionable changes & Editing options.")
-
-        # Display a message at the bottom for user guidance
-        st.info("🚨 Make sure to personalize content from the sidebar. Important.")
-
-    if st.button("Write Blog"):
-        # Clear the previous results from the screen
-        st.empty()
-        if user_input == "":
-            user_input = None
-        if not uploaded_file and not user_input and not audio_input:
-            st.error("🤬🤬 Either Enter/Type/Attach, can't read your mind.(yet..)")
-            st.stop()
-        else:
-            input_type = process_input(user_input, uploaded_file)
-        
-        if input_type == "keywords":
-            if user_input and len(user_input.split()) >= 2:
-                if content_type == "Normal-length content":
-                    try:
-                        short_blog = write_blog_from_keywords(user_input)
-                        st.markdown(short_blog)
-                    except Exception as err:
-                        st.error(f"🚫 Failed to write blog on {user_input}, Error: {err}")
-                elif content_type == "Long-form content":
-                    try:
-                        long_form_generator(user_input)
-                        st.success(f"Successfully wrote long-form blog on: {user_input}")
-                    except Exception as err:
-                        st.error(f"🚫 Failed to write blog on {user_input}, Error: {err}")
-                elif content_type == "Experimental - AI Agents team":
-                    try:
-                        ai_agents_writers(user_input)
-                        st.success(f"Successfully wrote content with AI agents on: {user_input}")
-                    except Exception as err:
-                        st.error(f"🚫 Failed to Write content with AI agents: {err}")
-            else:
-                st.error('🚫 Blog keywords should be at least two words long. Please try again.')
-        
-        elif input_type == "youtube_url" or input_type == "audio_file":
-            if not generate_audio_blog(user_input):
-                st.stop()
-        
-        elif input_type == "web_url":
-            blog_from_url(user_input)
-        
-        elif input_type == "image_file":
-            blog_from_image(user_input, temp_file_path)
-
-        elif input_type == "PDF_file":
-            pdf_reader = PyPDF2.PdfReader(uploaded_file)
-            text = ""
-            combined_result = ""
-            # Create a placeholder for the progress bar
-            progress_bar = st.progress(0)
-
-            # Loop through each page with a progress bar
-            for page_num, page in enumerate(pdf_reader.pages):
-                text += page.extract_text()
-                # Replace newlines with spaces
-                text = text.replace("\n", " ")
-                # Use regex to add a space between words that are combined
-                text = re.sub(r"(\w)([A-Z])", r"\1 \2", text)
-
-                results = blog_from_pdf(text)
-                # Update the progress bar
-                progress_bar.progress((page_num + 1) / len(pdf_reader.pages))
-                combined_result += str(results[-1])
-
-            # Clear progress bar at the end
-            progress_bar.empty()
-
-            st.markdown(combined_result)
-
-
-def blog_from_pdf(pdf_text):
-    """ Load in a long PDF and pull the text out. Create a prompt to be used to extract key bits of information.
-        Chunk up our document and process each chunk to pull any answers out. Combine them at the end. 
-        This simple approach will then be extended to three more difficult questions.
-    """
-    # FixME: 
-    document = '<document>'
-    template_prompt=f'''Extract key pieces of information from the given document.
-
-        When you extract a key piece of information, include the closest page number.
-        Ex: Extracted Information (Page number)
-        \n\nDocument: \"\"\"<document>\"\"\"\n\n'''
-
-    # Initialise tokenizer
-    tokenizer = tiktoken.get_encoding("cl100k_base")
-    results = []
-    
-    chunks = create_chunks(pdf_text, 1000, tokenizer)
-    text_chunks = [tokenizer.decode(chunk) for chunk in chunks]
-
-    for chunk in text_chunks:
-        results.append(extract_chunk(chunk, template_prompt))
-
-    #zipped = list(zip(*groups))
-    #zipped = [x for y in zipped for x in y if "Not specified" not in x and "__" not in x]
-    return results
-
-
-# Split a text into smaller chunks of size n, preferably ending at the end of a sentence
-def create_chunks(text, n, tokenizer):
-    tokens = tokenizer.encode(text)
-    """Yield successive n-sized chunks from text."""
-    i = 0
-    while i < len(tokens):
-        # Find the nearest end of sentence within a range of 0.5 * n and 1.5 * n tokens
-        j = min(i + int(1.5 * n), len(tokens))
-        while j > i + int(0.5 * n):
-            # Decode the tokens and check for full stop or newline
-            chunk = tokenizer.decode(tokens[i:j])
-            if chunk.endswith(".") or chunk.endswith("\n"):
-                break
-            j -= 1
-        # If no end of sentence found, use n tokens as the chunk size
-        if j == i + int(0.5 * n):
-            j = min(i + n, len(tokens))
-        yield tokens[i:j]
-        i = j
-
-
-def extract_chunk(document, template_prompt):
-    """ Chunking for large documents, exceed context window"""
-    client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
-    prompt = template_prompt.replace('<document>', document)
-
-    try:
-        response = llm_text_gen(prompt)
-        return response
-    except Exception as err:
-        logger.error(f"Exit: Failed to get response from LLM: {err}")
-        exit(1)
-
-
-
 def ai_agents_team():
    # Define options for AI Content Teams
    st.title("🐲 Your AI Agents Teams")
@@ -316,9 +69,9 @@ def content_agents():
        if content_keywords and len(content_keywords.split()) >= 2:
            with st.spinner("Generating Content..."):
                try:
-                    calendar_content = ai_agents_writers(content_keywords)
-                    st.success(f"Successfully generated content for: {content_keywords}")
-                    st.markdown(calendar_content)
+                    #calendar_content = ai_agents_writers(content_keywords)
+                    st.success(f"🚫 Not implemented yet: {content_keywords}")
+                    #st.markdown(calendar_content)
                except Exception as err:
                    st.error(f"🚫 Failed to generate content with AI Agents: {err}")
        else:
@@ -474,4 +227,4 @@ def ai_social_writer():
    elif "instagram" in selected_platform:
        insta_writer()
    elif "youtube" in selected_platform:
-        youtube_main_menu()
+        youtube_main_menu()
--- a/lib/utils/content_generators.py
+++ b/lib/utils/content_generators.py
@@ -1,57 +1,11 @@
 import streamlit as st
-from lib.utils.alwrity_utils import (
-    blog_from_keyword, ai_agents_team, essay_writer, ai_news_writer,
-    ai_finance_ta_writer
-)
+
 from lib.alwrity_ui.similar_analysis import competitor_analysis
 from lib.alwrity_ui.keyword_web_researcher import do_web_research
-from lib.ai_writers.ai_story_writer.story_writer import story_input_section
-from lib.ai_writers.ai_product_description_writer import write_ai_prod_desc
-from lib.ai_writers.ai_copywriter.copywriter_dashboard import copywriter_dashboard
-from lib.ai_writers.linkedin_writer import LinkedInAIWriter
-#from lib.content_planning_calender.content_planning_agents_alwrity_crew import ai_agents_content_planner
-
-
-def ai_writers():
-    options = [
-        "AI Blog Writer",
-        "Story Writer",
-        "Essay writer",
-        "Write News reports",
-        "Write Financial TA report",
-        "AI Product Description Writer",
-        "AI Copywriter",
-        "LinkedIn AI Writer",
-        "Quit"
-    ]
-    choice = st.selectbox("**👇Select a content creation type:**", options, index=0, format_func=lambda x: f"📝 {x}")
-
-    if choice == "AI Blog Writer":
-        blog_from_keyword()
-    elif choice == "Story Writer":
-        story_input_section()
-    elif choice == "Essay writer":
-        essay_writer()
-    elif choice == "Write News reports":
-        ai_news_writer()
-    elif choice == "Write Financial TA report":
-        ai_finance_ta_writer()
-    elif choice == "AI Product Description Writer":
-        write_ai_prod_desc()
-    elif choice == "AI Copywriter":
-        # Initialize the copywriter dashboard
-        copywriter_dashboard()
-    elif choice == "LinkedIn AI Writer":
-        # Initialize the LinkedIn AI Writer
-        linkedin_writer = LinkedInAIWriter()
-        linkedin_writer.run()
-    elif choice == "Quit":
-        st.info("Thank you for using Alwrity. Goodbye!")
-        st.stop()


 def content_planning_tools():
-    # Add custom CSS for compact layout
+    # Custom CSS for compact layout
    st.markdown("""
        <style>
            /* Reduce top padding of main container */
--- a/lib/utils/ui_setup.py
+++ b/lib/utils/ui_setup.py
@@ -1,10 +1,11 @@
 import os
 import streamlit as st
 from lib.utils.file_processor import load_image
-from lib.utils.content_generators import content_planning_tools, ai_writers
+from lib.utils.content_generators import content_planning_tools
 from lib.utils.alwrity_utils import ai_social_writer
 from lib.utils.seo_tools import ai_seo_tools
 from lib.utils.settings_page import render_settings_page
+from loguru import logger

 # Import social media writer functions
 from lib.ai_writers.ai_facebook_writer.facebook_ai_writer import facebook_main_menu
@@ -12,6 +13,7 @@ from lib.ai_writers.linkedin_writer.linkedin_ai_writer import linkedin_main_menu
 from lib.ai_writers.twitter_writers import run_dashboard
 from lib.ai_writers.insta_ai_writer import insta_writer
 from lib.ai_writers.youtube_writers.youtube_ai_writer import youtube_main_menu
+from lib.ai_writers.ai_writer_dashboard import get_ai_writers, list_ai_writers


 def setup_ui():
@@ -295,22 +297,26 @@ def setup_ui():

 def setup_alwrity_ui():
    """Sets up the main navigation in the sidebar."""
+    logger.info("Setting up ALwrity UI")
+    
    # Initialize session state for active tab if not exists
    if 'active_tab' not in st.session_state:
        st.session_state.active_tab = "Content Planning"
+        logger.info(f"Initialized active_tab to: {st.session_state.active_tab}")
    
    # Initialize session state for active sub-tab if not exists
    if 'active_sub_tab' not in st.session_state:
        st.session_state.active_sub_tab = None
+        logger.info("Initialized active_sub_tab to None")

    # Define the navigation items with their icons and functions
    nav_items = {
+        "AI Writers": ("📝", get_ai_writers),
        "Content Planning": ("📅", content_planning_tools),
-        "AI Writers": ("📝", ai_writers),
-        "Agents Teams": ("🤝", lambda: st.subheader("Agents Teams - Coming Soon!")),
        "AI SEO Tools": ("🔍", ai_seo_tools),
        "AI Social Tools": ("📱", None),  # Set to None as we'll handle this separately
-        "Ask Alwrity": ("💬", lambda: (
+        "Agents Teams(TBD)": ("🤝", lambda: st.subheader("Agents Teams - Coming Soon!")),
+        "Ask Alwrity(TBD)": ("💬", lambda: (
            st.subheader("Chat with your Data, Chat with any Data.. COMING SOON !"),
            st.markdown("Create a collection by uploading files (PDF, MD, CSV, etc), or crawl a data source (Websites, more sources coming soon."),
            st.markdown("One can ask/chat, summarize and do semantic search over the uploaded data")
@@ -318,6 +324,8 @@ def setup_alwrity_ui():
        "ALwrity Settings": ("⚙️", render_settings_page)
    }
    
+    logger.info(f"Defined {len(nav_items)} navigation items")
+
    # Define sub-menu items for AI Social Tools
    social_tools_submenu = {
        "Facebook": ("📘", lambda: facebook_main_menu()),
@@ -326,6 +334,8 @@ def setup_alwrity_ui():
        "Instagram": ("📸", lambda: insta_writer()),
        "YouTube": ("🎥", lambda: youtube_main_menu())
    }
+    
+    logger.info(f"Defined {len(social_tools_submenu)} social tools submenu items")

    # Create sidebar navigation
    st.sidebar.markdown("### ALwrity Options")
@@ -342,6 +352,7 @@ def setup_alwrity_ui():
                st.session_state.active_tab = name
                # Reset sub-tab when main tab changes
                st.session_state.active_sub_tab = None
+                logger.info(f"Selected main tab: {name}")
            
            # If AI Social Tools is active, show the sub-menu
            if st.session_state.active_tab == "AI Social Tools":
@@ -367,6 +378,7 @@ def setup_alwrity_ui():
                    if st.sidebar.button(f"{sub_icon} {sub_name}", key=button_key, 
                                       help=f"Navigate to {sub_name}", use_container_width=True):
                        st.session_state.active_sub_tab = sub_name
+                        logger.info(f"Selected social tool: {sub_name}")
                    
                    # Close the div
                    st.sidebar.markdown('</div>', unsafe_allow_html=True)
@@ -379,6 +391,7 @@ def setup_alwrity_ui():
                st.session_state.active_tab = name
                # Reset sub-tab when main tab changes
                st.session_state.active_sub_tab = None
+                logger.info(f"Selected main tab: {name}")

    st.sidebar.markdown('</div>', unsafe_allow_html=True)

@@ -427,13 +440,47 @@ def setup_alwrity_ui():
            # Call the function directly without any title
            social_tools_submenu[st.session_state.active_sub_tab][1]()
    else:
-        st.markdown("""
-            <style>
-                .main .block-container {
-                    padding-top: 0.25rem !important;
-                    padding-bottom: 0;
-                }
-            </style>
-        """, unsafe_allow_html=True)
-        st.title(f"{nav_items[st.session_state.active_tab][0]} {st.session_state.active_tab}")
-        nav_items[st.session_state.active_tab][1]()
+        # Check if we're in the AI Writers section and handle writer selection
+        if st.session_state.active_tab == "AI Writers":
+            # Get the writer parameter from the URL using st.query_params
+            writer = st.query_params.get("writer")
+            logger.info(f"Current writer from query params: {writer}")
+            
+            if writer:
+                # Get the list of writers without rendering the dashboard
+                writers = list_ai_writers()
+                logger.info(f"Found {len(writers)} writers")
+                
+                writer_found = False
+                for w in writers:
+                    logger.info(f"Checking writer: {w['name']} with path: {w['path']}")
+                    if w["path"] == writer:
+                        writer_found = True
+                        logger.info(f"Found matching writer: {w['name']}, executing function")
+                        # NOTE: a bare st.empty() only inserts an empty placeholder; it does not clear earlier output
+                        st.empty()
+                        # Execute the writer function
+                        w["function"]()
+                        break
+                
+                if not writer_found:
+                    logger.error(f"No writer found with path: {writer}")
+                    st.error(f"No writer found with path: {writer}")
+            else:
+                # If no writer selected, show the dashboard
+                logger.info("No writer selected, showing dashboard")
+                get_ai_writers()
+        else:
+            # For all other tabs, show the title
+            st.markdown("""
+                <style>
+                    .main .block-container {
+                        padding-top: 0.25rem !important;
+                        padding-bottom: 0;
+                    }
+                </style>
+            """, unsafe_allow_html=True)
+            st.title(f"{nav_items[st.session_state.active_tab][0]} {st.session_state.active_tab}")
+            nav_items[st.session_state.active_tab][1]()
+    
+    logger.info("Finished setting up ALwrity UI")