Blog writer enhancements & fixes

2025-04-29 08:55:47 +05:30
parent ef462f05f2
commit 9db20db0d1
45 changed files with 3000 additions and 3290 deletions
--- a/lib/utils/alwrity_utils.py
+++ b/lib/utils/alwrity_utils.py
@@ -1,272 +1,25 @@
 import re
 import os
 import PyPDF2
-import tiktoken
 import openai
 import streamlit as st
 import tempfile
 from loguru import logger

-from lib.ai_web_researcher.gpt_online_researcher import gpt_web_researcher
-from lib.ai_writers.keywords_to_blog_streamlit import write_blog_from_keywords
-from lib.ai_writers.speech_to_blog.main_audio_to_blog import generate_audio_blog
-from lib.ai_writers.long_form_ai_writer import long_form_generator
+
 from lib.ai_writers.ai_news_article_writer import ai_news_generation
-#from lib.ai_writers.ai_agents_crew_writer import ai_agents_writers
 from lib.ai_writers.ai_financial_writer import write_basic_ta_report
 from lib.ai_writers.ai_facebook_writer.facebook_ai_writer import facebook_main_menu
 from lib.ai_writers.linkedin_writer.linkedin_ai_writer import linkedin_main_menu
 from lib.ai_writers.twitter_writers.twitter_dashboard import run_dashboard
 from lib.ai_writers.insta_ai_writer import insta_writer
 from lib.ai_writers.youtube_writers.youtube_ai_writer import youtube_main_menu
-from lib.ai_writers.web_url_ai_writer import blog_from_url
-from lib.ai_writers.image_ai_writer import blog_from_image
 from lib.ai_writers.ai_essay_writer import ai_essay_generator
 from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
-from lib.utils.voice_processing import record_voice
 #from lib.content_planning_calender.content_planning_agents_alwrity_crew import ai_agents_content_planner
 from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen


-def is_youtube_link(text):
-    if text is not None:
-        youtube_regex = re.compile(r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})')
-        return youtube_regex.match(text)
-
-
-def is_web_link(text):
-    if text is not None:
-        web_regex = re.compile(r'(https?://)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)')
-        return web_regex.match(text)
-
-
-def process_input(input_text, uploaded_file):
-    if input_text and is_youtube_link(input_text):
-        if input_text.startswith("https://www.youtube.com/") or input_text.startswith("http://www.youtube.com/"):
-            return "youtube_url"
-        else:
-            st.error("Invalid YouTube URL. Please enter a valid URL.")
-            return None
-
-    elif input_text and is_web_link(input_text):
-        return "web_url"
-    
-    elif input_text:
-        return "keywords"
-    
-    if uploaded_file is not None:
-        file_details = {"filename": uploaded_file.name, "filetype": uploaded_file.type}
-        st.write(file_details)
-        if uploaded_file.type.startswith("text/"):
-            content = uploaded_file.read().decode("utf-8")
-            st.text(content)
-
-        elif uploaded_file.type == "application/pdf":
-            return "PDF_file"
-
-        elif uploaded_file.type in ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/msword"]:
-            st.write("Word document uploaded. Add your DOCX processing logic here.")
-        elif uploaded_file.type.startswith("image/"):
-            st.image(uploaded_file)
-            return "image_file"
-        elif uploaded_file.type.startswith("audio/"):
-            st.audio(uploaded_file)
-            return "audio_file"
-        elif uploaded_file.type.startswith("video/"):
-            st.video(uploaded_file)
-            return "video_file"
-    return None
-
-
-def blog_from_keyword():
-    """ Input blog keywords, research and write a factual blog."""
-    st.header("Blog Content Writer")
-    col1, col2, col3 = st.columns([2, 1.5, 0.5])
-    with col1:
-        user_input = st.text_area('**👇Enter Keywords/Title/YouTube Link/Web URLs**',
-                                  help='Provide keywords, titles, YouTube links, or web URLs to generate content.',
-                                  placeholder="""Write Blog From:
-        - Keywords/Blog Title: Provide keywords to web research & write blog.
-        - Attach file: Attach Text, Audio, Video, Image file to blog on.
-        - YouTube Link: Provide a YouTube video link to convert into blog.
-        - Web URLs: Provide web URL to write similar blog on.
-        - Provide Local folder location with your documents to use for content creation.""")
-        
-    with col2:
-        uploaded_file = st.file_uploader("**👇Attach files (Audio, Video, Image, Document)**",
-                                         type=["txt", "pdf", "docx", "jpg", "jpeg", "png", "mp3", "wav", "mp4", "mkv", "avi"],
-                                         help='Attach files such as audio, video, images, or documents.')
-    with col3:
-        audio_input = record_voice()
-        if audio_input:
-            st.info(audio_input)
-
-    # Validate the provided folder path
-    #st.info("🚨 Currently supported file formats are: PDF, plain text, CSV, Excel, Markdown, PowerPoint, and Word documents.")
-
-    temp_file_path = None
-    if uploaded_file is not None:
-        # Save the uploaded file to a temporary file
-        with tempfile.NamedTemporaryFile(delete=False, suffix=uploaded_file.name) as temp_file:
-            temp_file.write(uploaded_file.read())
-            temp_file_path = temp_file.name
-
-    content_type = st.radio("**👇Select content type:**", ["Normal-length content", "Long-form content", "Experimental - AI Agents team"])
-
-    # Add an expandable section for advanced writing options
-    with st.expander("Advanced Writing Options", expanded=False):
-        # Option 1: Select content type
-        content_type = st.radio("**👇 Select content type:**", 
-                            ["Normal-length content", "Long-form content", "Experimental - AI Agents team"])
-
-        # Option 2: Checkbox for 'Create SEO tags' (Checked by default)
-        create_seo_tags = st.checkbox('Create SEO tags', value=True, 
-                                  help='Generate json-ld schema, Twitter, and Facebook tags.')
-
-        # Option 3: Checkbox for 'Generate Social Media content' (Unchecked by default)
-        generate_social_media = st.checkbox('Generate Social Media content', value=False,
-                                help="Write Facebook, Instagram posts & tweets for generated blog. Needed for marketing your blogs.")
-
-        # Option 4: Checkbox for 'Do Content Analysis & Critique' (Unchecked by default)
-        content_analysis = st.checkbox('Do Content Analysis & Critique', value=False,
-                                       help="Blog Proof reading, Critique generated blog. Provide actionable changes & Editing options.")
-
-        # Display a message at the bottom for user guidance
-        st.info("🚨 Make sure to personalize content from the sidebar. Important.")
-
-    if st.button("Write Blog"):
-        # Clear the previous results from the screen
-        st.empty()
-        if user_input == "":
-            user_input = None
-        if not uploaded_file and not user_input and not audio_input:
-            st.error("🤬🤬 Either Enter/Type/Attach, can't read your mind.(yet..)")
-            st.stop()
-        else:
-            input_type = process_input(user_input, uploaded_file)
-        
-        if input_type == "keywords":
-            if user_input and len(user_input.split()) >= 2:
-                if content_type == "Normal-length content":
-                    try:
-                        short_blog = write_blog_from_keywords(user_input)
-                        st.markdown(short_blog)
-                    except Exception as err:
-                        st.error(f"🚫 Failed to write blog on {user_input}, Error: {err}")
-                elif content_type == "Long-form content":
-                    try:
-                        long_form_generator(user_input)
-                        st.success(f"Successfully wrote long-form blog on: {user_input}")
-                    except Exception as err:
-                        st.error(f"🚫 Failed to write blog on {user_input}, Error: {err}")
-                elif content_type == "Experimental - AI Agents team":
-                    try:
-                        ai_agents_writers(user_input)
-                        st.success(f"Successfully wrote content with AI agents on: {user_input}")
-                    except Exception as err:
-                        st.error(f"🚫 Failed to Write content with AI agents: {err}")
-            else:
-                st.error('🚫 Blog keywords should be at least two words long. Please try again.')
-        
-        elif input_type == "youtube_url" or input_type == "audio_file":
-            if not generate_audio_blog(user_input):
-                st.stop()
-        
-        elif input_type == "web_url":
-            blog_from_url(user_input)
-        
-        elif input_type == "image_file":
-            blog_from_image(user_input, temp_file_path)
-
-        elif input_type == "PDF_file":
-            pdf_reader = PyPDF2.PdfReader(uploaded_file)
-            text = ""
-            combined_result = ""
-            # Create a placeholder for the progress bar
-            progress_bar = st.progress(0)
-
-            # Loop through each page with a progress bar
-            for page_num, page in enumerate(pdf_reader.pages):
-                text += page.extract_text()
-                # Replace newlines with spaces
-                text = text.replace("\n", " ")
-                # Use regex to add a space between words that are combined
-                text = re.sub(r"(\w)([A-Z])", r"\1 \2", text)
-
-                results = blog_from_pdf(text)
-                # Update the progress bar
-                progress_bar.progress((page_num + 1) / len(pdf_reader.pages))
-                combined_result += str(results[-1])
-
-            # Clear progress bar at the end
-            progress_bar.empty()
-
-            st.markdown(combined_result)
-
-
-def blog_from_pdf(pdf_text):
-    """ Load in a long PDF and pull the text out. Create a prompt to be used to extract key bits of information.
-        Chunk up our document and process each chunk to pull any answers out. Combine them at the end. 
-        This simple approach will then be extended to three more difficult questions.
-    """
-    # FixME: 
-    document = '<document>'
-    template_prompt=f'''Extract key pieces of information from the given document.
-
-        When you extract a key piece of information, include the closest page number.
-        Ex: Extracted Information (Page number)
-        \n\nDocument: \"\"\"<document>\"\"\"\n\n'''
-
-    # Initialise tokenizer
-    tokenizer = tiktoken.get_encoding("cl100k_base")
-    results = []
-    
-    chunks = create_chunks(pdf_text, 1000, tokenizer)
-    text_chunks = [tokenizer.decode(chunk) for chunk in chunks]
-
-    for chunk in text_chunks:
-        results.append(extract_chunk(chunk, template_prompt))
-
-    #zipped = list(zip(*groups))
-    #zipped = [x for y in zipped for x in y if "Not specified" not in x and "__" not in x]
-    return results
-
-
-# Split a text into smaller chunks of size n, preferably ending at the end of a sentence
-def create_chunks(text, n, tokenizer):
-    tokens = tokenizer.encode(text)
-    """Yield successive n-sized chunks from text."""
-    i = 0
-    while i < len(tokens):
-        # Find the nearest end of sentence within a range of 0.5 * n and 1.5 * n tokens
-        j = min(i + int(1.5 * n), len(tokens))
-        while j > i + int(0.5 * n):
-            # Decode the tokens and check for full stop or newline
-            chunk = tokenizer.decode(tokens[i:j])
-            if chunk.endswith(".") or chunk.endswith("\n"):
-                break
-            j -= 1
-        # If no end of sentence found, use n tokens as the chunk size
-        if j == i + int(0.5 * n):
-            j = min(i + n, len(tokens))
-        yield tokens[i:j]
-        i = j
-
-
-def extract_chunk(document, template_prompt):
-    """ Chunking for large documents, exceed context window"""
-    client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
-    prompt = template_prompt.replace('<document>', document)
-
-    try:
-        response = llm_text_gen(prompt)
-        return response
-    except Exception as err:
-        logger.error(f"Exit: Failed to get response from LLM: {err}")
-        exit(1)
-
-
-
 def ai_agents_team():
    # Define options for AI Content Teams
    st.title("🐲 Your AI Agents Teams")
@@ -316,9 +69,9 @@ def content_agents():
        if content_keywords and len(content_keywords.split()) >= 2:
            with st.spinner("Generating Content..."):
                try:
-                    calendar_content = ai_agents_writers(content_keywords)
-                    st.success(f"Successfully generated content for: {content_keywords}")
-                    st.markdown(calendar_content)
+                    #calendar_content = ai_agents_writers(content_keywords)
+                    st.success(f"🚫 Not implemented yet: {content_keywords}")
+                    #st.markdown(calendar_content)
                except Exception as err:
                    st.error(f"🚫 Failed to generate content with AI Agents: {err}")
        else:
@@ -474,4 +227,4 @@ def ai_social_writer():
    elif "instagram" in selected_platform:
        insta_writer()
    elif "youtube" in selected_platform:
-        youtube_main_menu()
+        youtube_main_menu()