WIP - UI, Audio, firecrawl, long-form - V0.5

2024-06-20 22:48:52 +05:30
parent 899abad1ba
commit 074ddf6210
12 changed files with 206 additions and 131 deletions
--- a/lib/ai_writers/keywords_to_blog_streamlit.py
+++ b/lib/ai_writers/keywords_to_blog_streamlit.py
@@ -1,21 +1,27 @@
 import sys
 import os
+import asyncio
 from textwrap import dedent
 from pathlib import Path
 from datetime import datetime
 import streamlit as st
-
+from gtts import gTTS
+import base64
 from dotenv import load_dotenv
+
+# Load environment variables
 load_dotenv(Path('../../.env'))
+# Logger setup
 from loguru import logger
 logger.remove()
 logger.add(sys.stdout,
-        colorize=True,
-        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
-    )
+           colorize=True,
+           format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}")

-from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search,\
-        do_tavily_ai_search, do_metaphor_ai_research, do_google_pytrends_analysis
+# Import other necessary modules
+from ..ai_web_researcher.gpt_online_researcher import (
+        do_google_serp_search, do_tavily_ai_search, 
+        do_metaphor_ai_research, do_google_pytrends_analysis)
 from .blog_from_google_serp import write_blog_google_serp, blog_with_research
 from ..ai_web_researcher.you_web_reseacher import get_rag_results, search_ydc_index
 from ..blog_metadata.get_blog_metadata import blog_metadata
@@ -23,6 +29,21 @@ from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
 from ..gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image


+# Convert `text` to speech with gTTS (language code `lang`) and return the saved file's path.
+def text_to_speech(text, lang='en'):
+    tts = gTTS(text=text, lang=lang)
+    tts.save("output.mp3")  # fixed relative filename, so every call targets the same file
+    return "output.mp3"
+
+
+# Read `audio_file` and return an HTML <a> link (download name "output.mp3") embedding it as a base64 data URI.
+def get_audio_file(audio_file):
+    with open(audio_file, "rb") as file:
+        data = file.read()
+        b64_data = base64.b64encode(data).decode()  # bytes -> base64 text for the data URI
+        return f'<a href="data:audio/mp3;base64,{b64_data}" download="output.mp3">Download audio file</a>'
+
+
 def write_blog_from_keywords(search_keywords, url=None):
    """
    This function will take a blog Topic to first generate sections for it
@@ -45,8 +66,8 @@ def write_blog_from_keywords(search_keywords, url=None):

            status.update(label=f"🛀 Starting Tavily AI research: {search_keywords}")
            tavily_search_result, t_titles, t_answer = do_tavily_ai_search(search_keywords)
-            status.update(label=f"🙆 Finished Google Search & Tavily AI Search on: {search_keywords}", 
-                    state="complete", expanded=False)
+            status.update(label=f"🙆 Finished Google Search & Tavily AI Search on: {search_keywords}",
+                          state="complete", expanded=False)

        except Exception as err:
            st.error(f"Failed in web research: {err}")
@@ -66,21 +87,21 @@ def write_blog_from_keywords(search_keywords, url=None):

    # logger.info/check the final blog content.
    logger.info("######### Draft1: Finished Blog from Google web search: ###########")
-    
+
    with st.status("Started Writing blog from Tavily Web search..", expanded=True) as status:
-        # Do Tavily AI research to augument the above blog.
+        # Do Tavily AI research to augment the above blog.
        try:
-            #example_blog_titles.append(t_titles)
+            # example_blog_titles.append(t_titles)
            if blog_markdown_str and tavily_search_result:
                logger.info(f"\n\n######### Blog content after Tavily AI research: ######### \n\n")
                blog_markdown_str = write_blog_google_serp(search_keywords, tavily_search_result)
-                status.update(label="Finished Writing Blog From Tavily Results:{blog_markdown_str}", expanded=True)
+                status.update(label=f"Finished Writing Blog From Tavily Results:{blog_markdown_str}", expanded=True)
        except Exception as err:
            logger.error(f"Failed to do Tavily AI research: {err}")

        status.update(label="🙎 Generating - Title, Meta Description, Tags, Categories for the content.", expanded=True)
        try:
-            blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str)
+            blog_title, blog_meta_desc, blog_tags, blog_categories = asyncio.run(blog_metadata(blog_markdown_str))
        except Exception as err:
            st.error(f"Failed to get blog metadata: {err}")

@@ -94,38 +115,21 @@ def write_blog_from_keywords(search_keywords, url=None):
        except Exception as err:
            st.warning(f"Failed in Image generation: {err}")

-        saved_blog_to_file = save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, 
-                            blog_tags, blog_categories, generated_image_filepath)
+        saved_blog_to_file = save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc,
+                                               blog_tags, blog_categories, generated_image_filepath)
        status.update(label=f"Saved the content in this file: {saved_blog_to_file}")
        logger.info(f"\n\n --------- Finished writing Blog for : {search_keywords} -------------- \n")
-       
-        # Render the result on streamlit UI
-        st.image(generated_image_filepath)
-        st.markdown(f"{blog_markdown_str}")
-        status.update(label=f"Finished, Review & Use your Original Content Below: {saved_blog_to_file}", state="complete")

-        # Display options below the content
-        col1, col2, col3, col4, col5 = st.columns(5)
-        if col1.button('Copy'):
-            pyperclip.copy(blog_markdown_str)
-            st.success("Text copied to clipboard!")
-        
-        if col2.button('Rephrase'):
-            rephrased_text = rephrase_text(blog_markdown_str)
-            st.markdown(rephrased_text)
-        
-        if col3.button('Change Tone'):
-            tone = st.selectbox("Select Tone", ["Formal", "Casual", "Professional"])
-            if st.button("Apply Tone"):
-                toned_text = change_tone(blog_markdown_str, tone)
-                st.markdown(toned_text)
-        
-        if col4.button('Make Shorter'):
-            shorter_text = make_shorter(blog_markdown_str)
-            st.markdown(shorter_text)
-        
-        if col5.button('Translate'):
-            language = st.selectbox("Select Language", ["Spanish", "French", "German"])
-            if st.button("Translate"):
-                translated_text = translate_text(blog_markdown_str, language)
-                st.markdown(translated_text)
+        # Render the result on streamlit UI
+        if generated_image_filepath:
+            st.image(generated_image_filepath)
+        st.markdown(f"{blog_markdown_str}")
+        status.update(label=f"Finished, Review & Use your Original Content Below: {saved_blog_to_file}",
+                      state="complete")
+
+        # Convert the blog text to speech. slow=False (the gTTS default) requests normal reading
+        # speed rather than the slowed-down voice.
+        tts = gTTS(text=blog_markdown_str, lang='en', slow=False)
+        # Saving the converted audio in a mp3 file
+        tts.save("delete_me.mp3")
+        st.audio("delete_me.mp3")
--- a/lib/ai_writers/long_form_ai_writer.py
+++ b/lib/ai_writers/long_form_ai_writer.py
@@ -124,15 +124,15 @@ def long_form_generator(content_keywords):
        # Configure generative AI
        load_dotenv(Path('../.env'))
        generation_config = {
-           "temperature": 0.6,
+           "temperature": 0.7,
           "top_p": 1,
           "max_output_tokens": 8096,
        }
        
        genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
        # Initialize the generative model
-        #model = genai.GenerativeModel('gemini-pro', generation_config=generation_config)
-        model_pro = genai.GenerativeModel('gemini-1.5-flash', generation_config=generation_config)
+        model = genai.GenerativeModel('gemini-1.5-flash', generation_config=generation_config)
+        model_pro = genai.GenerativeModel('gemini-pro', generation_config=generation_config)
        
        # Do SERP web research for given keywords to generate title and outline.
        web_research_result, g_titles = do_google_serp_search(content_keywords)
@@ -203,14 +203,14 @@ def long_form_generator(content_keywords):
        logger.info(f"Writing in progress... Current draft length: {len(draft)} characters")
        status.update(label=f"Writing in progress... Current draft length: {len(draft)} characters")
        search_terms = f"""
-            I will provide you with blog outline, your task is to read the outline & return 8 google search keywords.
+            I will provide you with content outline below, your task is to read the outline & return 8 google search keywords.
            Your response will be used to do web research for writing on the given outline.
            Do not explain your response, provide 8 google search sentences encompassing the given content outline.
-            Provide the search term results as comma separated values.\n\n
+            Important: Provide the search term results as comma separated values.\n\n
            Content Outline:\n
            '{content_outline}'
            """
-        search_words = generate_with_retry(model_pro, search_terms).text
+        search_words = generate_with_retry(model, search_terms).text
        status.update(label=f"Search terms from written draft: {search_words}")
        
        while 'IAMDONE' not in continuation:
@@ -218,6 +218,7 @@ def long_form_generator(content_keywords):
            str_list = re.split(r',\s*', search_words)
            # Strip quotes from each element 
            str_list = [s.strip('\'"') for s in str_list]
+
            for search_term in str_list:
                web_research_result, m_titles, t_titles = do_tavily_ai_search(search_term, max_results=5)
                try:
--- a/lib/ai_writers/web_url_ai_writer.py
+++ b/lib/ai_writers/web_url_ai_writer.py
@@ -17,7 +17,7 @@ logger.add(sys.stdout,
    )

 from ..ai_web_researcher.firecrawl_web_crawler import scrape_url
-from ..blog_metadata.get_blog_metadata import blog_metadata
+from ..blog_metadata.get_blog_metadata import blog_metadata, run_async
 from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
 from ..gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
 from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
@@ -31,7 +31,11 @@ def blog_from_url(weburl):
    # Use to store the blog in a string, to save in a *.md file.
    blog_markdown_str = None
    tavily_search_result = None
-    example_blog_titles = []
+    # Initializing the variables
+    blog_title = None
+    blog_meta_desc = None
+    blog_tags = None
+    blog_categories = None

    logger.info(f"Researching and Writing Blog on: {weburl}")
    with st.status("Started Writing..", expanded=True) as status:
@@ -39,12 +43,12 @@ def blog_from_url(weburl):
        status.update(label=f"Researching and Writing Blog on: {weburl}")
        try:
            scraped_text = scrape_url(weburl)
-            logger.info(scraped_text)
+            #logger.info(scraped_text)
        except Exception as err:
            st.error(f"Failed to scrape web page from url-{weburl} - Error: {err}")
            logger.error(f"Failed in web research: {err}")
            st.stop()
-        status.update(label="Successfully Scraped/Fetched url: {weburl}", expanded=False, state="complete")
+        status.update(label=f"Successfully Scraped/Fetched url: {weburl}", expanded=False, state="complete")

    with st.status(f"Started Writing blog from {weburl}..", expanded=True) as status:
        # Do Tavily AI research to augument the above blog.
@@ -58,7 +62,7 @@ def blog_from_url(weburl):

        try:
            status.update(label="🙎 Generating - Title, Meta Description, Tags, Categories for the content.")
-            blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str)
+            blog_title, blog_meta_desc, blog_tags, blog_categories = run_async(blog_metadata(blog_markdown_str))
        except Exception as err:
            st.error(f"Failed to get blog metadata: {err}")

@@ -71,8 +75,11 @@ def blog_from_url(weburl):
        saved_blog_to_file = save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, 
                            blog_tags, blog_categories, generated_image_filepath)
        status.update(label=f"Saved the content in this file: {saved_blog_to_file}")
+        
        logger.info(f"\n\n --------- Finished writing Blog for : {weburl} -------------- \n")
-        st.image(generated_image_filepath)
+        if generated_image_filepath:
+            st.image(generated_image_filepath)
+        
        st.markdown(f"{blog_markdown_str}")
        status.update(label=f"Finished, Review & Use your Original Content Below: {saved_blog_to_file}", state="complete")