WIP - UI, Audio, firecrawl, long-form - V0.5

This commit is contained in:
ajaysi
2024-06-20 22:48:52 +05:30
parent 899abad1ba
commit 074ddf6210
12 changed files with 206 additions and 131 deletions

View File

@@ -1,21 +1,27 @@
import sys
import os
import asyncio
from textwrap import dedent
from pathlib import Path
from datetime import datetime
import streamlit as st
from gtts import gTTS
import base64
from dotenv import load_dotenv
# Load environment variables
load_dotenv(Path('../../.env'))
# Logger setup
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}")
from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search,\
do_tavily_ai_search, do_metaphor_ai_research, do_google_pytrends_analysis
# Import other necessary modules
from ..ai_web_researcher.gpt_online_researcher import (
do_google_serp_search, do_tavily_ai_search,
do_metaphor_ai_research, do_google_pytrends_analysis)
from .blog_from_google_serp import write_blog_google_serp, blog_with_research
from ..ai_web_researcher.you_web_reseacher import get_rag_results, search_ydc_index
from ..blog_metadata.get_blog_metadata import blog_metadata
@@ -23,6 +29,21 @@ from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
from ..gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
# Function to convert text to speech and save as an audio file
def text_to_speech(text, lang='en', output_file="output.mp3"):
    """Convert text to speech with gTTS and save the audio to a file.

    Args:
        text: The text to synthesize.
        lang: Language code handed to gTTS. Defaults to 'en'.
        output_file: Path the audio is written to. Defaults to
            "output.mp3" (the previously hard-coded name); pass a distinct
            path to avoid overwriting the audio of an earlier call.

    Returns:
        The path the audio was saved to (``output_file``).
    """
    tts = gTTS(text=text, lang=lang)
    tts.save(output_file)
    return output_file
# Function to get audio file as a downloadable link
def get_audio_file(audio_file, download_name="output.mp3"):
    """Build an HTML download link that embeds an audio file as a data URI.

    Args:
        audio_file: Path of the audio file to read (opened in binary mode).
        download_name: File name placed in the anchor's ``download``
            attribute. Defaults to "output.mp3" (the previously hard-coded
            name).

    Returns:
        An ``<a>`` element string whose ``href`` is a base64
        ``data:audio/mp3`` URI containing the file's bytes.
    """
    # Local import: only needed here, and keeps the block self-contained.
    from html import escape

    with open(audio_file, "rb") as file:
        data = file.read()
    b64_data = base64.b64encode(data).decode()
    # The name ends up inside a double-quoted attribute, so escape it.
    safe_name = escape(str(download_name), quote=True)
    return (
        f'<a href="data:audio/mp3;base64,{b64_data}" '
        f'download="{safe_name}">Download audio file</a>'
    )
def write_blog_from_keywords(search_keywords, url=None):
"""
This function will take a blog Topic to first generate sections for it
@@ -45,8 +66,8 @@ def write_blog_from_keywords(search_keywords, url=None):
status.update(label=f"🛀 Starting Tavily AI research: {search_keywords}")
tavily_search_result, t_titles, t_answer = do_tavily_ai_search(search_keywords)
status.update(label=f"🙆 Finished Google Search & Tavily AI Search on: {search_keywords}",
state="complete", expanded=False)
status.update(label=f"🙆 Finished Google Search & Tavily AI Search on: {search_keywords}",
state="complete", expanded=False)
except Exception as err:
st.error(f"Failed in web research: {err}")
@@ -66,21 +87,21 @@ def write_blog_from_keywords(search_keywords, url=None):
# logger.info/check the final blog content.
logger.info("######### Draft1: Finished Blog from Google web search: ###########")
with st.status("Started Writing blog from Tavily Web search..", expanded=True) as status:
# Do Tavily AI research to augument the above blog.
# Do Tavily AI research to augment the above blog.
try:
#example_blog_titles.append(t_titles)
# example_blog_titles.append(t_titles)
if blog_markdown_str and tavily_search_result:
logger.info(f"\n\n######### Blog content after Tavily AI research: ######### \n\n")
blog_markdown_str = write_blog_google_serp(search_keywords, tavily_search_result)
status.update(label="Finished Writing Blog From Tavily Results:{blog_markdown_str}", expanded=True)
status.update(label=f"Finished Writing Blog From Tavily Results:{blog_markdown_str}", expanded=True)
except Exception as err:
logger.error(f"Failed to do Tavily AI research: {err}")
status.update(label="🙎 Generating - Title, Meta Description, Tags, Categories for the content.", expanded=True)
try:
blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str)
blog_title, blog_meta_desc, blog_tags, blog_categories = asyncio.run(blog_metadata(blog_markdown_str))
except Exception as err:
st.error(f"Failed to get blog metadata: {err}")
@@ -94,38 +115,21 @@ def write_blog_from_keywords(search_keywords, url=None):
except Exception as err:
st.warning(f"Failed in Image generation: {err}")
saved_blog_to_file = save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc,
blog_tags, blog_categories, generated_image_filepath)
saved_blog_to_file = save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc,
blog_tags, blog_categories, generated_image_filepath)
status.update(label=f"Saved the content in this file: {saved_blog_to_file}")
logger.info(f"\n\n --------- Finished writing Blog for : {search_keywords} -------------- \n")
# Render the result on streamlit UI
st.image(generated_image_filepath)
st.markdown(f"{blog_markdown_str}")
status.update(label=f"Finished, Review & Use your Original Content Below: {saved_blog_to_file}", state="complete")
# Display options below the content
col1, col2, col3, col4, col5 = st.columns(5)
if col1.button('Copy'):
pyperclip.copy(blog_markdown_str)
st.success("Text copied to clipboard!")
if col2.button('Rephrase'):
rephrased_text = rephrase_text(blog_markdown_str)
st.markdown(rephrased_text)
if col3.button('Change Tone'):
tone = st.selectbox("Select Tone", ["Formal", "Casual", "Professional"])
if st.button("Apply Tone"):
toned_text = change_tone(blog_markdown_str, tone)
st.markdown(toned_text)
if col4.button('Make Shorter'):
shorter_text = make_shorter(blog_markdown_str)
st.markdown(shorter_text)
if col5.button('Translate'):
language = st.selectbox("Select Language", ["Spanish", "French", "German"])
if st.button("Translate"):
translated_text = translate_text(blog_markdown_str, language)
st.markdown(translated_text)
# Render the result on streamlit UI
if generated_image_filepath:
st.image(generated_image_filepath)
st.markdown(f"{blog_markdown_str}")
status.update(label=f"Finished, Review & Use your Original Content Below: {saved_blog_to_file}",
state="complete")
# Pass the text and language to the gTTS engine. slow=False tells the module to
# read the text at normal speed rather than slowly.
tts = gTTS(text=blog_markdown_str, lang='en', slow=False)
# Saving the converted audio in a mp3 file
tts.save("delete_me.mp3")
st.audio("delete_me.mp3")

View File

@@ -124,15 +124,15 @@ def long_form_generator(content_keywords):
# Configure generative AI
load_dotenv(Path('../.env'))
generation_config = {
"temperature": 0.6,
"temperature": 0.7,
"top_p": 1,
"max_output_tokens": 8096,
}
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
# Initialize the generative model
#model = genai.GenerativeModel('gemini-pro', generation_config=generation_config)
model_pro = genai.GenerativeModel('gemini-1.5-flash', generation_config=generation_config)
model = genai.GenerativeModel('gemini-1.5-flash', generation_config=generation_config)
model_pro = genai.GenerativeModel('gemini-pro', generation_config=generation_config)
# Do SERP web research for given keywords to generate title and outline.
web_research_result, g_titles = do_google_serp_search(content_keywords)
@@ -203,14 +203,14 @@ def long_form_generator(content_keywords):
logger.info(f"Writing in progress... Current draft length: {len(draft)} characters")
status.update(label=f"Writing in progress... Current draft length: {len(draft)} characters")
search_terms = f"""
I will provide you with blog outline, your task is to read the outline & return 8 google search keywords.
I will provide you with content outline below, your task is to read the outline & return 8 google search keywords.
Your response will be used to do web research for writing on the given outline.
Do not explain your response, provide 8 google search sentences encompassing the given content outline.
Provide the search term results as comma separated values.\n\n
Important: Provide the search term results as comma separated values.\n\n
Content Outline:\n
'{content_outline}'
"""
search_words = generate_with_retry(model_pro, search_terms).text
search_words = generate_with_retry(model, search_terms).text
status.update(label=f"Search terms from written draft: {search_words}")
while 'IAMDONE' not in continuation:
@@ -218,6 +218,7 @@ def long_form_generator(content_keywords):
str_list = re.split(r',\s*', search_words)
# Strip quotes from each element
str_list = [s.strip('\'"') for s in str_list]
for search_term in str_list:
web_research_result, m_titles, t_titles = do_tavily_ai_search(search_term, max_results=5)
try:

View File

@@ -17,7 +17,7 @@ logger.add(sys.stdout,
)
from ..ai_web_researcher.firecrawl_web_crawler import scrape_url
from ..blog_metadata.get_blog_metadata import blog_metadata
from ..blog_metadata.get_blog_metadata import blog_metadata, run_async
from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
from ..gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
@@ -31,7 +31,11 @@ def blog_from_url(weburl):
# Use to store the blog in a string, to save in a *.md file.
blog_markdown_str = None
tavily_search_result = None
example_blog_titles = []
# Initializing the variables
blog_title = None
blog_meta_desc = None
blog_tags = None
blog_categories = None
logger.info(f"Researching and Writing Blog on: {weburl}")
with st.status("Started Writing..", expanded=True) as status:
@@ -39,12 +43,12 @@ def blog_from_url(weburl):
status.update(label=f"Researching and Writing Blog on: {weburl}")
try:
scraped_text = scrape_url(weburl)
logger.info(scraped_text)
#logger.info(scraped_text)
except Exception as err:
st.error(f"Failed to scrape web page from url-{weburl} - Error: {err}")
logger.error(f"Failed in web research: {err}")
st.stop()
status.update(label="Successfully Scraped/Fetched url: {weburl}", expanded=False, state="complete")
status.update(label=f"Successfully Scraped/Fetched url: {weburl}", expanded=False, state="complete")
with st.status(f"Started Writing blog from {weburl}..", expanded=True) as status:
# Do Tavily AI research to augment the above blog.
@@ -58,7 +62,7 @@ def blog_from_url(weburl):
try:
status.update(label="🙎 Generating - Title, Meta Description, Tags, Categories for the content.")
blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str)
blog_title, blog_meta_desc, blog_tags, blog_categories = run_async(blog_metadata(blog_markdown_str))
except Exception as err:
st.error(f"Failed to get blog metadata: {err}")
@@ -71,8 +75,11 @@ def blog_from_url(weburl):
saved_blog_to_file = save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc,
blog_tags, blog_categories, generated_image_filepath)
status.update(label=f"Saved the content in this file: {saved_blog_to_file}")
logger.info(f"\n\n --------- Finished writing Blog for : {weburl} -------------- \n")
st.image(generated_image_filepath)
if generated_image_filepath:
st.image(generated_image_filepath)
st.markdown(f"{blog_markdown_str}")
status.update(label=f"Finished, Review & Use your Original Content Below: {saved_blog_to_file}", state="complete")