From e6f60feba58cb446349833bb7b7492bf903a0e95 Mon Sep 17 00:00:00 2001 From: ajaysi Date: Sat, 12 Oct 2024 07:59:13 +0530 Subject: [PATCH] YT to blog, bug fixes - WIP --- .../speech_to_blog/main_audio_to_blog.py | 66 +++++++++- .../write_blogs_from_youtube_videos.py | 82 ------------- .../gemini_audio_text.py | 116 ++++++++++++------ .../stt_audio_blog.py | 35 ++++-- lib/utils/alwrity_utils.py | 3 +- requirements.txt | 1 + 6 files changed, 170 insertions(+), 133 deletions(-) delete mode 100644 lib/ai_writers/speech_to_blog/write_blogs_from_youtube_videos.py diff --git a/lib/ai_writers/speech_to_blog/main_audio_to_blog.py b/lib/ai_writers/speech_to_blog/main_audio_to_blog.py index 62cba2f4..432d4a7e 100644 --- a/lib/ai_writers/speech_to_blog/main_audio_to_blog.py +++ b/lib/ai_writers/speech_to_blog/main_audio_to_blog.py @@ -2,10 +2,13 @@ import os import datetime #I wish import sys from textwrap import dedent -import openai from tqdm import tqdm, trange import time +from pytubefix import YouTube +import tempfile +from html2image import Html2Image + from loguru import logger logger.remove() logger.add(sys.stdout, @@ -13,11 +16,70 @@ logger.add(sys.stdout, format="{level}|{file}:{line}:{function}| {message}" ) -from .write_blogs_from_youtube_videos import youtube_to_blog from ...ai_web_researcher.gpt_online_researcher import do_google_serp_search from ..blog_from_google_serp import blog_with_research from ...blog_metadata.get_blog_metadata import blog_metadata from ...blog_postprocessing.save_blog_to_file import save_blog_to_file +from ...gpt_providers.audio_to_text_generation.stt_audio_blog import speech_to_text +from ...gpt_providers.text_generation.main_text_generation import llm_text_gen + + +def youtube_to_blog(video_url): + """Function to transcribe a given youtube url """ + try: + # Starting the speech-to-text process + logger.info("Starting with Speech to Text.") + audio_text, audio_title = speech_to_text(video_url) + except Exception as e: + logger.error(f"Error in speech_to_text: {e}") + sys.exit(1) # Exit the program due to error in speech_to_text + + try: + # Summarizing the content of the YouTube video + audio_blog_content = summarize_youtube_video(audio_text) + logger.info("Successfully converted given URL to blog article.") + return audio_blog_content, audio_title + except Exception as e: + logger.error(f"Error in summarize_youtube_video: {e}") + return False + + +def summarize_youtube_video(user_content): + """Generates a summary of a YouTube video using OpenAI GPT-3 and displays a progress bar. + Args: + video_link: The URL of the YouTube video to summarize. + Returns: + A string containing the summary of the video. + """ + + logger.info("Start summarize_youtube_video..") + prompt = f""" + You are an expert copywriter specializing in digital content writing. I will provide you with a transcript. + Your task is to transform a given transcript into a well-structured and informative blog article. + Please follow the below objectives: + + 1. Master the Transcript: Understand main ideas, key points, and the core message. + 2. Sentence Structure: Rephrase while preserving logical flow and coherence. Dont quote anyone from video. + 3. Note: Check if the transcript is about programming, then include code examples and snippets in your article. + 4. Write Unique Content: Avoid direct copying; rewrite in your own words. + 5. REMEMBER to avoid direct quoting and maintain uniqueness. + 6. Proofread: Check for grammar, spelling, and punctuation errors. + 7. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, interjections, and colloquialisms. 8. Avoid repetitive phrases and unnatural sentence structures. + 9. Ensure Uniqueness: Guarantee the article is plagiarism-free. + 10. Punctuation: Use appropriate question marks at the end of questions. + 11. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools. + 12. Rephrase words like 'video, youtube, channel' with 'article, blog' and such suitable words. + + Follow the above guidelines to create a well-optimized, unique, and informative article, + that will rank well in search engine results and engage readers effectively. + Follow above guidelines to craft a blog content from the following transcript:\n{user_content} + """ + try: + response = llm_text_gen(prompt) + return response + except Exception as err: + logger.error(f"Failed to summarize_youtube_video: {err}") + exit(1) def generate_audio_blog(audio_input): diff --git a/lib/ai_writers/speech_to_blog/write_blogs_from_youtube_videos.py b/lib/ai_writers/speech_to_blog/write_blogs_from_youtube_videos.py deleted file mode 100644 index 62fbe6d9..00000000 --- a/lib/ai_writers/speech_to_blog/write_blogs_from_youtube_videos.py +++ /dev/null @@ -1,82 +0,0 @@ -import os -import time -import sys - -from pytube import YouTube -import tempfile -import openai -from html2image import Html2Image -from tqdm import tqdm, trange -import google.generativeai as genai - -from loguru import logger -logger.remove() -logger.add(sys.stdout, - colorize=True, - format="{level}|{file}:{line}:{function}| {message}" - ) - - -from ...gpt_providers.audio_to_text_generation.stt_audio_blog import speech_to_text -from ...gpt_providers.text_generation.main_text_generation import llm_text_gen - - -def youtube_to_blog(video_url): - """Function to transcribe a given youtube url """ - # fixme: Doesnt work all types of yt urls. - vid_id = video_url.split("=")[1] - - try: - # Starting the speech-to-text process - logger.info("Starting with Speech to Text.") - audio_text, audio_title = speech_to_text(video_url) - except Exception as e: - logger.error(f"Error in speech_to_text: {e}") - sys.exit(1) # Exit the program due to error in speech_to_text - - try: - # Summarizing the content of the YouTube video - audio_blog_content = summarize_youtube_video(audio_text) - logger.info("Successfully converted given URL to blog article.") - return audio_blog_content, audio_title - except Exception as e: - logger.error(f"Error in summarize_youtube_video: {e}") - sys.exit(1) # Exit the program due to error in summarize_youtube_video - - -def summarize_youtube_video(user_content): - """Generates a summary of a YouTube video using OpenAI GPT-3 and displays a progress bar. - Args: - video_link: The URL of the YouTube video to summarize. - Returns: - A string containing the summary of the video. - """ - - logger.info("Start summarize_youtube_video..") - prompt = f""" - You are an expert copywriter specializing in digital content writing. I will provide you with a transcript. - Your task is to transform a given transcript into a well-structured and informative blog article. - Please follow the below objectives: - - 1. Master the Transcript: Understand main ideas, key points, and the core message. - 2. Sentence Structure: Rephrase while preserving logical flow and coherence. Dont quote anyone from video. - 3. Note: Check if the transcript is about programming, then include code examples and snippets in your article. - 4. Write Unique Content: Avoid direct copying; rewrite in your own words. - 5. REMEMBER to avoid direct quoting and maintain uniqueness. - 6. Proofread: Check for grammar, spelling, and punctuation errors. - 7. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, interjections, and colloquialisms. 8. Avoid repetitive phrases and unnatural sentence structures. - 9. Ensure Uniqueness: Guarantee the article is plagiarism-free. - 10. Punctuation: Use appropriate question marks at the end of questions. - 11. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools. - 12. Rephrase words like 'video, youtube, channel' with 'article, blog' and such suitable words. - - Follow the above guidelines to create a well-optimized, unique, and informative article, - that will rank well in search engine results and engage readers effectively. - Follow above guidelines to craft a blog content from the following transcript:\n{user_content} - """ - try: - response = llm_text_gen(prompt) - return response - except Exception as err: - logger.error(f"Failed to summarize_youtube_video: {err}") - exit(1) diff --git a/lib/gpt_providers/audio_to_text_generation/gemini_audio_text.py b/lib/gpt_providers/audio_to_text_generation/gemini_audio_text.py index f692f94d..5625dcea 100644 --- a/lib/gpt_providers/audio_to_text_generation/gemini_audio_text.py +++ b/lib/gpt_providers/audio_to_text_generation/gemini_audio_text.py @@ -1,61 +1,101 @@ import os +import sys + import google.generativeai as genai from dotenv import load_dotenv +from loguru import logger +logger.remove() +logger.add(sys.stdout, + colorize=True, + format="{level}|{file}:{line}:{function}| {message}" + ) + + def load_environment(): - """Load environment variables from a .env file.""" + """Loads environment variables from a .env file.""" load_dotenv() + logger.info("Environment variables loaded successfully.") + def configure_google_api(): - """Configure the Google API for audio summarization.""" + """Configures the Google Gemini API for audio transcription. + + Raises: + ValueError: If the GEMINI_API_KEY environment variable is not set. + """ api_key = os.getenv("GEMINI_API_KEY") if not api_key: - raise ValueError("Google API key not found. Please set the GEMINI_API_KEY environment variable.") + error_message = "Google API key not found. Please set the GEMINI_API_KEY environment variable." + logger.error(error_message) + raise ValueError(error_message) + genai.configure(api_key=api_key) + logger.info("Google Gemini API configured successfully.") + def transcribe_audio(audio_file_path): - """Summarize the audio using Google's Generative API. - + """ + Transcribes audio using Google's Gemini Pro model. + Args: - audio_file_path (str): The path to the audio file to be summarized. - + audio_file_path (str): The path to the audio file to be transcribed. + Returns: - str: The summary text of the audio. - + str: The transcribed text from the audio. + Returns None if transcription fails. + Raises: - ValueError: If the audio file path is invalid or the API response is not successful. - Exception: For any other errors that occur during the process. + FileNotFoundError: If the audio file is not found. """ try: - # Load environment variables and configure API + # Load environment variables and configure the Google API load_environment() configure_google_api() - # Create generative model instance - model = genai.GenerativeModel("models/gemini-1.5-pro-latest") - audio_file = None - try: - # Upload the audio file - audio_file = genai.upload_file(path=audio_file_path) - except Exception as err: - print(err) - # Generate the summary - response = model.generate_content( - [ - "Listen carefully to the given following audio file. Transcribe the following given audio.", - audio_file - ] - ) - - # Check if the response contains text - if not hasattr(response, 'text'): - raise ValueError("The API response does not contain text.") - - return response.text + logger.info(f"Attempting to transcribe audio file: {audio_file_path}") + + # Check if file exists + if not os.path.exists(audio_file_path): + error_message = f"FileNotFoundError: The audio file at {audio_file_path} does not exist." + logger.error(error_message) + raise FileNotFoundError(error_message) + + # Initialize a Gemini model appropriate for your use case. + model = genai.GenerativeModel(model_name="gemini-1.5-flash") + + # Upload the audio file + try: + audio_file = genai.upload_file(audio_file_path) + logger.info(f"Audio file uploaded successfully: {audio_file=}") + except FileNotFoundError: + error_message = f"FileNotFoundError: The audio file at {audio_file_path} does not exist." + logger.error(error_message) + raise FileNotFoundError(error_message) + except Exception as e: + logger.error(f"Error uploading audio file: {e}") + return None + + # Generate the transcription + try: + response = model.generate_content([ + "Transcribe the following audio:", + audio_file + ]) + + # Check for valid response and extract text + if response and hasattr(response, 'text'): + transcript = response.text + logger.info(f"Transcription successful:\n{transcript}") + return transcript + else: + logger.warning("Transcription failed: Invalid or empty response from API.") + return None + + except Exception as e: + logger.error(f"Error during transcription: {e}") + return None - except ValueError as ve: - print(f"ValueError: {ve}") - except FileNotFoundError: - print(f"FileNotFoundError: The audio file at {audio_file_path} does not exist.") except Exception as e: - print(f"An error occurred: {e}") + logger.error(f"An unexpected error occurred: {e}") + return None diff --git a/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py b/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py index ba6e7ffe..a52261be 100644 --- a/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py +++ b/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py @@ -2,7 +2,7 @@ import os import re import sys -from pytube import YouTube +from pytubefix import YouTube from loguru import logger from openai import OpenAI from tqdm import tqdm @@ -22,6 +22,7 @@ def progress_function(stream, chunk, bytes_remaining): current = ((stream.filesize - bytes_remaining) / stream.filesize) progress_bar.update(current - progress_bar.n) # Update the progress bar + def rename_file_with_underscores(file_path): """Rename a file by replacing spaces and special characters with underscores. @@ -62,22 +63,32 @@ def speech_to_text(video_url): SystemExit: If a critical error occurs that prevents successful execution. """ output_path = os.getenv("CONTENT_SAVE_DIR") + yt = None + audio_file = None with st.status("Started Writing..", expanded=False) as status: try: - audio_file = None if video_url.startswith("https://www.youtube.com/") or video_url.startswith("http://www.youtube.com/"): logger.info(f"Accessing YouTube URL: {video_url}") status.update(label=f"Accessing YouTube URL: {video_url}") - yt = YouTube(video_url, on_progress_callback=progress_function) - - logger.info("Fetching the highest quality audio stream") - status.update(label="Fetching the highest quality audio stream") - audio_stream = yt.streams.filter(only_audio=True).first() + try: + vid_id = video_url.split("=")[1] + yt = YouTube(video_url, on_progress_callback=progress_function) + except Exception as err: + logger.error(f"Failed to get pytube stream object: {err}") + st.stop() + logger.info(f"Fetching the highest quality audio stream:{yt.title}") + status.update(label=f"Fetching the highest quality audio stream: {yt.title}") + try: + audio_stream = yt.streams.filter(only_audio=True).first() + except Exception as err: + logger.error(f"Failed to Download Youtube Audio: {err}") + st.stop() + if audio_stream is None: logger.warning("No audio stream found for this video.") st.warning("No audio stream found for this video.") - return None + st.stop() logger.info(f"Downloading audio for: {yt.title}") status.update(label=f"Downloading audio for: {yt.title}") @@ -113,9 +124,13 @@ def speech_to_text(video_url): # FIXME: We can chunk hour long videos, the code is not tested. #long_video(audio_file) sys.exit("File size limit exceeded.") - st.error("Audio File size limit exceeded.") + st.error("Audio File size limit exceeded. File a fixme/issues at ALwrity github.") try: + print(f"Audio File: {audio_file}") + transcript = transcribe_audio(audio_file) + print(f"\n\n\n--- Tracribe: {transcript} ----\n\n\n") + exit(1) status.update(label=f"Initializing OpenAI client for transcription: {audio_file}") logger.info(f"Initializing OpenAI client for transcription: {audio_file}") client = OpenAI(api_key=os.getenv('OPENAI_API_KEY')) @@ -170,7 +185,7 @@ def long_video(temp_file_name): video_url (str): URL of the YouTube video to be transcribed. """ # Extract audio and split into chunks - app.logger.info(f"Processing the YT video: {temp_file_name}") + logger.info(f"Processing the YT video: {temp_file_name}") full_audio = mp.AudioFileClip(temp_file_name) duration = full_audio.duration chunk_length = 600 # 10 minutes in seconds diff --git a/lib/utils/alwrity_utils.py b/lib/utils/alwrity_utils.py index 1b1cba85..b37d623b 100644 --- a/lib/utils/alwrity_utils.py +++ b/lib/utils/alwrity_utils.py @@ -169,7 +169,8 @@ def blog_from_keyword(): st.error('🚫 Blog keywords should be at least two words long. Please try again.') elif input_type == "youtube_url" or input_type == "audio_file": - generate_audio_blog(user_input) + if not generate_audio_blog(user_input): + st.stop() elif input_type == "web_url": blog_from_url(user_input) diff --git a/requirements.txt b/requirements.txt index 7c3b7c21..75deaf0a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,4 +38,5 @@ streamlit-mic-recorder tinify cloudscraper xmlschema +moviepy googlesearch-python