YT to blog, bug fixes - WIP

2024-10-12 07:59:13 +05:30
parent 16bcd86bb7
commit e6f60feba5
6 changed files with 170 additions and 133 deletions
--- a/lib/ai_writers/speech_to_blog/main_audio_to_blog.py
+++ b/lib/ai_writers/speech_to_blog/main_audio_to_blog.py
@@ -2,10 +2,13 @@ import os
 import datetime #I wish
 import sys
 from textwrap import dedent
 import openai
 from tqdm import tqdm, trange
 import time
 from pytubefix import YouTube
 import tempfile
 from html2image import Html2Image
 from loguru import logger
 logger.remove()
 logger.add(sys.stdout,
@@ -13,11 +16,70 @@ logger.add(sys.stdout,
        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
    )
 from .write_blogs_from_youtube_videos import youtube_to_blog
 from ...ai_web_researcher.gpt_online_researcher import do_google_serp_search
 from ..blog_from_google_serp import blog_with_research
 from ...blog_metadata.get_blog_metadata import blog_metadata
 from ...blog_postprocessing.save_blog_to_file import save_blog_to_file
 from ...gpt_providers.audio_to_text_generation.stt_audio_blog import speech_to_text
 from ...gpt_providers.text_generation.main_text_generation import llm_text_gen
 def youtube_to_blog(video_url):
    """Transcribe the audio behind ``video_url`` and rewrite the transcript as a blog article.

    Args:
        video_url: Passed unchanged to ``speech_to_text``.

    Returns:
        ``(audio_blog_content, audio_title)`` on success, where ``audio_title`` is the
        second value returned by ``speech_to_text``; ``False`` if
        ``summarize_youtube_video`` raises an ``Exception``.

    Raises:
        SystemExit: With code 1 if ``speech_to_text`` raises an ``Exception``.
    """
    try:
        # Step 1: get the transcript (and its accompanying title) from the speech-to-text helper.
        logger.info("Starting with Speech to Text.")
        audio_text, audio_title = speech_to_text(video_url)
    except Exception as e:
        logger.error(f"Error in speech_to_text: {e}")
        sys.exit(1)  # Exit the program due to error in speech_to_text
    try:
        # Step 2: turn the transcript into blog content.
        audio_blog_content = summarize_youtube_video(audio_text)
        logger.info("Successfully converted given URL to blog article.")
        return audio_blog_content, audio_title
    except Exception as e:
        logger.error(f"Error in summarize_youtube_video: {e}")
        # NOTE(review): success returns a 2-tuple but this path returns a bare False,
        # so a caller that unpacks the result has to test for False first - confirm callers do.
        return False
 def summarize_youtube_video(user_content):
    """Build a copywriting prompt around a transcript and send it to ``llm_text_gen``.

    Args:
      user_content: Transcript text; interpolated at the end of the prompt.
    Returns:
      The value returned by ``llm_text_gen`` for the prompt.
    Raises:
      SystemExit: With code 1 if ``llm_text_gen`` raises an ``Exception``.
    """
    logger.info("Start summarize_youtube_video..")
    # NOTE(review): in the prompt below, objectives 7 and 8 share a single line.
    prompt = f"""
        You are an expert copywriter specializing in digital content writing. I will provide you with a transcript. 
        Your task is to transform a given transcript into a well-structured and informative blog article. 
        Please follow the below objectives:
        1. Master the Transcript: Understand main ideas, key points, and the core message.
        2. Sentence Structure: Rephrase while preserving logical flow and coherence. Dont quote anyone from video.
        3. Note: Check if the transcript is about programming, then include code examples and snippets in your article.
        4. Write Unique Content: Avoid direct copying; rewrite in your own words. 
        5. REMEMBER to avoid direct quoting and maintain uniqueness.
        6. Proofread: Check for grammar, spelling, and punctuation errors.
        7. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, interjections, and colloquialisms.        8. Avoid repetitive phrases and unnatural sentence structures.
        9. Ensure Uniqueness: Guarantee the article is plagiarism-free.
        10. Punctuation: Use appropriate question marks at the end of questions.
        11. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
        12. Rephrase words like 'video, youtube, channel' with 'article, blog' and such suitable words.
        Follow the above guidelines to create a well-optimized, unique, and informative article,
        that will rank well in search engine results and engage readers effectively.
        Follow above guidelines to craft a blog content from the following transcript:\n{user_content}
        """
    try:
        response = llm_text_gen(prompt)
        return response
    except Exception as err:
        logger.error(f"Failed to summarize_youtube_video: {err}")
        # NOTE(review): bare exit() is the helper installed by the site module;
        # sys.exit(1) (sys is imported by this file) is the usual call in library code.
        exit(1)
 def generate_audio_blog(audio_input):
--- a/lib/ai_writers/speech_to_blog/write_blogs_from_youtube_videos.py
+++ b/lib/ai_writers/speech_to_blog/write_blogs_from_youtube_videos.py
@@ -1,82 +0,0 @@
 import os
 import time
 import sys
 from pytube import YouTube
 import tempfile
 import openai
 from html2image import Html2Image
 from tqdm import tqdm, trange
 import google.generativeai as genai
 from loguru import logger
 logger.remove()
 logger.add(sys.stdout,
        colorize=True,
        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
    )
 from ...gpt_providers.audio_to_text_generation.stt_audio_blog import speech_to_text
 from ...gpt_providers.text_generation.main_text_generation import llm_text_gen
 def youtube_to_blog(video_url):
    """Transcribe the audio behind ``video_url`` and rewrite the transcript as a blog article.

    Args:
        video_url: Passed unchanged to ``speech_to_text``; also split on "=" below.

    Returns:
        ``(audio_blog_content, audio_title)`` on success.

    Raises:
        SystemExit: With code 1 if ``speech_to_text`` or ``summarize_youtube_video``
            raises an ``Exception``.
        IndexError: If ``video_url`` contains no "=" (the split below runs outside any ``try``).
    """
    # FIXME: Doesn't work for all types of YouTube URLs.
    # NOTE(review): vid_id is not used anywhere else in this function.
    vid_id = video_url.split("=")[1]
    try:
        # Step 1: get the transcript (and its accompanying title) from the speech-to-text helper.
        logger.info("Starting with Speech to Text.")
        audio_text, audio_title = speech_to_text(video_url)
    except Exception as e:
        logger.error(f"Error in speech_to_text: {e}")
        sys.exit(1)  # Exit the program due to error in speech_to_text
    try:
        # Step 2: turn the transcript into blog content.
        audio_blog_content = summarize_youtube_video(audio_text)
        logger.info("Successfully converted given URL to blog article.")
        return audio_blog_content, audio_title
    except Exception as e:
        logger.error(f"Error in summarize_youtube_video: {e}")
        sys.exit(1)  # Exit the program due to error in summarize_youtube_video
 def summarize_youtube_video(user_content):
    """Build a copywriting prompt around a transcript and send it to ``llm_text_gen``.

    Args:
      user_content: Transcript text; interpolated at the end of the prompt.
    Returns:
      The value returned by ``llm_text_gen`` for the prompt.
    Raises:
      SystemExit: With code 1 if ``llm_text_gen`` raises an ``Exception``.
    """
    logger.info("Start summarize_youtube_video..")
    # NOTE(review): in the prompt below, objectives 7 and 8 share a single line.
    prompt = f"""
        You are an expert copywriter specializing in digital content writing. I will provide you with a transcript. 
        Your task is to transform a given transcript into a well-structured and informative blog article. 
        Please follow the below objectives:
        1. Master the Transcript: Understand main ideas, key points, and the core message.
        2. Sentence Structure: Rephrase while preserving logical flow and coherence. Dont quote anyone from video.
        3. Note: Check if the transcript is about programming, then include code examples and snippets in your article.
        4. Write Unique Content: Avoid direct copying; rewrite in your own words. 
        5. REMEMBER to avoid direct quoting and maintain uniqueness.
        6. Proofread: Check for grammar, spelling, and punctuation errors.
        7. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, interjections, and colloquialisms.        8. Avoid repetitive phrases and unnatural sentence structures.
        9. Ensure Uniqueness: Guarantee the article is plagiarism-free.
        10. Punctuation: Use appropriate question marks at the end of questions.
        11. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
        12. Rephrase words like 'video, youtube, channel' with 'article, blog' and such suitable words.
        Follow the above guidelines to create a well-optimized, unique, and informative article,
        that will rank well in search engine results and engage readers effectively.
        Follow above guidelines to craft a blog content from the following transcript:\n{user_content}
        """
    try:
        response = llm_text_gen(prompt)
        return response
    except Exception as err:
        logger.error(f"Failed to summarize_youtube_video: {err}")
        # NOTE(review): bare exit() is the helper installed by the site module;
        # sys.exit(1) (sys is imported by this file) is the usual call in library code.
        exit(1)
--- a/lib/gpt_providers/audio_to_text_generation/gemini_audio_text.py
+++ b/lib/gpt_providers/audio_to_text_generation/gemini_audio_text.py
@@ -1,61 +1,101 @@
 import os
 import sys
 import google.generativeai as genai
 from dotenv import load_dotenv
 from loguru import logger
 logger.remove()
 logger.add(sys.stdout,
        colorize=True,
        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
    )
 def load_environment():
-    """Load environment variables from a .env file."""
+    """Loads environment variables from a .env file."""
    # NOTE(review): load_dotenv()'s return value is ignored, so the success message
    # below is logged unconditionally - presumably even when no .env file exists; confirm.
    load_dotenv()
    logger.info("Environment variables loaded successfully.")
 def configure_google_api():
-    """Configure the Google API for audio summarization."""
+    """Configures the Google Gemini API for audio transcription.
    Raises:
        ValueError: If the GEMINI_API_KEY environment variable is not set or is empty.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    # `not api_key` rejects both a missing variable (None) and an empty string.
    if not api_key:
-        raise ValueError("Google API key not found. Please set the GEMINI_API_KEY environment variable.")
+        error_message = "Google API key not found. Please set the GEMINI_API_KEY environment variable."
        logger.error(error_message)
        raise ValueError(error_message)
    genai.configure(api_key=api_key)
    logger.info("Google Gemini API configured successfully.")
 def transcribe_audio(audio_file_path):
-    """Summarize the audio using Google's Generative API.
+    """
-    
+    Transcribes audio using a Google Gemini model (gemini-1.5-flash).
    Args:
-        audio_file_path (str): The path to the audio file to be summarized.
+        audio_file_path (str): The path to the audio file to be transcribed.
-    
+
    Returns:
-        str: The summary text of the audio.
+        str: The transcribed text from the audio. 
-    
+             Returns None if transcription fails.
    Raises:
-        ValueError: If the audio file path is invalid or the API response is not successful.
+        FileNotFoundError: Raised internally when the audio file is missing, but caught by the handler at the end of this function.
        Exception: Other errors are likewise caught (logged or printed) inside this function; the caller receives None instead.
    """
    try:
-        # Load environment variables and configure API
+        # Load environment variables and configure the Google API
        load_environment()
        configure_google_api()
-        # Create generative model instance
+        logger.info(f"Attempting to transcribe audio file: {audio_file_path}")
-        model = genai.GenerativeModel("models/gemini-1.5-pro-latest")
+
-        audio_file = None
+        # Check if file exists
-        try:
+        if not os.path.exists(audio_file_path):
-            # Upload the audio file
+            error_message = f"FileNotFoundError: The audio file at {audio_file_path} does not exist."
-            audio_file = genai.upload_file(path=audio_file_path)
+            logger.error(error_message)
-        except Exception as err:
+            raise FileNotFoundError(error_message)
-            print(err)
+
-        # Generate the summary
+        # Initialize a Gemini model appropriate for your use case.
-        response = model.generate_content(
+        model = genai.GenerativeModel(model_name="gemini-1.5-flash")
-            [
+
-                "Listen carefully to the given following audio file. Transcribe the following given audio.",
+        # Upload the audio file
-                audio_file
+        try:
-            ]
+            audio_file = genai.upload_file(audio_file_path)
-        )
+            logger.info(f"Audio file uploaded successfully: {audio_file=}")
-        
+        except FileNotFoundError:
-        # Check if the response contains text
+            error_message = f"FileNotFoundError: The audio file at {audio_file_path} does not exist."
-        if not hasattr(response, 'text'):
+            logger.error(error_message)
-            raise ValueError("The API response does not contain text.")
+            raise FileNotFoundError(error_message) 
-        
+        except Exception as e:
-        return response.text
+            logger.error(f"Error uploading audio file: {e}")
            return None
        # Generate the transcription
        try:
            response = model.generate_content([
                "Transcribe the following audio:",
                audio_file
            ])
            # Check for valid response and extract text
            if response and hasattr(response, 'text'):
                transcript = response.text
                logger.info(f"Transcription successful:\n{transcript}")
                return transcript
            else:
                logger.warning("Transcription failed: Invalid or empty response from API.")
                return None
        except Exception as e:
            logger.error(f"Error during transcription: {e}")
            return None
    # NOTE(review): the two handlers below use print() rather than the logger and have no
    # return statement, so the function falls off the end and returns None implicitly.
    except ValueError as ve:
        print(f"ValueError: {ve}")
    except FileNotFoundError:
        print(f"FileNotFoundError: The audio file at {audio_file_path} does not exist.")
    except Exception as e:
-        print(f"An error occurred: {e}")
+        logger.error(f"An unexpected error occurred: {e}")
        return None
--- a/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
+++ b/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
@@ -2,7 +2,7 @@ import os
 import re
 import sys
-from pytube import YouTube
+from pytubefix import YouTube
 from loguru import logger
 from openai import OpenAI
 from tqdm import tqdm
@@ -22,6 +22,7 @@ def progress_function(stream, chunk, bytes_remaining):
    current = ((stream.filesize - bytes_remaining) / stream.filesize)
    progress_bar.update(current - progress_bar.n)  # Update the progress bar
 def rename_file_with_underscores(file_path):
    """Rename a file by replacing spaces and special characters with underscores.
@@ -62,22 +63,32 @@ def speech_to_text(video_url):
        SystemExit: If a critical error occurs that prevents successful execution.
    """
    output_path = os.getenv("CONTENT_SAVE_DIR")
    yt = None
    audio_file = None
    with st.status("Started Writing..", expanded=False) as status:
        try:
            audio_file = None
            if video_url.startswith("https://www.youtube.com/") or video_url.startswith("http://www.youtube.com/"):
                logger.info(f"Accessing YouTube URL: {video_url}")
                status.update(label=f"Accessing YouTube URL: {video_url}")
-                yt = YouTube(video_url, on_progress_callback=progress_function)
+                try:
-    
+                    vid_id = video_url.split("=")[1]
-                logger.info("Fetching the highest quality audio stream")
+                    yt = YouTube(video_url, on_progress_callback=progress_function)
-                status.update(label="Fetching the highest quality audio stream")
+                except Exception as err:
-                audio_stream = yt.streams.filter(only_audio=True).first()
+                    logger.error(f"Failed to get pytube stream object: {err}")
                    st.stop()
                logger.info(f"Fetching the highest quality audio stream:{yt.title}")
                status.update(label=f"Fetching the highest quality audio stream: {yt.title}")
                try:
                    audio_stream = yt.streams.filter(only_audio=True).first()
                except Exception as err:
                    logger.error(f"Failed to Download Youtube Audio: {err}")
                    st.stop()
                if audio_stream is None:
                    logger.warning("No audio stream found for this video.")
                    st.warning("No audio stream found for this video.")
-                    return None
+                    st.stop()
                logger.info(f"Downloading audio for: {yt.title}")
                status.update(label=f"Downloading audio for: {yt.title}")
@@ -113,9 +124,13 @@ def speech_to_text(video_url):
                # FIXME: We can chunk hour long videos, the code is not tested.
                #long_video(audio_file)
                sys.exit("File size limit exceeded.")
-                st.error("Audio File size limit exceeded.")
+                st.error("Audio File size limit exceeded. File a fixme/issues at ALwrity github.")
            try:
                print(f"Audio File: {audio_file}")
                transcript = transcribe_audio(audio_file)
                print(f"\n\n\n--- Tracribe: {transcript}  ----\n\n\n")
                exit(1)
                status.update(label=f"Initializing OpenAI client for transcription: {audio_file}")
                logger.info(f"Initializing OpenAI client for transcription: {audio_file}")
                client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
@@ -170,7 +185,7 @@ def long_video(temp_file_name):
        video_url (str): URL of the YouTube video to be transcribed.
    """
    # Extract audio and split into chunks
-    app.logger.info(f"Processing the YT video: {temp_file_name}")
+    logger.info(f"Processing the YT video: {temp_file_name}")
    full_audio = mp.AudioFileClip(temp_file_name)
    duration = full_audio.duration
    chunk_length = 600  # 10 minutes in seconds
--- a/lib/utils/alwrity_utils.py
+++ b/lib/utils/alwrity_utils.py
@@ -169,7 +169,8 @@ def blog_from_keyword():
                st.error('🚫 Blog keywords should be at least two words long. Please try again.')
        elif input_type == "youtube_url" or input_type == "audio_file":
-            generate_audio_blog(user_input)
+            if not generate_audio_blog(user_input):
                st.stop()
        elif input_type == "web_url":
            blog_from_url(user_input)
--- a/requirements.txt
+++ b/requirements.txt
@@ -38,4 +38,5 @@ streamlit-mic-recorder
 tinify
 cloudscraper
 xmlschema
 moviepy
 googlesearch-python