From e6f60feba58cb446349833bb7b7492bf903a0e95 Mon Sep 17 00:00:00 2001
From: ajaysi <ajay.calsoft@gmail.com>
Date: Sat, 12 Oct 2024 07:59:13 +0530
Subject: [PATCH] YT to blog, bug fixes - WIP

---
 .../speech_to_blog/main_audio_to_blog.py      |  66 +++++++++-
 .../write_blogs_from_youtube_videos.py        |  82 -------------
 .../gemini_audio_text.py                      | 116 ++++++++++++------
 .../stt_audio_blog.py                         |  35 ++++--
 lib/utils/alwrity_utils.py                    |   3 +-
 requirements.txt                              |   1 +
 6 files changed, 170 insertions(+), 133 deletions(-)
 delete mode 100644 lib/ai_writers/speech_to_blog/write_blogs_from_youtube_videos.py
diff --git a/lib/ai_writers/speech_to_blog/main_audio_to_blog.py b/lib/ai_writers/speech_to_blog/main_audio_to_blog.py
index 62cba2f4..432d4a7e 100644
--- a/lib/ai_writers/speech_to_blog/main_audio_to_blog.py
+++ b/lib/ai_writers/speech_to_blog/main_audio_to_blog.py
@@ -2,10 +2,13 @@ import os
 import datetime #I wish
 import sys
 from textwrap import dedent
-import openai
 from tqdm import tqdm, trange
 import time
 
+from pytubefix import YouTube
+import tempfile
+from html2image import Html2Image
+
 from loguru import logger
 logger.remove()
 logger.add(sys.stdout,
@@ -13,11 +16,70 @@ logger.add(sys.stdout,
         format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
     )
 
-from .write_blogs_from_youtube_videos import youtube_to_blog
 from ...ai_web_researcher.gpt_online_researcher import do_google_serp_search
 from ..blog_from_google_serp import blog_with_research
 from ...blog_metadata.get_blog_metadata import blog_metadata
 from ...blog_postprocessing.save_blog_to_file import save_blog_to_file
+from ...gpt_providers.audio_to_text_generation.stt_audio_blog import speech_to_text
+from ...gpt_providers.text_generation.main_text_generation import llm_text_gen
+
+
+def youtube_to_blog(video_url):
+    """Transcribe `video_url` and rewrite the transcript as a blog; returns (blog, title) or False."""
+    try:
+        # Step 1: speech_to_text yields the transcript text and a title for the given URL.
+        logger.info("Starting with Speech to Text.")
+        audio_text, audio_title = speech_to_text(video_url)
+    except Exception as e:
+        logger.error(f"Error in speech_to_text: {e}")
+        sys.exit(1)  # Exit the program due to error in speech_to_text
+
+    try:
+        # Step 2: turn the transcript into blog content via summarize_youtube_video.
+        audio_blog_content = summarize_youtube_video(audio_text)
+        logger.info("Successfully converted given URL to blog article.")
+        return audio_blog_content, audio_title
+    except Exception as e:
+        logger.error(f"Error in summarize_youtube_video: {e}")
+        return False  # NOTE(review): bare False, not a 2-tuple - callers must check before unpacking
+
+
+def summarize_youtube_video(user_content):
+    """Rewrite a transcript as a blog article using `llm_text_gen`.
+    Args:
+      user_content: Transcript text to be rewritten as a blog article.
+    Returns:
+      Whatever `llm_text_gen` returns for the blog-writing prompt.
+    Raises: SystemExit (status 1) if `llm_text_gen` raises an exception.
+    """
+    logger.info("Start summarize_youtube_video..")
+    prompt = f"""
+        You are an expert copywriter specializing in digital content writing. I will provide you with a transcript. 
+        Your task is to transform a given transcript into a well-structured and informative blog article. 
+        Please follow the below objectives:
+
+        1. Master the Transcript: Understand main ideas, key points, and the core message.
+        2. Sentence Structure: Rephrase while preserving logical flow and coherence. Dont quote anyone from video.
+        3. Note: Check if the transcript is about programming, then include code examples and snippets in your article.
+        4. Write Unique Content: Avoid direct copying; rewrite in your own words. 
+        5. REMEMBER to avoid direct quoting and maintain uniqueness.
+        6. Proofread: Check for grammar, spelling, and punctuation errors.
+        7. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, interjections, and colloquialisms.
+        8. Avoid repetitive phrases and unnatural sentence structures.
+        9. Ensure Uniqueness: Guarantee the article is plagiarism-free.
+        10. Punctuation: Use appropriate question marks at the end of questions.
+        11. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
+        12. Rephrase words like 'video, youtube, channel' with 'article, blog' and such suitable words.
+
+        Follow the above guidelines to create a well-optimized, unique, and informative article,
+        that will rank well in search engine results and engage readers effectively.
+        Follow above guidelines to craft a blog content from the following transcript:\n{user_content}
+        """
+    try:
+        return llm_text_gen(prompt)
+    except Exception as err:
+        logger.error(f"Failed to summarize_youtube_video: {err}")
+        sys.exit(1)  # sys.exit, not the site-injected exit(), which is missing under -S / frozen builds
 
 
 def generate_audio_blog(audio_input):
diff --git a/lib/ai_writers/speech_to_blog/write_blogs_from_youtube_videos.py b/lib/ai_writers/speech_to_blog/write_blogs_from_youtube_videos.py
deleted file mode 100644
index 62fbe6d9..00000000
--- a/lib/ai_writers/speech_to_blog/write_blogs_from_youtube_videos.py
+++ /dev/null
@@ -1,82 +0,0 @@
-import os
-import time
-import sys
-
-from pytube import YouTube
-import tempfile
-import openai
-from html2image import Html2Image
-from tqdm import tqdm, trange
-import google.generativeai as genai
-
-from loguru import logger
-logger.remove()
-logger.add(sys.stdout,
-        colorize=True,
-        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
-    )
-
-
-from ...gpt_providers.audio_to_text_generation.stt_audio_blog import speech_to_text
-from ...gpt_providers.text_generation.main_text_generation import llm_text_gen
-
-
-def youtube_to_blog(video_url):
-    """Function to transcribe a given youtube url """
-    # fixme: Doesnt work all types of yt urls.
-    vid_id = video_url.split("=")[1]
-
-    try:
-        # Starting the speech-to-text process
-        logger.info("Starting with Speech to Text.")
-        audio_text, audio_title = speech_to_text(video_url)
-    except Exception as e:
-        logger.error(f"Error in speech_to_text: {e}")
-        sys.exit(1)  # Exit the program due to error in speech_to_text
-
-    try:
-        # Summarizing the content of the YouTube video
-        audio_blog_content = summarize_youtube_video(audio_text)
-        logger.info("Successfully converted given URL to blog article.")
-        return audio_blog_content, audio_title
-    except Exception as e:
-        logger.error(f"Error in summarize_youtube_video: {e}")
-        sys.exit(1)  # Exit the program due to error in summarize_youtube_video
-
-
-def summarize_youtube_video(user_content):
-    """Generates a summary of a YouTube video using OpenAI GPT-3 and displays a progress bar. 
-    Args:
-      video_link: The URL of the YouTube video to summarize.
-    Returns:
-      A string containing the summary of the video.
-    """
-
-    logger.info("Start summarize_youtube_video..")
-    prompt = f"""
-        You are an expert copywriter specializing in digital content writing. I will provide you with a transcript. 
-        Your task is to transform a given transcript into a well-structured and informative blog article. 
-        Please follow the below objectives:
-
-        1. Master the Transcript: Understand main ideas, key points, and the core message.
-        2. Sentence Structure: Rephrase while preserving logical flow and coherence. Dont quote anyone from video.
-        3. Note: Check if the transcript is about programming, then include code examples and snippets in your article.
-        4. Write Unique Content: Avoid direct copying; rewrite in your own words. 
-        5. REMEMBER to avoid direct quoting and maintain uniqueness.
-        6. Proofread: Check for grammar, spelling, and punctuation errors.
-        7. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, interjections, and colloquialisms.        8. Avoid repetitive phrases and unnatural sentence structures.
-        9. Ensure Uniqueness: Guarantee the article is plagiarism-free.
-        10. Punctuation: Use appropriate question marks at the end of questions.
-        11. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
-        12. Rephrase words like 'video, youtube, channel' with 'article, blog' and such suitable words.
-
-        Follow the above guidelines to create a well-optimized, unique, and informative article,
-        that will rank well in search engine results and engage readers effectively.
-        Follow above guidelines to craft a blog content from the following transcript:\n{user_content}
-        """
-    try:
-        response = llm_text_gen(prompt)
-        return response
-    except Exception as err:
-        logger.error(f"Failed to summarize_youtube_video: {err}")
-        exit(1)
diff --git a/lib/gpt_providers/audio_to_text_generation/gemini_audio_text.py b/lib/gpt_providers/audio_to_text_generation/gemini_audio_text.py
index f692f94d..5625dcea 100644
--- a/lib/gpt_providers/audio_to_text_generation/gemini_audio_text.py
+++ b/lib/gpt_providers/audio_to_text_generation/gemini_audio_text.py
@@ -1,61 +1,101 @@
 import os
+import sys
+
 import google.generativeai as genai
 from dotenv import load_dotenv
 
+from loguru import logger
+logger.remove()
+logger.add(sys.stdout,
+        colorize=True,
+        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
+    )
+
+
 def load_environment():
-    """Load environment variables from a .env file."""
+    """Loads environment variables from a .env file."""
     load_dotenv()
+    logger.info("Environment variables loaded successfully.")
+
 
 def configure_google_api():
-    """Configure the Google API for audio summarization."""
+    """Configures the Google Gemini API for audio transcription.
+
+    Raises:
+        ValueError: If the GEMINI_API_KEY environment variable is not set.
+    """
     api_key = os.getenv("GEMINI_API_KEY")
     if not api_key:
-        raise ValueError("Google API key not found. Please set the GEMINI_API_KEY environment variable.")
+        error_message = "Google API key not found. Please set the GEMINI_API_KEY environment variable."
+        logger.error(error_message)
+        raise ValueError(error_message)
+    
     genai.configure(api_key=api_key)
+    logger.info("Google Gemini API configured successfully.")
+
 
 def transcribe_audio(audio_file_path):
-    """Summarize the audio using Google's Generative API.
-    
+    """
+    Transcribes audio using Google's Gemini model ("gemini-1.5-flash").
+
     Args:
-        audio_file_path (str): The path to the audio file to be summarized.
-    
+        audio_file_path (str): The path to the audio file to be transcribed.
+
     Returns:
-        str: The summary text of the audio.
-    
+        str: The transcribed text from the audio. 
+             Returns None if transcription fails.
+
     Raises:
-        ValueError: If the audio file path is invalid or the API response is not successful.
-        Exception: For any other errors that occur during the process.
+        No exception escapes: errors (including a missing audio file) are logged and None is returned.
     """
     try:
-        # Load environment variables and configure API
+        # Load environment variables and configure the Google API
         load_environment()
         configure_google_api()
 
-        # Create generative model instance
-        model = genai.GenerativeModel("models/gemini-1.5-pro-latest")
-        audio_file = None
-        try:
-            # Upload the audio file
-            audio_file = genai.upload_file(path=audio_file_path)
-        except Exception as err:
-            print(err)
-        # Generate the summary
-        response = model.generate_content(
-            [
-                "Listen carefully to the given following audio file. Transcribe the following given audio.",
-                audio_file
-            ]
-        )
-        
-        # Check if the response contains text
-        if not hasattr(response, 'text'):
-            raise ValueError("The API response does not contain text.")
-        
-        return response.text
+        logger.info(f"Attempting to transcribe audio file: {audio_file_path}")
+
+        # Check if file exists
+        if not os.path.exists(audio_file_path):
+            error_message = f"FileNotFoundError: The audio file at {audio_file_path} does not exist."
+            logger.error(error_message)
+            raise FileNotFoundError(error_message)
+
+        # Initialize a Gemini model appropriate for your use case.
+        model = genai.GenerativeModel(model_name="gemini-1.5-flash")
+
+        # Upload the audio file
+        try:
+            audio_file = genai.upload_file(audio_file_path)
+            logger.info(f"Audio file uploaded successfully: {audio_file=}")
+        except FileNotFoundError:
+            error_message = f"FileNotFoundError: The audio file at {audio_file_path} does not exist."
+            logger.error(error_message)
+            raise FileNotFoundError(error_message) 
+        except Exception as e:
+            logger.error(f"Error uploading audio file: {e}")
+            return None
+
+        # Generate the transcription
+        try:
+            response = model.generate_content([
+                "Transcribe the following audio:",
+                audio_file
+            ])
+
+            # Check for valid response and extract text
+            if response and hasattr(response, 'text'):
+                transcript = response.text
+                logger.info(f"Transcription successful:\n{transcript}")
+                return transcript
+            else:
+                logger.warning("Transcription failed: Invalid or empty response from API.")
+                return None
+
+        except Exception as e:
+            logger.error(f"Error during transcription: {e}")
+            return None
 
-    except ValueError as ve:
-        print(f"ValueError: {ve}")
-    except FileNotFoundError:
-        print(f"FileNotFoundError: The audio file at {audio_file_path} does not exist.")
     except Exception as e:
-        print(f"An error occurred: {e}")
+        logger.error(f"An unexpected error occurred: {e}")
+        return None
diff --git a/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py b/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
index ba6e7ffe..a52261be 100644
--- a/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
+++ b/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
@@ -2,7 +2,7 @@ import os
 import re
 import sys
 
-from pytube import YouTube
+from pytubefix import YouTube
 from loguru import logger
 from openai import OpenAI
 from tqdm import tqdm
@@ -22,6 +22,7 @@ def progress_function(stream, chunk, bytes_remaining):
     current = ((stream.filesize - bytes_remaining) / stream.filesize)
     progress_bar.update(current - progress_bar.n)  # Update the progress bar
 
+
 def rename_file_with_underscores(file_path):
     """Rename a file by replacing spaces and special characters with underscores.
 
@@ -62,22 +63,32 @@ def speech_to_text(video_url):
         SystemExit: If a critical error occurs that prevents successful execution.
     """
     output_path = os.getenv("CONTENT_SAVE_DIR")
+    yt = None
+    audio_file = None
     with st.status("Started Writing..", expanded=False) as status:
         try:
-            audio_file = None
             if video_url.startswith("https://www.youtube.com/") or video_url.startswith("http://www.youtube.com/"):
                 logger.info(f"Accessing YouTube URL: {video_url}")
                 status.update(label=f"Accessing YouTube URL: {video_url}")
-                yt = YouTube(video_url, on_progress_callback=progress_function)
-    
-                logger.info("Fetching the highest quality audio stream")
-                status.update(label="Fetching the highest quality audio stream")
-                audio_stream = yt.streams.filter(only_audio=True).first()
+                try:
+                    # The URL is passed through untouched: splitting on "=" fails for URLs with no query string.
+                    yt = YouTube(video_url, on_progress_callback=progress_function)
+                except Exception as err:
+                    logger.error(f"Failed to get pytube stream object: {err}")
+                    st.stop()
     
+                logger.info(f"Fetching the highest quality audio stream:{yt.title}")
+                status.update(label=f"Fetching the highest quality audio stream: {yt.title}")
+                try:
+                    audio_stream = yt.streams.filter(only_audio=True).first()
+                except Exception as err:
+                    logger.error(f"Failed to Download Youtube Audio: {err}")
+                    st.stop()
+
                 if audio_stream is None:
                     logger.warning("No audio stream found for this video.")
                     st.warning("No audio stream found for this video.")
-                    return None
+                    st.stop()
     
                 logger.info(f"Downloading audio for: {yt.title}")
                 status.update(label=f"Downloading audio for: {yt.title}")
@@ -113,9 +124,13 @@ def speech_to_text(video_url):
                 # FIXME: We can chunk hour long videos, the code is not tested.
                 #long_video(audio_file)
+                st.error("Audio File size limit exceeded. File a fixme/issues at ALwrity github.")  # must precede sys.exit to be shown
                 sys.exit("File size limit exceeded.")
-                st.error("Audio File size limit exceeded.")
     
             try:
+                # TODO: Gemini transcription (transcribe_audio) is still WIP; keep it out of
+                # this path until it is selectable, so the OpenAI transcription below runs.
+                # Debug-only: record which file is about to be transcribed.
+                logger.debug(f"Audio File: {audio_file}")
                 status.update(label=f"Initializing OpenAI client for transcription: {audio_file}")
                 logger.info(f"Initializing OpenAI client for transcription: {audio_file}")
                 client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
@@ -170,7 +185,7 @@ def long_video(temp_file_name):
         video_url (str): URL of the YouTube video to be transcribed.
     """
     # Extract audio and split into chunks
-    app.logger.info(f"Processing the YT video: {temp_file_name}")
+    logger.info(f"Processing the YT video: {temp_file_name}")
     full_audio = mp.AudioFileClip(temp_file_name)
     duration = full_audio.duration
     chunk_length = 600  # 10 minutes in seconds
diff --git a/lib/utils/alwrity_utils.py b/lib/utils/alwrity_utils.py
index 1b1cba85..b37d623b 100644
--- a/lib/utils/alwrity_utils.py
+++ b/lib/utils/alwrity_utils.py
@@ -169,7 +169,8 @@ def blog_from_keyword():
                 st.error('🚫 Blog keywords should be at least two words long. Please try again.')
         
         elif input_type == "youtube_url" or input_type == "audio_file":
-            generate_audio_blog(user_input)
+            if not generate_audio_blog(user_input):
+                st.stop()
         
         elif input_type == "web_url":
             blog_from_url(user_input)
diff --git a/requirements.txt b/requirements.txt
index 7c3b7c21..75deaf0a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -38,4 +38,5 @@ streamlit-mic-recorder
 tinify
 cloudscraper
 xmlschema
+moviepy
 googlesearch-python