Alwrity - WIP - main_config

2024-04-07 20:47:49 +05:30
parent e33008659b
commit 23b3c7f6e0
23 changed files with 313 additions and 327 deletions
--- a/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
+++ b/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
@@ -0,0 +1,88 @@
+from pytube import YouTube
+import os
+import sys
+from loguru import logger
+from openai import OpenAI
+from tqdm import tqdm
+
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_random_exponential,
+)  # for exponential backoff
+
+
+def progress_function(stream, chunk, bytes_remaining):
+    """
+    Download-progress callback that advances the module-level ``progress_bar``.
+
+    Registered as ``on_progress_callback`` on the ``YouTube`` object in
+    ``speech_to_text``. Progress is reported as a fraction in ``[0, 1]``
+    because the bar is created there with ``total=1.0``.
+
+    Args:
+        stream: Stream being downloaded; only its ``filesize`` is read.
+        chunk: The data chunk just received (unused).
+        bytes_remaining (int): Number of bytes still to be downloaded.
+    """
+    total_bytes = stream.filesize
+    if not total_bytes:
+        # A zero/unknown size gives no meaningful fraction and would
+        # otherwise raise ZeroDivisionError in the middle of the download.
+        return
+
+    fraction_done = (total_bytes - bytes_remaining) / total_bytes
+    # update() takes an increment, so pass the distance from the bar's
+    # current position (progress_bar.n) to the new fraction.
+    progress_bar.update(fraction_done - progress_bar.n)
+
+
+# NOTE(review): every Exception raised inside the body is converted to
+# SystemExit below; tenacity's default retry condition presumably only retries
+# Exception subclasses, so this decorator may never actually retry - confirm.
+@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+def speech_to_text(video_url, output_path='.'):
+    """
+    Transcribes speech to text from a YouTube video URL using OpenAI's Whisper model.
+
+    The audio-only stream is downloaded to ``output_path``, rejected if it is
+    larger than 24MB, sent to the ``whisper-1`` model, and the downloaded file
+    is removed again before the function exits.
+
+    Args:
+        video_url (str): URL of the YouTube video to transcribe.
+        output_path (str, optional): Directory where the audio file will be saved. Defaults to '.'.
+
+    Returns:
+        tuple | None: ``(transcript, title)`` - the transcribed text and the
+        video title - or ``None`` when the video has no audio-only stream.
+
+    Raises:
+        SystemExit: If the file exceeds the size limit, or if downloading or
+            transcription fails.
+    """
+    # Bound up front so the ``finally`` clause can always inspect it, even when
+    # we leave before the download has produced a file.
+    audio_file = None
+    try:
+        logger.info(f"Accessing YouTube URL: {video_url}")
+        yt = YouTube(video_url, on_progress_callback=progress_function)
+
+        logger.info("Fetching the highest quality audio stream")
+        audio_stream = yt.streams.filter(only_audio=True).first()
+
+        if audio_stream is None:
+            logger.warning("No audio stream found for this video.")
+            return None
+
+        # progress_function reads this module-level bar; total=1.0 because the
+        # callback reports progress as a fraction of the file size.
+        global progress_bar
+        progress_bar = tqdm(total=1.0, unit='iB', unit_scale=True, desc=yt.title)
+        try:
+            audio_file = audio_stream.download(output_path)
+        finally:
+            # Close the bar even when the download raises.
+            progress_bar.close()
+        logger.info(f"Audio downloaded: {yt.title} to {output_path}")
+
+        # Checking file size
+        max_file_size = 24 * 1024 * 1024  # 24MB
+        file_size = os.path.getsize(audio_file)
+        # Convert file size to MB for logging
+        file_size_MB = file_size / (1024 * 1024)  # Convert bytes to MB
+        logger.info(f"Downloaded Audio Size is: {file_size_MB:.2f} MB")
+        if file_size > max_file_size:
+            logger.error("File size exceeds 24MB limit.")
+            sys.exit("File size limit exceeded.")
+
+        try:
+            logger.info("Initializing OpenAI client for transcription.")
+            client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
+
+            logger.info("Transcribing using OpenAI's Whisper model.")
+            # Context manager closes the handle before the file is deleted in
+            # ``finally`` (an open handle blocks removal on Windows).
+            with open(audio_file, "rb") as audio_handle:
+                transcript = client.audio.transcriptions.create(
+                    model="whisper-1",
+                    file=audio_handle,
+                    response_format="text"
+                )
+            logger.info(f"\nYouTube video transcription:\n\n{transcript}\n")
+            return transcript, yt.title
+
+        except Exception as e:
+            logger.error(f"Failed in Whisper transcription: {e}")
+            sys.exit("Transcription failure.")
+
+    except Exception as e:
+        logger.error(f"An error occurred during YouTube video processing: {e}")
+        sys.exit("Video processing failure.")
+
+    finally:
+        # audio_file is still None when we exited before the download finished.
+        if audio_file is not None and os.path.exists(audio_file):
+            try:
+                os.remove(audio_file)
+                logger.info("Temporary audio file removed.")
+            except OSError as e:
+                # Cleanup is best-effort: do not let it replace the result or
+                # the SystemExit already on its way out.
+                logger.warning(f"Could not remove temporary audio file: {e}")