Alwrity - WIP - main_config
This commit is contained in:
88
lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
Normal file
88
lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
Normal file
@@ -0,0 +1,88 @@
|
||||
from pytube import YouTube
|
||||
import os
|
||||
import sys
|
||||
from loguru import logger
|
||||
from openai import OpenAI
|
||||
from tqdm import tqdm
|
||||
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
|
||||
def progress_function(stream, chunk, bytes_remaining):
    """
    Download-progress callback: advance the shared progress bar.

    Passed to ``YouTube(..., on_progress_callback=...)`` by ``speech_to_text``.
    It relies on the module-level ``progress_bar`` that ``speech_to_text``
    creates (with ``total=1.0``) before starting the download, so progress is
    tracked as a fraction in the range 0..1 rather than as a byte count.

    Args:
        stream: The stream being downloaded; only ``stream.filesize`` is read.
        chunk: The data chunk just received (unused).
        bytes_remaining (int): Number of bytes still to be downloaded.
    """
    total_bytes = stream.filesize
    if not total_bytes:
        # Unknown or zero size: no meaningful fraction can be computed, and
        # dividing would raise ZeroDivisionError in the middle of a download.
        return

    # Fraction of the file downloaded so far.
    current = (total_bytes - bytes_remaining) / total_bytes
    # update() takes an increment, so pass the delta from the bar's position.
    progress_bar.update(current - progress_bar.n)
|
||||
|
||||
|
||||
# NOTE(review): every failure path below ends in sys.exit(), i.e. SystemExit,
# which is not an Exception subclass; tenacity's default retry predicate
# presumably does not retry on it - confirm whether retries are really wanted.
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def speech_to_text(video_url, output_path='.'):
    """
    Transcribe the audio of a YouTube video using OpenAI's Whisper model.

    Downloads the first audio-only stream of the video into ``output_path``,
    checks it against a 24 MB size cap, sends it to the ``whisper-1``
    transcription endpoint and finally deletes the downloaded file.

    Args:
        video_url (str): URL of the YouTube video to transcribe.
        output_path (str, optional): Directory where the audio file will be
            saved temporarily. Defaults to '.'.

    Returns:
        tuple | None: ``(transcript, video_title)`` on success, or ``None``
        when the video exposes no audio-only stream.

    Raises:
        SystemExit: If the download fails, the file exceeds the size limit,
            or the transcription request fails.
    """
    # progress_function() reads this module-level bar during the download.
    global progress_bar

    # Bound up front so the cleanup in `finally` is safe on every exit path,
    # including the early `return None` and failures before the download.
    audio_file = None

    try:
        logger.info(f"Accessing YouTube URL: {video_url}")
        yt = YouTube(video_url, on_progress_callback=progress_function)

        logger.info("Fetching the highest quality audio stream")
        audio_stream = yt.streams.filter(only_audio=True).first()

        if audio_stream is None:
            logger.warning("No audio stream found for this video.")
            return None

        # total=1.0 because progress_function reports a 0..1 fraction.
        progress_bar = tqdm(total=1.0, unit='iB', unit_scale=True, desc=yt.title)
        try:
            audio_file = audio_stream.download(output_path)
        finally:
            # Close the bar even if the download raises.
            progress_bar.close()
        logger.info(f"Audio downloaded: {yt.title} to {output_path}")

        # Checking file size
        max_file_size = 24 * 1024 * 1024  # 24MB
        file_size = os.path.getsize(audio_file)
        # Convert file size to MB for logging
        file_size_MB = file_size / (1024 * 1024)
        logger.info(f"Downloaded Audio Size is: {file_size_MB:.2f} MB")
        if file_size > max_file_size:
            logger.error("File size exceeds 24MB limit.")
            sys.exit("File size limit exceeded.")

        try:
            logger.info("Initializing OpenAI client for transcription.")
            client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

            logger.info("Transcribing using OpenAI's Whisper model.")
            # Context manager so the handle is closed before the file is
            # removed in `finally` (an open handle can block deletion).
            with open(audio_file, "rb") as audio_handle:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_handle,
                    response_format="text"
                )
            logger.info(f"\nYouTube video transcription:\n\n{transcript}\n")
            return transcript, yt.title

        except Exception as e:
            logger.error(f"Failed in Whisper transcription: {e}")
            sys.exit("Transcription failure.")

    except Exception as e:
        logger.error(f"An error occurred during YouTube video processing: {e}")
        sys.exit("Video processing failure.")

    finally:
        # Remove the temporary download, if one was actually created.
        if audio_file is not None and os.path.exists(audio_file):
            os.remove(audio_file)
            logger.info("Temporary audio file removed.")
|
||||
Reference in New Issue
Block a user