import os
import sys

from loguru import logger
from openai import OpenAI
from pytube import YouTube
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff

# Largest audio file, in bytes, that will be submitted for transcription (24MB).
# NOTE(review): presumably chosen to stay below the transcription API's upload
# limit -- confirm against the current OpenAI documentation.
_MAX_AUDIO_FILE_SIZE = 24 * 1024 * 1024


@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def speech_to_text(video_url: str, output_path: str = '.'):
    """
    Transcribe the speech of a YouTube video using OpenAI's Whisper model.

    The first audio-only stream of the video is downloaded into
    ``output_path``, sent to the ``whisper-1`` transcription endpoint and the
    downloaded file is deleted again before the function returns or exits.

    NOTE(review): every ``Exception`` raised in the body is converted into
    ``SystemExit`` below, so the ``@retry`` policy presumably never sees an
    exception it would retry on -- confirm against tenacity's default retry
    predicate before relying on the backoff.

    Args:
        video_url (str): URL of the YouTube video to transcribe.
        output_path (str, optional): Directory where the audio file is
            temporarily saved. Defaults to '.'.

    Returns:
        tuple | None: ``(transcript, title)`` where ``transcript`` is the
        value returned by the transcription call (requested with
        ``response_format="text"``) and ``title`` is ``yt.title``;
        ``None`` when the video exposes no audio-only stream.

    Raises:
        SystemExit: If the downloaded file is larger than 24MB, if the
            transcription request fails, or if any other error occurs while
            accessing or downloading the video.
    """
    # Bound up front so the cleanup in ``finally`` is safe even when the
    # failure (or the early ``return None``) happens before the download.
    audio_file = None
    try:
        logger.info(f"Accessing YouTube URL: {video_url}")
        yt = YouTube(video_url)

        logger.info("Fetching the highest quality audio stream")
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            logger.warning("No audio stream found for this video.")
            return None

        logger.info(f"Downloading audio for: {yt.title}")
        audio_file = audio_stream.download(output_path)
        logger.info(f"Audio downloaded: {yt.title} to {output_path}")

        # Checking file size: log the real size of the download, then enforce
        # the limit.
        file_size = os.path.getsize(audio_file)
        logger.info(f"Downloaded Audio Size is: {file_size}")
        if file_size > _MAX_AUDIO_FILE_SIZE:
            logger.error("File size exceeds 24MB limit.")
            # SystemExit is not an ``Exception``, so it bypasses the handler
            # below and only triggers the ``finally`` cleanup.
            sys.exit("File size limit exceeded.")

        try:
            logger.info("Initializing OpenAI client for transcription.")
            client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

            logger.info("Transcribing using OpenAI's Whisper model.")
            # The context manager closes the handle before the file is
            # removed in ``finally``.
            with open(audio_file, "rb") as audio_handle:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_handle,
                    response_format="text"
                )
            return transcript, yt.title
        except Exception as e:
            logger.error(f"Failed in Whisper transcription: {e}")
            sys.exit("Transcription failure.")

    except Exception as e:
        logger.error(f"An error occurred during YouTube video processing: {e}")
        sys.exit("Video processing failure.")
    finally:
        # Remove the temporary download on every exit path, if one exists.
        if audio_file is not None and os.path.exists(audio_file):
            os.remove(audio_file)
            logger.info("Temporary audio file removed.")