main_config changes - WIP
This commit is contained in:
114
lib/speech_to_blog/main_audio_to_blog.py
Normal file
114
lib/speech_to_blog/main_audio_to_blog.py
Normal file
@@ -0,0 +1,114 @@
|
||||
import json
|
||||
import os
|
||||
import datetime #I wish
|
||||
import sys
|
||||
|
||||
import openai
|
||||
from tqdm import tqdm, trange
|
||||
import time
|
||||
import re
|
||||
from textwrap import dedent
|
||||
import nltk
|
||||
nltk.download('punkt', quiet=True)
|
||||
from nltk.corpus import stopwords
|
||||
nltk.download('stopwords', quiet=True)
|
||||
|
||||
from .write_blogs_from_youtube_videos import youtube_to_blog
|
||||
from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
|
||||
from .gpt_online_researcher import do_online_research
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
def generate_youtube_blog(yt_url_list, output_format="markdown"):
    """Generate and save one blog post for each YouTube URL in ``yt_url_list``.

    For every URL the video is converted into a draft with
    ``youtube_to_blog``, online research is run on that draft, and a post is
    assembled from a title, an introduction, the draft body and a conclusion.
    The post is then proof-read, converted to ``output_format`` and handed to
    ``save_blog_to_file``.

    Args:
        yt_url_list: Iterable of YouTube video URLs.
        output_format: Requested output format. A value containing ``'html'``
            converts the post with ``convert_markdown_to_html``; a value
            containing ``'markdown'`` uses ``convert_tomarkdown_format``.

    Returns:
        None. The generated post is written out by ``save_blog_to_file``.

    Note:
        A failure in any generation stage is logged and ends the process with
        ``sys.exit(1)``. Only a failure to save is logged without exiting.
    """
    # NOTE(review): several helpers used below (blog_with_research,
    # generate_blog_description, generate_blog_title, get_blog_tags,
    # get_blog_categories, get_blog_intro, generate_image,
    # get_blog_conclusion, blog_proof_editor, convert_tomarkdown_format,
    # convert_markdown_to_html, save_blog_to_file) are not imported by this
    # module in the visible source - confirm the module imports them.

    # Same location main_youtube_research_blog.py uses for IMAGE_DIR.
    image_dir = os.path.join(os.getcwd(), "blog_images")

    for a_yt_url in yt_url_list:
        try:
            logger.info(f"Starting to write blog on URL: {a_yt_url}")
            yt_result = youtube_to_blog(a_yt_url)
            # youtube_to_blog returns (blog_text, video_title); keep only the
            # text so the string handling below never sees a tuple.
            yt_blog = yt_result[0] if isinstance(yt_result, tuple) else yt_result
        except Exception as e:
            logger.error(f"Error in youtube_to_blog: {e}")
            sys.exit(1)

        try:
            logger.info("Starting with online research for URL title.")
            research_report = do_online_research(yt_blog)
        except Exception as e:
            logger.error(f"Error in do_online_research: {e}")
            sys.exit(1)

        try:
            # Note: Check if the order of input matters for your function
            logger.info("Preparing a blog content from audio script and online research content...")
            # NOTE(review): the return value is discarded, so the research
            # never reaches the saved post - confirm whether it should
            # replace yt_blog.
            blog_with_research(research_report, yt_blog)
        except Exception as e:
            logger.error(f"Error in blog_with_research: {e}")
            sys.exit(1)

        try:
            # Get the title and meta description of the blog.
            blog_meta_desc = generate_blog_description(yt_blog)
            title = generate_blog_title(blog_meta_desc)
            logger.info(f"Title is {title} and description is {blog_meta_desc}")
            blog_markdown_str = "# " + title.replace('"', '') + "\n\n"

            # Get blog tags and categories.
            blog_tags = get_blog_tags(blog_meta_desc)
            logger.info(f"Blog tags are: {blog_tags}")
            blog_categories = get_blog_categories(blog_meta_desc)
            logger.info(f"Blog categories are: {blog_categories}")

            # Generate an introduction for the blog.
            blog_intro = get_blog_intro(title, yt_blog)
            logger.info(f"The Blog intro is:\n {blog_intro}")
            blog_markdown_str = blog_markdown_str + "\n\n" + f"{blog_intro}" + "\n\n"

            # Generate an image based on the meta description.
            logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
            main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")

            # TODO: embed a variation of the video screenshot and/or a
            # stable-diffusion image in the post body.

            # Add the body of the blog content.
            blog_markdown_str = blog_markdown_str + "\n\n" + f'{yt_blog}' + "\n\n"

            # Get the conclusion of the blog, by passing the generated blog.
            blog_conclusion = get_blog_conclusion(blog_markdown_str)
            # TBD: Add another image.
            blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n\n" + f"{blog_conclusion}" + "\n"

            # Proofread the blog, remove duplicates and refine it further.
            # Presently, the blog keywords are the tags plus the categories.
            blog_keywords = f"{blog_tags} + {blog_categories}"
            blog_markdown_str = blog_proof_editor(blog_markdown_str, blog_keywords)

            # Convert to the format requested by the caller.
            if 'html' in output_format:
                blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
            elif 'markdown' in output_format:
                blog_markdown_str = convert_tomarkdown_format(blog_markdown_str)

            # Try to save the blog content in a file, in whichever format.
            try:
                save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
            except Exception as err:
                # Best effort: a failed save is reported but does not abort.
                logger.error(f"Failed to Save blog content: {err}")

        except Exception as e:
            logger.error(f"Error: Failed to generate_youtube_blog: {e}")
            sys.exit(1)
|
||||
150
lib/speech_to_blog/main_youtube_research_blog.py
Normal file
150
lib/speech_to_blog/main_youtube_research_blog.py
Normal file
@@ -0,0 +1,150 @@
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from loguru import logger
|
||||
|
||||
# Import from local packages
|
||||
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from .gpt_providers.gpt_vision_img_details import analyze_and_extract_details_from_image
|
||||
from .generate_image_from_prompt import generate_image
|
||||
from .write_blogs_from_youtube_videos import youtube_to_blog
|
||||
from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
|
||||
from .gpt_online_researcher import do_online_research
|
||||
from .save_blog_to_file import save_blog_to_file
|
||||
from .optimize_images_for_upload import optimize_image
|
||||
from .combine_research_and_blog import blog_with_research
|
||||
from .get_blog_meta_desc import generate_blog_description
|
||||
from .get_blog_title import generate_blog_title
|
||||
from .get_tags import get_blog_tags
|
||||
from .get_blog_category import get_blog_categories
|
||||
from .convert_content_to_markdown import convert_tomarkdown_format
|
||||
from .convert_markdown_to_html import convert_markdown_to_html
|
||||
from .utils.youtube_keyword_research import research_yt
|
||||
|
||||
# Configuring the logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout, colorize=True, format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}")
|
||||
|
||||
# Constants for directory paths
|
||||
IMAGE_DIR = os.path.join(os.getcwd(), "blog_images")
|
||||
OUTPUT_PATH = os.path.join(os.getcwd(), "blogs")
|
||||
|
||||
|
||||
def generate_youtube_research_blog(yt_keywords):
    """Research YouTube for each keyword and draft a blog per returned video.

    For every keyword, ``research_yt`` is queried (with retries) and its JSON
    response is parsed by ``load_response_json``. Each returned entry that has
    a URL is passed to ``youtube_to_blog`` and the first draft is logged; the
    entry's metadata is then printed to stdout.

    Args:
        yt_keywords: Iterable of keywords to research.

    Returns:
        None.

    Note:
        A keyword with no research response is skipped with a warning. A
        failure to parse the response or to draft a blog is logged and ends
        the process with ``sys.exit(1)``.
    """
    for ayt_keyword in yt_keywords:
        logger.info(f"Researching YouTube top videos for: {ayt_keyword}")

        yt_research_response = _research_keyword_with_retry(ayt_keyword)
        if not yt_research_response:
            logger.warning("Error: JSON data is empty.")
            continue

        try:
            aggregated_data = load_response_json(yt_research_response, ayt_keyword)
        except Exception as err:
            logger.error(f"Failed to load json response: {err}")
            sys.exit(1)

        for title, a_yt_url, views, references, quickstart_code in zip(
                aggregated_data["titles"], aggregated_data["urls"], aggregated_data["views"],
                aggregated_data["references"], aggregated_data["quickstart_codes"]):
            if a_yt_url != "No URL Provided":
                # Transcribe the audio and draft the blog for this video.
                try:
                    logger.info(f"Starting to write blog on URL: {a_yt_url}")
                    blog_markdown_str, yt_title = youtube_to_blog(a_yt_url)
                    logger.warning("\n\n--------------- First Draft of the Blog: --------\n\n")
                    logger.info(f"{blog_markdown_str}\n")
                    logger.warning("--------------------END of First draft----------\n\n")
                    if not yt_title or not blog_markdown_str:
                        logger.error("No content or title for audio to proceed.")
                        sys.exit(1)
                except Exception as e:
                    # SystemExit is not an Exception subclass, so the exit
                    # above is not intercepted here.
                    logger.error(f"Error in youtube_to_blog: {e}")
                    sys.exit(1)

            # Print whichever metadata fields carry real (non-placeholder) values.
            if title != "Unknown Title":
                print(f"Title: {title}")
            if a_yt_url != "No URL Provided":
                print(f"URL: {a_yt_url}")
            if views != "No View Count":
                print(f"Views: {views}")
            if references:  # Checks if references list is not empty
                print(f"References: {', '.join(map(str, references))}")
            if quickstart_code != "Code coming soon":
                print(f"Quickstart Code: {quickstart_code}")
            print()  # Adds a newline for separation between entries


def _research_keyword_with_retry(keyword, attempts=3):
    """Return the first non-blank string from ``research_yt``, or ``''``.

    ``research_yt`` is called up to ``attempts`` times; exceptions and blank
    or non-string responses are logged and count as a failed attempt.
    """
    for attempt in range(1, attempts + 1):
        try:
            response = research_yt(keyword)
        except Exception as err:
            logger.error(f"Failed to do YouTube Research: {err}")
            continue
        if isinstance(response, str) and response.strip():
            return response
        logger.warning(f"Empty YouTube research response (attempt {attempt}/{attempts}).")
    return ""
|
||||
|
||||
|
||||
|
||||
def load_response_json(yt_research_response, yt_keyword):
    """Parse the YouTube research response and aggregate its video entries.

    Args:
        yt_research_response: JSON text from the research step. All backticks
            are removed before parsing, and a leading ``json`` fence tag left
            behind by that removal is dropped.
        yt_keyword: Keyword the research was run for; used in error messages.

    Returns:
        dict: The aggregated data produced by ``process_results``. The entry
        list is either the top-level JSON array or the value of the first
        top-level key whose lower-cased name starts with ``"result"``.

    Raises:
        ValueError: If the text is not valid JSON, or no entry list can be
            located in it.
    """
    logger.info(f"Loading the JSON data for parsing: {yt_research_response}")

    cleaned = yt_research_response.replace('`', '').strip()
    # A fenced block such as ```json ... ``` leaves a "json" tag in front of
    # the payload once the backticks are gone; valid JSON never starts with it.
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:].lstrip()

    try:
        data = json.loads(cleaned)
    except json.JSONDecodeError as e:
        logger.error(f"load_response_json: Failed to parse JSON data: {e}")
        # Retrying is the caller's decision; re-entering the research
        # pipeline from here could recurse without bound.
        raise ValueError(
            f"Invalid JSON in YouTube research response for keyword: {yt_keyword}"
        ) from e

    if isinstance(data, list):
        return process_results(data)

    if isinstance(data, dict):
        results_key = next((key for key in data if key.lower().startswith("result")), None)
        if results_key is not None:
            return process_results(data[results_key])

    raise ValueError(
        f"No results found in YouTube research response for keyword: {yt_keyword}"
    )
|
||||
|
||||
|
||||
def process_results(results):
    """
    Collect per-video fields from the research results into parallel lists.

    Args:
        results (list): Entries describing YouTube videos. Each entry is read
            with ``.get`` using the keys "Title", "URL", "Views",
            "References" and "Quickstart_Code".

    Returns:
        dict: Lists stored under "titles", "urls", "views", "references" and
        "quickstart_codes". A missing field is filled with its placeholder
        ("Unknown Title", "No URL Provided", "No View Count", an empty list,
        "Code coming soon").

    Note:
        An entry that raises while being read is logged and skipped; nothing
        is re-raised to the caller.
    """
    aggregated = {
        "titles": [],
        "urls": [],
        "views": [],
        "references": [],
        "quickstart_codes": [],
    }

    for item in results:
        try:
            aggregated["titles"].append(item.get("Title", "Unknown Title"))
            aggregated["urls"].append(item.get("URL", "No URL Provided"))
            aggregated["views"].append(item.get("Views", "No View Count"))
            aggregated["references"].append(item.get("References", []))
            aggregated["quickstart_codes"].append(item.get("Quickstart_Code", "Code coming soon"))
        except Exception as e:
            logger.error(f"Error processing yt resulr entry: {e}")

    return aggregated
|
||||
97
lib/speech_to_blog/write_blogs_from_youtube_videos.py
Normal file
97
lib/speech_to_blog/write_blogs_from_youtube_videos.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import os
|
||||
import time
|
||||
import sys
|
||||
|
||||
from pytube import YouTube
|
||||
import tempfile
|
||||
import openai
|
||||
from html2image import Html2Image
|
||||
from tqdm import tqdm, trange
|
||||
import google.generativeai as genai
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
from .gpt_providers.stt_audio_blog import speech_to_text
|
||||
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
|
||||
|
||||
def youtube_to_blog(video_url):
    """Transcribe a YouTube video and turn the transcript into a blog draft.

    Args:
        video_url: URL of the video; passed unchanged to ``speech_to_text``.

    Returns:
        tuple: ``(audio_blog_content, audio_title)`` - the text returned by
        ``summarize_youtube_video`` (using the "gemini" provider) and the
        title returned by ``speech_to_text``. The content may be empty or
        None if the summarizer produced nothing.

    Note:
        If either stage raises, the error is logged and the process ends with
        ``sys.exit(1)``.
    """
    # TODO: re-enable the video screenshot once a video id can be extracted
    # from every YouTube URL form (watch?v=..., youtu.be/..., shorts/...).

    try:
        # Starting the speech-to-text process
        logger.info("Starting with Speech to Text.")
        audio_text, audio_title = speech_to_text(video_url)
    except Exception as e:
        logger.error(f"Error in speech_to_text: {e}")
        sys.exit(1)  # Exit the program due to error in speech_to_text

    try:
        # Summarizing the content of the YouTube video
        audio_blog_content = summarize_youtube_video(audio_text, "gemini")
        logger.info("Successfully converted given URL to blog article.")
        return audio_blog_content, audio_title
    except Exception as e:
        logger.error(f"Error in summarize_youtube_video: {e}")
        sys.exit(1)  # Exit the program due to error in summarize_youtube_video
|
||||
|
||||
|
||||
def summarize_youtube_video(user_content, gpt_providers):
    """Rewrite a transcript as a blog article using the selected LLM provider.

    Args:
        user_content: Transcript text to turn into a blog article.
        gpt_providers: Provider selector. A value containing ``'gemini'``
            uses ``google.generativeai``; otherwise a value containing
            ``'openai'`` uses ``openai_chatgpt``.

    Returns:
        The generated article text. None is returned when the Gemini request
        fails or when the provider is not recognised (both are logged).

    Raises:
        RuntimeError: If the OpenAI request fails.
    """
    logger.info("Start summarize_youtube_video..")
    prompt = f"""
You are an expert copywriter specializing in digital content writing. I will provide you with a transcript.
Your task is to transform a given transcript into a well-structured and informative blog article.
Please follow the below objectives:

1. Master the Transcript: Understand main ideas, key points, and the core message.
2. Sentence Structure: Rephrase while preserving logical flow and coherence. Dont quote anyone from video.
3. Note: Check if the transcript is about programming, then include code examples and snippets in your article.
4. Write Unique Content: Avoid direct copying; rewrite in your own words.
5. REMEMBER to avoid direct quoting and maintain uniqueness.
6. Proofread: Check for grammar, spelling, and punctuation errors.
7. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, interjections, and colloquialisms. 8. Avoid repetitive phrases and unnatural sentence structures.
9. Ensure Uniqueness: Guarantee the article is plagiarism-free.
10. Punctuation: Use appropriate question marks at the end of questions.
11. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
12. Rephrase words like 'video, youtube, channel' with 'article, blog' and such suitable words.

Follow the above guidelines to create a well-optimized, unique, and informative article,
that will rank well in search engine results and engage readers effectively.
Follow above guidelines to craft a blog content from the following transcript:\n{user_content}
"""
    if 'gemini' in gpt_providers:
        try:
            genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
        except Exception as err:
            logger.error(f"Failed in getting GEMINI_API_KEY: {err}")

        # Use the gemini-pro model for text generation.
        model = genai.GenerativeModel('gemini-pro')
        try:
            response = model.generate_content(prompt)
            return response.text
        except Exception as err:
            # Best effort: callers treat a None result as "no content".
            logger.error(f"Failed to get response from gemini: {err}")
            return None

    elif 'openai' in gpt_providers:
        try:
            return openai_chatgpt(prompt)
        except Exception as err:
            # The exception used to be constructed without being raised,
            # which hid the failure from the caller.
            raise RuntimeError(f"Error in generating blog summary: {err}") from err

    logger.error(f"Unknown GPT provider: {gpt_providers}")
    return None
|
||||
Reference in New Issue
Block a user