main_config changes - WIP

This commit is contained in:
AjaySi
2024-03-27 22:19:16 +05:30
parent e5a5372a29
commit 74b7bc3cbe
16 changed files with 63 additions and 543 deletions

View File

@@ -0,0 +1,114 @@
import json
import os
import datetime #I wish
import sys
import openai
from tqdm import tqdm, trange
import time
import re
from textwrap import dedent
import nltk
nltk.download('punkt', quiet=True)
from nltk.corpus import stopwords
nltk.download('stopwords', quiet=True)
from .write_blogs_from_youtube_videos import youtube_to_blog
from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
from .gpt_online_researcher import do_online_research
from loguru import logger
# Drop any previously registered loguru handlers and log to stdout only,
# with colourised output and a "LEVEL|file:line:function| message" layout.
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
def generate_youtube_blog(yt_url_list, output_format="markdown"):
    """Generate and save a blog post for every YouTube URL in ``yt_url_list``.

    For each URL the video is converted to a draft article, online research
    is run on it, and a title, meta description, tags, categories,
    introduction, image and conclusion are generated. The assembled text is
    proofread, converted to the requested format and saved to a file.

    Args:
        yt_url_list: Iterable of YouTube video URLs.
        output_format: Desired output format. Matched by substring, so any
            value containing ``"html"`` yields HTML and any value containing
            ``"markdown"`` (the default) yields markdown.

    Returns:
        None. Any failure other than saving the file terminates the process
        with exit status 1; a failed save is logged and processing continues.
    """
    # Accumulates the blog text that is eventually written to a file.
    blog_markdown_str = ""
    for a_yt_url in yt_url_list:
        try:
            logger.info(f"Starting to write blog on URL: {a_yt_url}")
            yt_result = youtube_to_blog(a_yt_url)
            # youtube_to_blog may hand back a (content, title) pair; only the
            # article content is needed here, so accept both shapes.
            yt_blog = yt_result[0] if isinstance(yt_result, tuple) else yt_result
        except Exception as e:
            logger.error(f"Error in youtube_to_blog: {e}")
            sys.exit(1)

        try:
            logger.info("Starting with online research for URL title.")
            research_report = do_online_research(yt_blog)
        except Exception as e:
            logger.error(f"Error in do_online_research: {e}")
            sys.exit(1)

        try:
            # Note: Check if the order of input matters for your function
            logger.info("Preparing a blog content from audio script and online research content...")
            # NOTE(review): the return value is discarded, so the combined
            # content never reaches the final blog - confirm this is intended.
            blog_with_research(research_report, yt_blog)
        except Exception as e:
            logger.error(f"Error in blog_with_research: {e}")
            sys.exit(1)

        try:
            # Get the title and meta description of the blog.
            blog_meta_desc = generate_blog_description(yt_blog)
            title = generate_blog_title(blog_meta_desc)
            logger.info(f"Title is {title} and description is {blog_meta_desc}")
            blog_markdown_str = "# " + title.replace('"', '') + "\n\n"

            # Get blog tags and categories.
            blog_tags = get_blog_tags(blog_meta_desc)
            logger.info(f"Blog tags are: {blog_tags}")
            blog_categories = get_blog_categories(blog_meta_desc)
            logger.info(f"Blog categories are: {blog_categories}")

            # Generate an introduction for the blog.
            blog_intro = get_blog_intro(title, yt_blog)
            logger.info(f"The Blog intro is:\n {blog_intro}")
            blog_markdown_str = blog_markdown_str + "\n\n" + f"{blog_intro}" + "\n\n"

            # Generate an image based on the meta description.
            logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
            # NOTE(review): `image_dir` is not defined in the visible part of
            # this module - confirm where it is supposed to come from.
            main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
            # TODO: optionally embed a variation of the video screenshot
            # (gen_new_from_given_img) or a stable-diffusion image here.

            # Add the body of the blog content.
            blog_markdown_str = blog_markdown_str + "\n\n" + f'{yt_blog}' + "\n\n"

            # Get the conclusion of the blog by passing the generated blog.
            blog_conclusion = get_blog_conclusion(blog_markdown_str)
            # TBD: Add another image.
            blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n\n" + f"{blog_conclusion}" + "\n"

            # Proofread the blog, remove duplicates and refine it further.
            # Presently, the blog keywords are just the tags and categories.
            blog_keywords = f"{blog_tags} + {blog_categories}"
            blog_markdown_str = blog_proof_editor(blog_markdown_str, blog_keywords)

            # Convert to the format requested by the caller.
            if 'html' in output_format:
                blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
            elif 'markdown' in output_format:
                blog_markdown_str = convert_tomarkdown_format(blog_markdown_str)

            # Saving is best-effort: a failure is logged but does not abort
            # the remaining URLs.
            try:
                save_blog_to_file(blog_markdown_str, title, blog_meta_desc,
                                  blog_tags, blog_categories, main_img_path)
            except Exception as err:
                logger.error(f"Failed to Save blog content: {err}")
        except Exception as e:
            logger.error(f"Error: Failed to generate_youtube_blog: {e}")
            sys.exit(1)

View File

@@ -0,0 +1,150 @@
import json
import os
import sys
from loguru import logger
# Import from local packages
from .gpt_providers.openai_chat_completion import openai_chatgpt
from .gpt_providers.gpt_vision_img_details import analyze_and_extract_details_from_image
from .generate_image_from_prompt import generate_image
from .write_blogs_from_youtube_videos import youtube_to_blog
from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
from .gpt_online_researcher import do_online_research
from .save_blog_to_file import save_blog_to_file
from .optimize_images_for_upload import optimize_image
from .combine_research_and_blog import blog_with_research
from .get_blog_meta_desc import generate_blog_description
from .get_blog_title import generate_blog_title
from .get_tags import get_blog_tags
from .get_blog_category import get_blog_categories
from .convert_content_to_markdown import convert_tomarkdown_format
from .convert_markdown_to_html import convert_markdown_to_html
from .utils.youtube_keyword_research import research_yt
# Configure logging: drop any previously registered loguru handlers and log
# to stdout only, using a "LEVEL|file:line:function| message" layout.
logger.remove()
logger.add(sys.stdout, colorize=True, format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}")
# Directory path constants. Both are resolved against the current working
# directory at import time, not against this module's location.
IMAGE_DIR = os.path.join(os.getcwd(), "blog_images")
OUTPUT_PATH = os.path.join(os.getcwd(), "blogs")
def generate_youtube_research_blog(yt_keywords):
    """
    Research YouTube for each keyword and draft a blog from the top videos.

    For every keyword the research response is fetched (with retries when it
    comes back empty), parsed into aggregated lists, and each video that has
    a URL is converted into a first-draft blog. A short summary of each
    entry is printed afterwards.

    Args:
        yt_keywords: Iterable of keyword strings to research.

    Returns:
        None. Parsing or transcription failures terminate the process with
        exit status 1.
    """
    for ayt_keyword in yt_keywords:
        yt_research_response = ''
        logger.info(f"Researching YouTube top videos for: {ayt_keyword}")
        try:
            yt_research_response = research_yt(ayt_keyword)
            if not yt_research_response:
                yt_research_response = research_yt(ayt_keyword)
        except Exception as err:
            logger.error(f"Failed to do YouTube Research: {err}")

        # Guard against None as well as blank strings before calling strip().
        if not yt_research_response or not yt_research_response.strip():
            logger.warning("Error: JSON data is empty.")
            # Final retry; its result is used instead of being thrown away.
            yt_research_response = research_yt(ayt_keyword)
            if not yt_research_response or not yt_research_response.strip():
                logger.error(f"No research data for keyword: {ayt_keyword}")
                continue

        try:
            aggregated_data = load_response_json(yt_research_response, ayt_keyword)
        except Exception as err:
            logger.error(f"Failed to load json response: {err}")
            sys.exit(1)

        for title, a_yt_url, views, references, quickstart_code in zip(
                aggregated_data["titles"], aggregated_data["urls"], aggregated_data["views"],
                aggregated_data["references"], aggregated_data["quickstart_codes"]):
            blog_markdown_str = ""
            if a_yt_url != "No URL Provided":
                # Transcribe the audio and turn it into a first draft.
                try:
                    logger.info(f"Starting to write blog on URL: {a_yt_url}")
                    blog_markdown_str, yt_title = youtube_to_blog(a_yt_url)
                    logger.warning("\n\n--------------- First Draft of the Blog: --------\n\n")
                    logger.info(f"{blog_markdown_str}\n")
                    logger.warning("--------------------END of First draft----------\n\n")
                    if not yt_title or not blog_markdown_str:
                        logger.error("No content or title for audio to proceed.")
                        sys.exit(1)
                except Exception as e:
                    logger.error(f"Error in youtube_to_blog: {e}")
                    sys.exit(1)
                # NOTE(review): unconditional exit after the first drafted
                # video looks like a WIP debugging stop - confirm before
                # removing; it makes the summary below unreachable for
                # entries that have a URL.
                sys.exit(1)

            if title != "Unknown Title":
                print(f"Title: {title}")
            if a_yt_url != "No URL Provided":
                print(f"URL: {a_yt_url}")
            if views != "No View Count":
                print(f"Views: {views}")
            if references:  # Checks if references list is not empty
                print(f"References: {', '.join(references)}")
            if quickstart_code != "Code coming soon":
                print(f"Quickstart Code: {quickstart_code}")
            print()  # Adds a newline for separation between entries
def load_response_json(yt_research_response, yt_keyword):
    """
    Load and parse the YouTube research response JSON.

    Args:
        yt_research_response: Raw response text. Backticks and a leading
            "json" fence tag are removed before parsing.
        yt_keyword: Keyword the response belongs to; used to re-run the
            research when the payload cannot be parsed.

    Returns:
        dict: The aggregated lists produced by ``process_results``. When the
        payload is unparsable, has no key starting with "result", or is
        neither a list nor a dict, the aggregate for an empty result list is
        returned so callers can iterate it safely.
    """
    # Start from the empty aggregate so every code path returns a valid dict.
    research_yt_dict = process_results([])
    try:
        logger.info(f"Loading the JSON data for parsing: {yt_research_response}")
        cleaned = yt_research_response.replace('`', '').strip()
        # A fenced "```json" block leaves a stray "json" tag once the
        # backticks are gone; no valid JSON document starts with it.
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:].lstrip()
        data = json.loads(cleaned)
        if isinstance(data, dict):
            # Accept any top-level key that begins with "result".
            results_key = next((key for key in data if key.lower().startswith("result")), None)
            if results_key:
                research_yt_dict = process_results(data[results_key])
            else:
                logger.warning("load_response_json: No results key found in JSON data.")
        elif isinstance(data, list):
            research_yt_dict = process_results(data)
        else:
            logger.warning("load_response_json: Unexpected JSON payload type.")
    except json.JSONDecodeError as e:
        logger.error(f"load_response_json: Failed to parse JSON data: {e}")
        # NOTE(review): this re-enters the research flow and can recurse
        # without bound while the response stays unparsable - consider a cap.
        generate_youtube_research_blog([yt_keyword])
    return research_yt_dict
def process_results(results):
    """
    Collect per-video fields from the research results into parallel lists.

    Args:
        results (list): Entries describing YouTube videos; each is read
            through its ``get`` method.

    Returns:
        dict: Lists stored under "titles", "urls", "views", "references"
        and "quickstart_codes". A missing field is replaced by its
        placeholder default.

    An entry that raises while being read is logged and skipped.
    """
    collected = {
        "titles": [],
        "urls": [],
        "views": [],
        "references": [],
        "quickstart_codes": [],
    }
    for item in results:
        try:
            lookup = item.get
            collected["titles"].append(lookup("Title", "Unknown Title"))
            collected["urls"].append(lookup("URL", "No URL Provided"))
            collected["views"].append(lookup("Views", "No View Count"))
            # A fresh list per entry, so defaults are never shared.
            collected["references"].append(lookup("References", []))
            collected["quickstart_codes"].append(lookup("Quickstart_Code", "Code coming soon"))
        except Exception as e:
            logger.error(f"Error processing yt resulr entry: {e}")
            continue
    return collected

View File

@@ -0,0 +1,97 @@
import os
import time
import sys
from pytube import YouTube
import tempfile
import openai
from html2image import Html2Image
from tqdm import tqdm, trange
import google.generativeai as genai
from loguru import logger
# Drop any previously registered loguru handlers and log to stdout only,
# with colourised output and a "LEVEL|file:line:function| message" layout.
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
from .gpt_providers.stt_audio_blog import speech_to_text
from .gpt_providers.openai_chat_completion import openai_chatgpt
def youtube_to_blog(video_url):
    """Transcribe the video at ``video_url`` and rewrite it as a blog article.

    Args:
        video_url: URL of the YouTube video. It is passed to
            ``speech_to_text`` unchanged, so no particular URL shape is
            required by this function.

    Returns:
        tuple: ``(audio_blog_content, audio_title)`` - the generated article
        text and the title reported by ``speech_to_text``.

    The process exits with status 1 if transcription or summarisation raises.
    """
    # TODO: if the screenshot feature is revived, derive the video id with a
    # parser that also handles URLs without "=" (e.g. short links).
    #hti = Html2Image(output_path="../blog_images")
    #hti.screenshot(url=video_url, save_as=f"yt-img-{vid_id}.png")
    #yt_img_path = os.path.join("../blog_images", f"yt-img-{vid_id}.png")
    try:
        # Starting the speech-to-text process
        logger.info("Starting with Speech to Text.")
        audio_text, audio_title = speech_to_text(video_url)
    except Exception as e:
        logger.error(f"Error in speech_to_text: {e}")
        sys.exit(1)  # Exit the program due to error in speech_to_text

    try:
        # Summarizing the content of the YouTube video
        audio_blog_content = summarize_youtube_video(audio_text, "gemini")
        logger.info("Successfully converted given URL to blog article.")
        return audio_blog_content, audio_title
    except Exception as e:
        logger.error(f"Error in summarize_youtube_video: {e}")
        sys.exit(1)  # Exit the program due to error in summarize_youtube_video
def summarize_youtube_video(user_content, gpt_providers):
    """Rewrite a video transcript as a blog article using an LLM provider.

    Args:
        user_content: Transcript text; it is embedded at the end of the
            prompt.
        gpt_providers: Provider selector, matched by substring: a value
            containing ``"gemini"`` uses the ``gemini-pro`` model, otherwise
            a value containing ``"openai"`` uses ``openai_chatgpt``.

    Returns:
        The generated article text, or ``None`` when the provider call fails
        or the provider is not recognised (the failure is logged).
    """
    logger.info("Start summarize_youtube_video..")
    # The prompt body is kept flush-left so the text sent to the model
    # carries no extra indentation.
    prompt = f"""
You are an expert copywriter specializing in digital content writing. I will provide you with a transcript.
Your task is to transform a given transcript into a well-structured and informative blog article.
Please follow the below objectives:
1. Master the Transcript: Understand main ideas, key points, and the core message.
2. Sentence Structure: Rephrase while preserving logical flow and coherence. Dont quote anyone from video.
3. Note: Check if the transcript is about programming, then include code examples and snippets in your article.
4. Write Unique Content: Avoid direct copying; rewrite in your own words.
5. REMEMBER to avoid direct quoting and maintain uniqueness.
6. Proofread: Check for grammar, spelling, and punctuation errors.
7. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, interjections, and colloquialisms. 8. Avoid repetitive phrases and unnatural sentence structures.
9. Ensure Uniqueness: Guarantee the article is plagiarism-free.
10. Punctuation: Use appropriate question marks at the end of questions.
11. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
12. Rephrase words like 'video, youtube, channel' with 'article, blog' and such suitable words.
Follow the above guidelines to create a well-optimized, unique, and informative article,
that will rank well in search engine results and engage readers effectively.
Follow above guidelines to craft a blog content from the following transcript:\n{user_content}
"""
    if 'gemini' in gpt_providers:
        try:
            genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
        except Exception as err:
            # Best effort: the model call below is still attempted.
            logger.error(f"Failed in getting GEMINI_API_KEY: {err}")
        # Use gemini-pro model for text and image.
        model = genai.GenerativeModel('gemini-pro')
        try:
            response = model.generate_content(prompt)
            return response.text
        except Exception as err:
            logger.error(f"Failed to get response from gemini: {err}")
    elif 'openai' in gpt_providers:
        try:
            return openai_chatgpt(prompt)
        except Exception as err:
            # The exception object used to be built and dropped here, which
            # hid the failure; log it so callers see why None came back.
            logger.error(f"Error in generating blog summary: {err}")
    else:
        logger.error(f"Unsupported gpt provider: {gpt_providers}")
    return None