Blogen-V0.1 Added features. WIP
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -16,4 +16,4 @@ pseo-experiments/lib/python3.10/
|
|||||||
pseo-experiments/bin/
|
pseo-experiments/bin/
|
||||||
blog_images/
|
blog_images/
|
||||||
blogs/
|
blogs/
|
||||||
pseo_website/
|
.env
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
true
|
|
||||||
@@ -1,31 +1,35 @@
|
|||||||
|
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||||
|
|
||||||
|
|
||||||
def blog_with_research(report, blog):
|
def blog_with_research(report, blog):
|
||||||
"""Combine the given online research and gpt blog content"""
|
"""Combine the given online research and gpt blog content"""
|
||||||
|
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
You are an expert copywriter specializing in content optimization for SEO.
|
You are an expert copywriter specializing in content optimization for SEO.
|
||||||
I will provide you with a research report and a blog content on the same topic.
|
I will provide you with a 'research report' and a 'blog content' on the same topic.
|
||||||
Treat the research report as the context for the blog and better it accordingly.
|
|
||||||
Your task is to transform and combine the given research and blog content into a well-structured, unique
|
Your task is to transform and combine the given research and blog content into a well-structured, unique
|
||||||
and engaging blog article.
|
and engaging blog article.
|
||||||
|
|
||||||
Your objectives include:
|
Your objectives include:
|
||||||
1. Master the report and blog content: Understand main ideas, key points, and the core message.
|
1. Master the report and blog content: Understand main ideas, key points, and the core message.
|
||||||
2. Sentence Structure: Rephrase while preserving logical flow and coherence.
|
2. Sentence Structure: Rephrase while preserving logical flow and coherence.
|
||||||
3. Identify Main Keyword: Determine the primary topic and combine the articles on the main topic.
|
3. Identify Main Keywords: Determine the primary topic and combine the articles on the main topic.
|
||||||
4. Keyword Integration: Naturally integrate keywords in headings, subheadings, and body text, avoiding overuse.
|
4. REMEMBER: From the research report, include links and cititations to make your article more authoratative.
|
||||||
5. Write Unique Content: Avoid direct copying from given report and blog; rewrite in your own words and style.
|
5. Write Code snippets: Check if given report is on programming, then write code snippets where applicable.
|
||||||
6. Optimize for SEO: Generate high quality informative content.
|
6. Optimize for SEO: Generate high quality informative content.
|
||||||
Implement SEO best practises with appropriate keyword density.
|
Implement SEO best practises with appropriate keyword density.
|
||||||
7. Craft Engaging and Informative Article: Provide value and insight to readers.
|
7. Craft Engaging and Informative Article: Provide value and insight to readers.
|
||||||
8. Proofread: Important to Check for grammar, spelling, and punctuation errors.
|
8. Proofread: Important to Check for grammar, spelling, and punctuation errors.
|
||||||
9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases,
|
9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases,
|
||||||
interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures.
|
interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures.
|
||||||
10. Structuring: Include an Introduction, subtopics and use bullet points or
|
10. Blog Structuring: Include an Introduction, subtopics and use bullet points or
|
||||||
numbered lists if appropriate. Important to include FAQs, and Conclusion.
|
numbered lists if appropriate. Important to include FAQs, Conclusion and Referances.
|
||||||
11. Ensure Uniqueness: Guarantee the article is plagiarism-free. Write in unique, informative style.
|
11. Ensure Uniqueness: Guarantee the article is plagiarism-free. Write in unique, informative style.
|
||||||
12. Punctuation: Use appropriate question marks at the end of questions.
|
12. Punctuation: Use appropriate question marks at the end of questions.
|
||||||
13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
|
13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
|
||||||
14. REMEMBER to give final response as complete HTML.
|
14. REMEMBER: Use the formatting style of given research report and include citations, referances in combined article.
|
||||||
Follow these guidelines to create a well-optimized, unique, and informative article
|
|
||||||
|
Follow these guidelines to combine and write a new, unique, and informative blog article
|
||||||
that will rank well in search engine results and engage readers effectively.
|
that will rank well in search engine results and engage readers effectively.
|
||||||
|
|
||||||
Create a blog post from the given research report and blog content below.
|
Create a blog post from the given research report and blog content below.
|
||||||
@@ -37,4 +41,4 @@ def blog_with_research(report, blog):
|
|||||||
response = openai_chatgpt(prompt)
|
response = openai_chatgpt(prompt)
|
||||||
return response
|
return response
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
SystemError(f"Error in combining research report and blog content.")
|
SystemError(f"Error in combining blog and research report.")
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||||
|
|
||||||
|
|
||||||
def convert_tomarkdown_format(blog_content):
|
def convert_tomarkdown_format(blog_content):
|
||||||
""" Helper for converting content to markdown format for static sites. """
|
""" Helper for converting content to markdown format for static sites. """
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
@@ -17,6 +20,7 @@ def convert_tomarkdown_format(blog_content):
|
|||||||
Specify a language for syntax highlighting.
|
Specify a language for syntax highlighting.
|
||||||
6). Horizontal Lines: Create a horizontal line using three or more asterisks, dashes, or underscores (---, ***).
|
6). Horizontal Lines: Create a horizontal line using three or more asterisks, dashes, or underscores (---, ***).
|
||||||
7). Table Formatting: Use pipes | and dashes - to create tables. Align text with colons.
|
7). Table Formatting: Use pipes | and dashes - to create tables. Align text with colons.
|
||||||
|
8). Remember to use suitable emojis for the given blog content.
|
||||||
|
|
||||||
Convert the given blog content in well organised markdown content: {blog_content}"""
|
Convert the given blog content in well organised markdown content: {blog_content}"""
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||||
|
|
||||||
def convert_markdown_to_html(md_content):
|
def convert_markdown_to_html(md_content):
|
||||||
""" Helper function to convert given text to HTML
|
""" Helper function to convert given text to HTML
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -1,17 +1,31 @@
|
|||||||
def generate_blog_description(blog_content):
|
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||||
|
import google.generativeai as genai
|
||||||
|
|
||||||
|
|
||||||
|
def generate_blog_description(blog_content, gpt_providers="openai"):
    """Generate an SEO-oriented meta description for the given blog content.

    Args:
        blog_content: Text of the blog to describe; interpolated into the prompt.
        gpt_providers: Name(s) of the LLM backend to use. Tested with ``in``,
            so a string or a collection of names works. 'gemini' is checked
            before 'openai'. Defaults to "openai" so callers written against
            the earlier one-argument signature keep working.

    Returns:
        The generated description: ``response.text`` for gemini, or whatever
        ``openai_chatgpt`` returns for openai (presumably the completion
        text -- confirm against that helper).

    Raises:
        ValueError: If ``gpt_providers`` names neither 'gemini' nor 'openai'.
        SystemError: If the selected provider fails; the original error is
            chained as ``__cause__``.
    """
    # Local import: this module does not import ``os`` at file level.
    import os

    prompt = f"""As an expert SEO and blog writer, Compose a compelling meta description for the given blog content,
        adhering to SEO best practices. Keep it between 150-160 characters.
        Provide a glimpse of the content's value to entice readers.
        Respond with only one of your best effort and do not include your explanations.
        Blog Content: {blog_content}"""

    if 'gemini' in gpt_providers:
        try:
            genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
            # Use gemini-pro model for text.
            model = genai.GenerativeModel('gemini-pro')
            response = model.generate_content(prompt)
            return response.text
        except Exception as err:
            # Raise instead of logging and returning None, so the caller
            # never continues with a missing description.
            raise SystemError(
                f"Error in generating blog description with gemini: {err}") from err

    if 'openai' in gpt_providers:
        try:
            return openai_chatgpt(prompt)
        except Exception as err:
            raise SystemError(f"Error in generating blog description: {err}") from err

    raise ValueError(
        f"Unsupported gpt_providers value {gpt_providers!r}; expected 'gemini' or 'openai'.")
|
||||||
|
|||||||
@@ -1,16 +0,0 @@
|
|||||||
def get_blog_tags(blog_article):
|
|
||||||
"""
|
|
||||||
Function to suggest tags for the given blog content
|
|
||||||
"""
|
|
||||||
# Suggest at least 5 tags for the following blog post [Enter your blog post text here].
|
|
||||||
prompt = f"""As an expert SEO and blog writer, suggest only 2 relevant and specific blog tags
|
|
||||||
for the given blog content. Only reply with comma separated values.
|
|
||||||
Blog content: {blog_article}."""
|
|
||||||
|
|
||||||
try:
|
|
||||||
# TBD: Add logic for which_provider and which_model
|
|
||||||
response = openai_chatgpt(prompt)
|
|
||||||
except Exception as err:
|
|
||||||
SystemError(f"Error in generating blog tags: {err}")
|
|
||||||
else:
|
|
||||||
return response
|
|
||||||
@@ -1,20 +1,33 @@
|
|||||||
def generate_blog_title(blog_meta_desc):
|
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||||
|
import google.generativeai as genai
|
||||||
|
|
||||||
|
|
||||||
|
def generate_blog_title(blog_meta_desc, gpt_providers="openai"):
    """Generate a single SEO-oriented blog title from a meta description.

    Args:
        blog_meta_desc: Meta description of the blog; interpolated into the
            prompt.
        gpt_providers: Name(s) of the LLM backend to use. Tested with ``in``,
            so a string or a collection of names works. 'gemini' is checked
            before 'openai'. Defaults to "openai" so callers written against
            the earlier one-argument signature keep working.

    Returns:
        The generated title: ``response.text`` for gemini, or whatever
        ``openai_chatgpt`` returns for openai (presumably the completion
        text -- confirm against that helper).

    Raises:
        ValueError: If ``gpt_providers`` names neither 'gemini' nor 'openai'.
        SystemError: If the selected provider fails; the original error is
            chained as ``__cause__``.
    """
    # Local import: this module does not import ``os`` at file level.
    import os

    prompt = f"""As a SEO expert and content writer, I will provide you with meta description of blog.
        Your task is write a SEO optimized, call to action and engaging blog title for it.
        Follows SEO best practises to suggest the blog title.
        Please keep the titles concise, not exceeding 60 words, and ensure to maintain their meaning.
        Respond with only one title and no description or keyword like Title:
        Generate blog title for this given meta description: {blog_meta_desc}
        """

    if 'gemini' in gpt_providers:
        try:
            genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
            # Use gemini-pro model for text.
            model = genai.GenerativeModel('gemini-pro')
            response = model.generate_content(prompt)
            return response.text
        except Exception as err:
            # Raise instead of logging and returning None, so the caller
            # never continues with a missing title.
            raise SystemError(
                f"Error in generating blog title with gemini: {err}") from err

    if 'openai' in gpt_providers:
        try:
            return openai_chatgpt(prompt)
        except Exception as err:
            raise SystemError(f"Error in generating blog title: {err}") from err

    raise ValueError(
        f"Unsupported gpt_providers value {gpt_providers!r}; expected 'gemini' or 'openai'.")
|
||||||
|
|||||||
47
lib/get_code_examples.py
Normal file
47
lib/get_code_examples.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
"""
|
||||||
|
At the command line, you only need to run this once to install the package via pip:
|
||||||
|
|
||||||
|
$ pip install google-generativeai
|
||||||
|
"""
|
||||||
|
|
||||||
|
import google.generativeai as genai
|
||||||
|
|
||||||
|
# Sampling parameters handed to the Gemini model.
generation_config = {
    "temperature": 1,
    "top_p": 1,
    "top_k": 1,
    "max_output_tokens": 2048,
}

# The same blocking threshold is applied to every harm category listed here.
safety_settings = [
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "BLOCK_MEDIUM_AND_ABOVE",
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "BLOCK_MEDIUM_AND_ABOVE",
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "BLOCK_MEDIUM_AND_ABOVE",
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "BLOCK_MEDIUM_AND_ABOVE",
    },
]

# Fixed instruction text; the blog content is appended per call.
_PROMPT_INSTRUCTIONS = (
    "As an expert programmer and web researcher, I will provide you with blog content. "
    "Your task is to understand the blog content and do web research around the main keywords. "
    "Check if the blog content is about programming then provide me with original code examples, "
    "relevant to the blog content. "
    "The provided code example should be of high coding standards, "
    "include docstring and follow pep8 standards. "
    "Do not provide explanations for your response."
)


def build_code_examples_prompt(blog_content):
    """Return the full prompt with ``blog_content`` embedded.

    The content is inserted by plain interpolation rather than ``str.format``,
    so braces inside the blog text (common in code-heavy posts) are kept
    verbatim instead of being treated as format fields.
    """
    return f'{_PROMPT_INSTRUCTIONS}\nBlog content: """ {blog_content} """\n '


def get_code_examples(blog_content):
    """Ask Gemini for code examples relevant to the given blog content.

    Args:
        blog_content: Text of the blog to analyse.

    Returns:
        The ``text`` attribute of the model response.

    The API key is read from the GEMINI_API_KEY environment variable
    instead of being hard-coded in the source.
    """
    import os

    genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
    model = genai.GenerativeModel(model_name="gemini-pro",
                                  generation_config=generation_config,
                                  safety_settings=safety_settings)
    response = model.generate_content([build_code_examples_prompt(blog_content)])
    return response.text


if __name__ == "__main__":
    # Script usage: read the blog content from stdin and print the examples.
    # Keeping this behind the guard avoids a network call at import time.
    import sys

    print(get_code_examples(sys.stdin.read()))
|
||||||
31
lib/get_tags.py
Normal file
31
lib/get_tags.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||||
|
import google.generativeai as genai
|
||||||
|
|
||||||
|
|
||||||
|
def get_blog_tags(blog_article, gpt_providers="openai"):
    """Suggest tags for the given blog content.

    Args:
        blog_article: Blog text to suggest tags for; interpolated into the
            prompt.
        gpt_providers: Name(s) of the LLM backend to use. Tested with ``in``,
            so a string or a collection of names works. 'gemini' is checked
            before 'openai'. Defaults to "openai" so existing one-argument
            callers keep working.

    Returns:
        The provider's reply: ``response.text`` for gemini, or whatever
        ``openai_chatgpt`` returns for openai (presumably the completion
        text -- confirm against that helper). The prompt asks for two
        comma-separated tags.

    Raises:
        ValueError: If ``gpt_providers`` names neither 'gemini' nor 'openai'.
        SystemError: If the selected provider fails; the original error is
            chained as ``__cause__``.
    """
    # Local import: this module does not import ``os`` at file level.
    import os

    prompt = f"""As an expert SEO and blog writer, suggest only 2 relevant and specific blog tags
        for the given blog content. Only reply with comma separated values.
        Blog content: {blog_article}."""

    if 'gemini' in gpt_providers:
        try:
            genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
            # Use gemini-pro model for text.
            model = genai.GenerativeModel('gemini-pro')
            response = model.generate_content(prompt)
            return response.text
        except Exception as err:
            # Raise instead of logging and returning None, so the caller
            # never continues with missing tags.
            raise SystemError(
                f"Error in generating blog tags with gemini: {err}") from err

    if 'openai' in gpt_providers:
        try:
            return openai_chatgpt(prompt)
        except Exception as err:
            raise SystemError(f"Error in generating blog tags: {err}") from err

    raise ValueError(
        f"Unsupported gpt_providers value {gpt_providers!r}; expected 'gemini' or 'openai'.")
|
||||||
@@ -17,10 +17,6 @@ from tqdm import tqdm, trange
|
|||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
import nltk
|
|
||||||
nltk.download('punkt', quiet=True)
|
|
||||||
from nltk.corpus import stopwords
|
|
||||||
nltk.download('stopwords', quiet=True)
|
|
||||||
|
|
||||||
from .gpt_providers.openai_gpt_provider import gen_new_from_given_img
|
from .gpt_providers.openai_gpt_provider import gen_new_from_given_img
|
||||||
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||||
@@ -29,6 +25,15 @@ from .generate_image_from_prompt import generate_image
|
|||||||
from .write_blogs_from_youtube_videos import youtube_to_blog
|
from .write_blogs_from_youtube_videos import youtube_to_blog
|
||||||
from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
|
from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
|
||||||
from .gpt_online_researcher import do_online_research
|
from .gpt_online_researcher import do_online_research
|
||||||
|
from .save_blog_to_file import save_blog_to_file
|
||||||
|
from .optimize_images_for_upload import optimize_image
|
||||||
|
from .combine_research_and_blog import blog_with_research
|
||||||
|
from .get_blog_meta_desc import generate_blog_description
|
||||||
|
from .get_blog_title import generate_blog_title
|
||||||
|
from .get_tags import get_blog_tags
|
||||||
|
from .get_blog_category import get_blog_categories
|
||||||
|
from .convert_content_to_markdown import convert_tomarkdown_format
|
||||||
|
from .convert_markdown_to_html import convert_markdown_to_html
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
logger.remove()
|
logger.remove()
|
||||||
@@ -49,8 +54,8 @@ image_dir = os.path.join(os.getcwd(), image_dir)
|
|||||||
output_path = "blogs"
|
output_path = "blogs"
|
||||||
output_path = os.path.join(os.getcwd(), output_path)
|
output_path = os.path.join(os.getcwd(), output_path)
|
||||||
wordpress_url = ''
|
wordpress_url = ''
|
||||||
wordpress_username = 'upaudel750'
|
wordpress_username = ''
|
||||||
wordpress_password = 'YvCS VbzQ QSp8 4XZe 0DUw Myys'
|
wordpress_password = ''
|
||||||
|
|
||||||
|
|
||||||
def generate_youtube_blog(yt_url_list, output_format="markdown"):
|
def generate_youtube_blog(yt_url_list, output_format="markdown"):
|
||||||
@@ -61,8 +66,11 @@ def generate_youtube_blog(yt_url_list, output_format="markdown"):
|
|||||||
for a_yt_url in yt_url_list:
|
for a_yt_url in yt_url_list:
|
||||||
try:
|
try:
|
||||||
logger.info(f"Starting to write blog on URL: {a_yt_url}")
|
logger.info(f"Starting to write blog on URL: {a_yt_url}")
|
||||||
yt_blog, yt_title = youtube_to_blog(a_yt_url)
|
blog_markdown_str, yt_title = youtube_to_blog(a_yt_url)
|
||||||
if not yt_title or not yt_blog:
|
logger.warning("\n\n--------------- First Draft of the Blog: --------\n\n")
|
||||||
|
logger.info(f"{blog_markdown_str}\n")
|
||||||
|
logger.warning("--------------------END of First draft----------\n\n")
|
||||||
|
if not yt_title or not blog_markdown_str:
|
||||||
logger.error("No content or title for audio to proceed.")
|
logger.error("No content or title for audio to proceed.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -75,24 +83,32 @@ def generate_youtube_blog(yt_url_list, output_format="markdown"):
|
|||||||
if not research_report:
|
if not research_report:
|
||||||
logger.error(f"Error in do_online_research returned no report: {e}")
|
logger.error(f"Error in do_online_research returned no report: {e}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
logger.warning(f"\n\n---------------Online Research Report: {yt_title} --------\n\n")
|
||||||
|
logger.info(f"{research_report}\n")
|
||||||
|
logger.warning("--------------------END of Research Report----------\n\n")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in do_online_research: {e}")
|
logger.error(f"Error in do_online_research: {e}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Note: Check if the order of input matters for your function
|
# Note: Check if the order of input matters for your function
|
||||||
logger.info("Preparing a blog content from audio script and online research content...")
|
logger.info("Preparing a blog content from audio script and online research content...")
|
||||||
blog_with_research(research_report, yt_blog)
|
blog_markdown_str = blog_with_research(research_report, blog_markdown_str)
|
||||||
|
logger.warning("\n\n--------------- Second Blog Draft after online research: --------\n\n")
|
||||||
|
logger.info(f"{blog_markdown_str}\n")
|
||||||
|
logger.warning("--------------------END of Second draft----------\n\n")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in blog_with_research: {e}")
|
logger.error(f"Error in blog_with_research: {e}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Get the title and meta description of the blog.
|
# Get the title and meta description of the blog.
|
||||||
blog_meta_desc = generate_blog_description(yt_blog)
|
blog_meta_desc = generate_blog_description(blog_markdown_str, "gemini")
|
||||||
title = generate_blog_title(blog_meta_desc)
|
title = generate_blog_title(blog_meta_desc, "gemini")
|
||||||
logger.info(f"Title is {title} and description is {blog_meta_desc}")
|
logger.info(f"Title is {title} and description is {blog_meta_desc}")
|
||||||
blog_markdown_str = "# " + title.replace('"', '') + "\n\n"
|
# Regex pattern to match 'Title:', 'title:', 'TITLE:', etc., followed by optional whitespace
|
||||||
|
title = re.sub(re.compile(r'(?i)title:\s*'), '', title)
|
||||||
|
#blog_markdown_str = "# " + title.replace('"', '') + "\n\n"
|
||||||
|
|
||||||
# Get blog tags and categories.
|
# Get blog tags and categories.
|
||||||
blog_tags = get_blog_tags(blog_meta_desc)
|
blog_tags = get_blog_tags(blog_meta_desc)
|
||||||
logger.info(f"Blog tags are: {blog_tags}")
|
logger.info(f"Blog tags are: {blog_tags}")
|
||||||
@@ -100,47 +116,58 @@ def generate_youtube_blog(yt_url_list, output_format="markdown"):
|
|||||||
logger.info(f"Blog categories are: {blog_categories}")
|
logger.info(f"Blog categories are: {blog_categories}")
|
||||||
|
|
||||||
# Generate an introduction for the blog
|
# Generate an introduction for the blog
|
||||||
blog_intro = get_blog_intro(title, yt_blog)
|
#blog_intro = get_blog_intro(title, blog_markdown_str)
|
||||||
logger.info(f"The Blog intro is:\n {blog_intro}")
|
#logger.info(f"The Blog intro is:\n {blog_intro}")
|
||||||
blog_markdown_str = blog_markdown_str + "\n\n" + f"{blog_intro}" + "\n\n"
|
#blog_markdown_str = blog_markdown_str + "\n\n" + f"{blog_intro}" + "\n\n"
|
||||||
|
|
||||||
# Generate an image based on meta description
|
# Generate an image based on meta description
|
||||||
logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
|
logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
|
||||||
main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
|
main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
|
||||||
|
main_img_path = optimize_image(main_img_path)
|
||||||
|
|
||||||
# Get a variation of the yt url screenshot to use in the blog.
|
# Get a variation of the yt url screenshot to use in the blog.
|
||||||
#varied_img_path = gen_new_from_given_img(yt_img_path, image_dir)
|
#varied_img_path = gen_new_from_given_img(yt_img_path, image_dir)
|
||||||
#logger.info(f"Image path: {main_img_path} and varied path: {varied_img_path}")
|
#logger.info(f"Image path: {main_img_path} and varied path: {varied_img_path}")
|
||||||
#blog_markdown_str = blog_markdown_str + f'})' + '_Image Caption_'
|
#blog_markdown_str = blog_markdown_str + f'})' + f'_{yt_title}_'
|
||||||
|
|
||||||
#stbdiff_img_path = generate_image(yt_img_path, image_dir, "stable_diffusion")
|
#stbdiff_img_path = generate_image(yt_img_path, image_dir, "stable_diffusion")
|
||||||
#logger.info(f"Image path: {main_img_path} from stable diffusion: {stbdiff_img_path}")
|
#logger.info(f"Image path: {main_img_path} from stable diffusion: {stbdiff_img_path}")
|
||||||
#blog_markdown_str = blog_markdown_str + f'})' + f'_{title}_'
|
#blog_markdown_str = blog_markdown_str + f'})' + f'_{yt_title}_'
|
||||||
|
|
||||||
# Add the body of the blog content.
|
# Add the body of the blog content.
|
||||||
blog_markdown_str = blog_markdown_str + "\n\n" + f'{yt_blog}' + "\n\n"
|
#blog_markdown_str = blog_markdown_str + "\n\n" + f'{yt_blog}' + "\n\n"
|
||||||
|
|
||||||
# Get the Conclusion of the blog, by passing the generated blog.
|
# Get the Conclusion of the blog, by passing the generated blog.
|
||||||
blog_conclusion = get_blog_conclusion(blog_markdown_str)
|
#blog_conclusion = get_blog_conclusion(blog_markdown_str)
|
||||||
# TBD: Add another image.
|
# TBD: Add another image.
|
||||||
blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n\n" + f"{blog_conclusion}" + "\n"
|
#blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n\n" + f"{blog_conclusion}" + "\n"
|
||||||
|
|
||||||
# Proofread the blog, edit and remove dubplicates and refine it further.
|
# Proofread the blog, edit and remove dubplicates and refine it further.
|
||||||
# Presently, fixing the blog keywords to be tags and categories.
|
# Presently, fixing the blog keywords to be tags and categories.
|
||||||
blog_keywords = f"{blog_tags} + {blog_categories}"
|
#blog_keywords = f"{blog_tags} + {blog_categories}"
|
||||||
blog_markdown_str = blog_proof_editor(blog_markdown_str, blog_keywords)
|
#blog_markdown_str = blog_proof_editor(blog_markdown_str, blog_keywords)
|
||||||
|
#logger.warning("\n\n--------------- 3rd draft after proofreading: --------\n\n")
|
||||||
|
#logger.info(f"{blog_markdown_str}\n")
|
||||||
|
#logger.warning("--------------------END of 3rd draft----------\n\n")
|
||||||
|
|
||||||
# Check the type of blog format needed by the user.
|
# Check the type of blog format needed by the user.
|
||||||
if 'html' in output_format:
|
if 'html' in output_format:
|
||||||
blog_markdown_str = convert_tomarkdown_format(blog_markdown_str)
|
logger.info("Converting final blog to HTML format.")
|
||||||
elif 'markdown' in output_path:
|
|
||||||
blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
|
blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
|
||||||
|
elif 'markdown' in output_format:
|
||||||
|
logger.info("Converting final blog to Markdown style.")
|
||||||
|
blog_markdown_str = convert_tomarkdown_format(blog_markdown_str)
|
||||||
|
|
||||||
|
logger.warning("\n\n--------------- Final Blog Content: --------\n\n")
|
||||||
|
logger.info(f"{blog_markdown_str}\n")
|
||||||
|
logger.warning("--------------------END of Blog Content----------\n\n")
|
||||||
|
|
||||||
|
|
||||||
# Try to save the blog content in a file, in whichever format. Just dump it.
|
# Try to save the blog content in a file, in whichever format. Just dump it.
|
||||||
try:
|
try:
|
||||||
save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
|
save_blog_to_file(blog_markdown_str, title, blog_meta_desc,
|
||||||
|
blog_tags, blog_categories, main_img_path, output_path)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
logger.error("Failed to Save blog content: {blog_markdown_str}")
|
logger.error(f"Failed to Save blog content: {err}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# raise assertionerror
|
# raise assertionerror
|
||||||
@@ -288,29 +315,6 @@ def generate_blog_topics(blog_keywords, num_blogs, niche):
|
|||||||
SystemError(f"Error in generating blog topics: {err}")
|
SystemError(f"Error in generating blog topics: {err}")
|
||||||
|
|
||||||
|
|
||||||
def generate_blog_title(blog_meta_desc):
|
|
||||||
"""
|
|
||||||
Given a blog title generate an outline for it
|
|
||||||
"""
|
|
||||||
# TBD: Remove hardcoding, make dynamic
|
|
||||||
prompt = f"""As a SEO expert and content writer, I will provide you with meta description of blog.
|
|
||||||
Your task is write a SEO optimized, call to action and engaging blog title for it.
|
|
||||||
Follows SEO best practises to suggest the blog title.
|
|
||||||
Please keep the titles concise, not exceeding 60 words, and ensure to maintain their meaning.
|
|
||||||
Respond with only one title and no description or keyword like Title:
|
|
||||||
Generate blog title for this given meta description: {blog_meta_desc}
|
|
||||||
"""
|
|
||||||
# The suggested {num_subtopics} outline should include few long-tailed keywords and most popular questions.
|
|
||||||
# TBD: Include --niche
|
|
||||||
logger.info(f"Prompt used for blog title :{prompt}")
|
|
||||||
try:
|
|
||||||
response = openai_chatgpt(prompt)
|
|
||||||
except Exception as err:
|
|
||||||
SystemError(f"Error in generating Blog Title: {err}")
|
|
||||||
return response
|
|
||||||
|
|
||||||
|
|
||||||
def generate_topic_outline(blog_title, num_subtopics):
|
|
||||||
"""
|
"""
|
||||||
Given a blog title generate an outline for it
|
Given a blog title generate an outline for it
|
||||||
"""
|
"""
|
||||||
@@ -386,122 +390,6 @@ def get_blog_conclusion(blog_content):
|
|||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
def generate_blog_description(blog_content):
|
|
||||||
"""
|
|
||||||
Prompt designed to give SEO optimized blog descripton
|
|
||||||
"""
|
|
||||||
prompt = f"""As an expert SEO and blog writer, Compose a compelling meta description for the given blog content,
|
|
||||||
adhering to SEO best practices. Keep it between 150-160 characters, incorporating active verbs,
|
|
||||||
avoiding all caps and excessive punctuation. Ensure relevance, engage users, and encourage clicks.
|
|
||||||
Use keywords naturally and provide a glimpse of the content's value to entice readers.
|
|
||||||
Respond with only one of your best effort and do not include your explanations.
|
|
||||||
Blog Content: {blog_content}"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
# TBD: Add logic for which_provider and which_model
|
|
||||||
response = openai_chatgpt(prompt)
|
|
||||||
return response
|
|
||||||
except Exception as err:
|
|
||||||
SystemError(f"Error in generating blog description: {err}")
|
|
||||||
|
|
||||||
|
|
||||||
def get_blog_tags(blog_article):
|
|
||||||
"""
|
|
||||||
Function to suggest tags for the given blog content
|
|
||||||
"""
|
|
||||||
# Suggest at least 5 tags for the following blog post [Enter your blog post text here].
|
|
||||||
prompt = f"""As an expert SEO and blog writer, suggest only 2 relevant and specific blog tags
|
|
||||||
for the given blog content. Only reply with comma separated values.
|
|
||||||
Blog content: {blog_article}."""
|
|
||||||
|
|
||||||
try:
|
|
||||||
# TBD: Add logic for which_provider and which_model
|
|
||||||
response = openai_chatgpt(prompt)
|
|
||||||
except Exception as err:
|
|
||||||
SystemError(f"Error in generating blog tags: {err}")
|
|
||||||
else:
|
|
||||||
return response
|
|
||||||
|
|
||||||
|
|
||||||
def get_blog_categories(blog_article):
    """
    Suggest blog categories for the given blog content.

    Args:
        blog_article (str): Blog text that is interpolated into the prompt.

    Returns:
        Whatever openai_chatgpt returns; the prompt asks for two
        comma separated categories.

    Raises:
        SystemError: If the call to openai_chatgpt fails.
    """
    prompt = f"""As an expert SEO and content writer, I will provide you with blog content.
    Suggest only 2 blog categories which are most relevant to provided blog content,
    by identifying the main topic. Also consider the target audience and the
    blog's category taxonomy. Only reply with comma separated values. The blog content is: {blog_article}"
    """
    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
    except Exception as err:
        # Previously the SystemError was created and discarded, hiding the failure.
        raise SystemError(f"Error in generating blog categories: {err}") from err
    else:
        return response
|
|
||||||
|
|
||||||
|
|
||||||
def save_blog_to_file(blog_content, blog_title,
        blog_meta_desc, blog_tags, blog_categories, main_img_path, file_type="md"):
    """ Common function to save the generated blog to a file.

    For file_type "md" the blog is written as
    YYYY-MM-DD-<sanitized-title>.md with a Jekyll front matter header
    (https://jekyllrb.com/docs/front-matter/) followed by the content.
    No other file_type is handled by this function.

    Args:
        blog_content (str): Body of the blog, written after the front matter.
        blog_title (str): Title; used for the front matter and the file name.
        blog_meta_desc (str): Meta description; if it contains ':' only the
            text between the first and second colon is kept.
        blog_tags: Interpolated inside "tags: [...]".
        blog_categories: Interpolated inside "categories: [...]".
        main_img_path (str): Image path; only its basename is recorded.
        file_type (str): Output format. Only "md" triggers a write.

    Raises:
        Exception: If the blog file cannot be written.
    """
    # NOTE(review): output_path is not a parameter of this function; it is
    # presumably a module-level name defined elsewhere -- confirm.
    logger.info(f"The blog will be saved at: {output_path}")
    logger.debug(f"Blog Title is: {blog_title}")

    # Build the file-name slug: drop special characters, collapse dashes,
    # then turn spaces into dashes.
    blog_title_md = re.sub('[^a-zA-Z0-9- ]', '', blog_title)
    blog_title = blog_title.replace(":", "")
    blog_title_md = re.sub('--+', '-', blog_title_md)
    blog_title_md = blog_title_md.replace(' ', '-')
    # NOTE(review): stop words are filtered after spaces became dashes, so the
    # title reaches remove_stop_words as one dash-joined string -- confirm
    # that this ordering is intended.
    blog_title_md = remove_stop_words(blog_title_md)

    if ':' in blog_meta_desc:
        blog_meta_desc = blog_meta_desc.split(':')[1].strip()

    if not os.path.exists(output_path):
        # The f prefix was missing, so the placeholder was logged literally.
        logger.error(f"Error: Blog output directory is set to {output_path}, which Does Not Exist.")

    # `file_type in "md"` was a substring test that also matched "", "m" and "d".
    if file_type == "md":
        logger.info("Writing/Saving the resultant blog content in Markdown format.")
        # Front matter date format: YYYY-MM-DD HH:MM:SS +/-TTTT
        from zoneinfo import ZoneInfo
        dtobj = datetime.datetime.now(tz=ZoneInfo('Asia/Kolkata'))
        formatted_date = dtobj.strftime('%Y-%m-%d %H:%M:%S %z')

        blog_frontmatter = f"""\
        ---
        title: {blog_title}
        date: {formatted_date}
        categories: [{blog_categories}]
        tags: [{blog_tags}]
        description: {blog_meta_desc}
        img_path: '/assets/'
        image:
          path: {os.path.basename(main_img_path)}
          alt: {blog_title}
        ---\n\n"""

        # Jekyll expects posts named YYYY-MM-DD-TITLE.EXTENSION, where the
        # EXTENSION must be one of md or markdown.
        blog_output_path = os.path.join(
            output_path,
            f"{datetime.date.today().strftime('%Y-%m-%d')}-{blog_title_md}.md"
        )
        try:
            with open(blog_output_path, "w", encoding="utf-8") as f:
                f.write(dedent(blog_frontmatter))
                f.write(blog_content)
        except Exception as e:
            raise Exception(f"Failed to write blog content: {e}") from e
        # The stray trailing comma made the f-string log a one-element tuple.
        logger.info(f"\nSuccessfully saved and Posted blog at: {blog_output_path}\n")
|
|
||||||
|
|
||||||
|
|
||||||
def get_related_keywords(num_blogs, keywords, niche):
|
def get_related_keywords(num_blogs, keywords, niche):
|
||||||
"""
|
"""
|
||||||
Helper function to get more keywords from GPTs.
|
Helper function to get more keywords from GPTs.
|
||||||
@@ -525,131 +413,6 @@ def get_related_keywords(num_blogs, keywords, niche):
|
|||||||
SystemError(f"Error in getting related keywords.")
|
SystemError(f"Error in getting related keywords.")
|
||||||
|
|
||||||
|
|
||||||
# Helper function
|
|
||||||
def remove_stop_words(sentence):
    """Return *sentence* with English stop words removed.

    The sentence is tokenized with nltk.word_tokenize, tokens whose
    lower-cased form is in NLTK's English stop-word list are dropped, and
    the remaining tokens are joined with single spaces.
    """
    tokens = nltk.word_tokenize(sentence)
    english_stop_words = set(stopwords.words('english'))

    kept_tokens = []
    for token in tokens:
        if token.lower() in english_stop_words:
            continue
        kept_tokens.append(token)

    return ' '.join(kept_tokens)
|
|
||||||
|
|
||||||
|
|
||||||
def convert_tomarkdown_format(blog_content):
    """ Helper for converting content to markdown format for static sites.

    Args:
        blog_content (str): Blog text that is interpolated into the prompt.

    Returns:
        Whatever openai_chatgpt returns for the prompt.

    Raises:
        SystemError: If the call to openai_chatgpt fails.
    """
    prompt = f"""
    As an expert in markdown language format and font matter, used for static webpages.
    Your task is to convert and improve formatting of given blog content.
    Do Not modify the content, only modify to convert it into highly readable blog content.

    Use below guidelines and include other best practises:
    1). Headers for Structure: Use # for main headings and increase the number of # for
    subheadings (##, ###, etc.). Organize given content into clear, hierarchical sections.
    2). Emphasizing Text: Use single asterisks or underscores for italic (*italic* or _italic_),
    double for bold (**bold** or __bold__), and triple for bold italic (***bold italic***).
    3). Lists: For unordered lists, use dashes, asterisks, or plus signs (-, *, +).
    For ordered lists, use numbers followed by periods (1., 2., etc.).
    4). Blockquotes: Use > for blockquotes, and add additional > for nested blockquotes.
    5). Code Blocks: Use backticks for inline code (code) and triple backticks for code blocks.
    Specify a language for syntax highlighting.
    6). Horizontal Lines: Create a horizontal line using three or more asterisks, dashes, or underscores (---, ***).
    7). Table Formatting: Use pipes | and dashes - to create tables. Align text with colons.

    Convert the given blog content in well organised markdown content: {blog_content}"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # The exception used to be constructed but never raised, so failures
        # silently returned None.
        raise SystemError("Error in converting to Markdown format.") from err
|
|
||||||
|
|
||||||
|
|
||||||
def convert_markdown_to_html(md_content):
    """ Helper function to convert given Markdown text to HTML.

    Args:
        md_content (str): Markdown text that is interpolated into the prompt.

    Returns:
        Whatever openai_chatgpt returns for the prompt.

    Raises:
        SystemError: If the call to openai_chatgpt fails.
    """
    prompt = f"""
    You are a skilled web developer tasked with converting a Markdown-formatted text to HTML.
    You will be given text in markdown format. Follow these steps to perform the conversion:

    1. Parse User's Markdown Input: You will receive a Markdown-formatted text as input from the user.
    Carefully analyze the provided Markdown text, paying attention to different elements such as headings (#),
    lists (unordered and ordered), bold and italic text, links, images, and code blocks.
    2. Generate and Validate HTML: Generate corresponding HTML code for each Markdown element following
    the conversion guidelines below. Ensure the generated HTML is well-structured and syntactically correct.
    3. Preserve Line Breaks: Markdown line breaks (soft breaks) represented by two spaces at the end of a
    line should be converted to <br> tags in HTML to preserve the line breaks.
    4. REMEMBER to generate complete, valid HTML response only.

    Follow below Conversion Guidelines:
    - Headers: Convert Markdown headers (#, ##, ###, etc.) to corresponding HTML header tags (<h1>, <h2>, <h3>, etc.).
    - Lists: Convert unordered lists (*) and ordered lists (1., 2., 3., etc.) to <ul> and <ol> HTML tags, respectively.
    List items should be enclosed in <li> tags.
    - Emphasis: Convert bold (**) and italic (*) text to <strong> and <em> HTML tags, respectively.
    - Links: Convert Markdown links ([text](url)) to HTML anchor (<a>) tags. Ensure the href attribute contains the correct URL.
    - Images: Convert Markdown image tags () to HTML image (<img>) tags.
    Include the alt attribute for accessibility.
    - Code: Convert inline code (`code`) to <code> HTML tags. Convert code blocks (```) to <pre> HTML tags
    for preserving formatting.
    - Blockquotes: Convert blockquotes (>) to <blockquote> HTML tags.
    Convert the following Markdown text to HTML: {md_content}
    """
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # The exception used to be constructed but never raised, so failures
        # silently returned None.
        raise SystemError("Error in convert to HTML") from err
|
|
||||||
|
|
||||||
|
|
||||||
def blog_with_research(report, blog):
    """Combine the given online research and gpt blog content.

    Args:
        report (str): Research report that is interpolated into the prompt.
        blog (str): Blog content that is interpolated into the prompt.

    Returns:
        Whatever openai_chatgpt returns; the prompt asks for complete HTML.

    Raises:
        SystemError: If the call to openai_chatgpt fails.
    """

    prompt = f"""
    You are an expert copywriter specializing in content optimization for SEO.
    I will provide you with a research report and a blog content on the same topic.
    Treat the research report as the context for the blog and better it accordingly.
    Your task is to transform and combine the given research and blog content into a well-structured, unique
    and engaging blog article.
    Your objectives include:
    1. Master the report and blog content: Understand main ideas, key points, and the core message.
    2. Sentence Structure: Rephrase while preserving logical flow and coherence.
    3. Identify Main Keyword: Determine the primary topic and combine the articles on the main topic.
    4. Keyword Integration: Naturally integrate keywords in headings, subheadings, and body text, avoiding overuse.
    5. Write Unique Content: Avoid direct copying from given report and blog; rewrite in your own words and style.
    6. Optimize for SEO: Generate high quality informative content.
    Implement SEO best practises with appropriate keyword density.
    7. Craft Engaging and Informative Article: Provide value and insight to readers.
    8. Proofread: Important to Check for grammar, spelling, and punctuation errors.
    9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases,
    interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures.
    10. Structuring: Include an Introduction, subtopics and use bullet points or
    numbered lists if appropriate. Important to include FAQs, and Conclusion.
    11. Ensure Uniqueness: Guarantee the article is plagiarism-free. Write in unique, informative style.
    12. Punctuation: Use appropriate question marks at the end of questions.
    13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
    14. REMEMBER to give final response as complete HTML.
    Follow these guidelines to create a well-optimized, unique, and informative article
    that will rank well in search engine results and engage readers effectively.

    Create a blog post from the given research report and blog content below.
    Research report: {report}
    Blog content: {blog}
    """
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # The old code built (but never raised) an error whose text was
        # copy-pasted from get_related_keywords; raise it with a message that
        # names this operation.
        raise SystemError(f"Error in combining research report and blog content: {err}") from err
|
|
||||||
|
|
||||||
|
|
||||||
def blog_proof_editor(blog_content, blog_keywords):
|
def blog_proof_editor(blog_content, blog_keywords):
|
||||||
"""
|
"""
|
||||||
Helper for blog proof reading.
|
Helper for blog proof reading.
|
||||||
@@ -659,12 +422,12 @@ def blog_proof_editor(blog_content, blog_keywords):
|
|||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
prompt = f"""I am looking for detailed editing and enhancement of the given blog post,
|
prompt = f"""I am looking for detailed editing and enhancement of the given blog post,
|
||||||
with a particular focus on maintaining originality.
|
with a particular focus on originality. I will provide you with a blog content and its keywords.
|
||||||
The topic of the content is [{blog_keywords}]. Please go through the blog and make direct edits to improve it,
|
The keywords for the blog are [{blog_keywords}]. Please go through the blog and make direct edits to improve it,
|
||||||
ensuring the final output is both high-quality and original.
|
ensuring the final output is both high-quality and original.
|
||||||
Note: There are duplicates headings and corresponding paragraphs, rewrite into one subheading.
|
Note: There are duplicates headings and corresponding paragraphs, rewrite into one subheading.
|
||||||
|
|
||||||
Here are the specific areas to focus on:
|
Here are the specific guidelines to focus on:
|
||||||
|
|
||||||
1). Ensure Originality: Edit any sections that lack originality, replacing them with unique and creative content.
|
1). Ensure Originality: Edit any sections that lack originality, replacing them with unique and creative content.
|
||||||
2). Eliminate Repetitive Language: Rewrite repetitive phrases with varied and engaging language.
|
2). Eliminate Repetitive Language: Rewrite repetitive phrases with varied and engaging language.
|
||||||
@@ -677,8 +440,11 @@ def blog_proof_editor(blog_content, blog_keywords):
|
|||||||
7). Remove Redundancies: Important, Cut out any redundant information or overly complex jargon.
|
7). Remove Redundancies: Important, Cut out any redundant information or overly complex jargon.
|
||||||
8). Refine Overall Structure: Make structural changes to improve the overall impact of the content.
|
8). Refine Overall Structure: Make structural changes to improve the overall impact of the content.
|
||||||
9). Remember, rewrite all content that repeated, while maintaining the formatting of the given blog text.
|
9). Remember, rewrite all content that repeated, while maintaining the formatting of the given blog text.
|
||||||
|
10). Remember Not to include SEO meta description and Title in your final response.
|
||||||
|
11). REMEMBER to maintain the formatting style of the provided blog.
|
||||||
|
12). Judge if the given blog is about technology then provide code snippets and examples for it.
|
||||||
|
|
||||||
Please apply these changes directly to the following blog text and provide the edited version:
|
Please make direct changes as per above guideline to the provided blog text below:
|
||||||
[{blog_content}]. """
|
[{blog_content}]. """
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -68,11 +68,11 @@ def do_online_research(query):
|
|||||||
f'Please use MLA format and markdown syntax.')
|
f'Please use MLA format and markdown syntax.')
|
||||||
}]
|
}]
|
||||||
# Run GPT-4
|
# Run GPT-4
|
||||||
logging.info("Generating report with GPT-4...")
|
logging.info("Generating Research report with GPT-4...")
|
||||||
lc_messages = convert_openai_messages(prompt)
|
lc_messages = convert_openai_messages(prompt)
|
||||||
try:
|
try:
|
||||||
report = ChatOpenAI(model='gpt-4', openai_api_key=openai_api_key).invoke(lc_messages).content
|
report = ChatOpenAI(model='gpt-4', openai_api_key=openai_api_key).invoke(lc_messages).content
|
||||||
logging.info(f"\n Below is the online research report for given keywords/title: \n\n{report}")
|
#logging.info(f"\n Below is the online research report for given keywords/title: \n\n{report}")
|
||||||
return report
|
return report
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
logging.error("Failed to generate do_online_research with ChatOpenAI")
|
logging.error("Failed to generate do_online_research with ChatOpenAI")
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ def speech_to_text(video_url, output_path='.'):
|
|||||||
file=open(audio_file, "rb"),
|
file=open(audio_file, "rb"),
|
||||||
response_format="text"
|
response_format="text"
|
||||||
)
|
)
|
||||||
logger.info("\nYouTube video transcription:\n\n{transcript}\n")
|
logger.info(f"\nYouTube video transcription:\n\n{transcript}\n")
|
||||||
return transcript, yt.title
|
return transcript, yt.title
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
201
lib/online_research_agent.py
Normal file
201
lib/online_research_agent.py
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
import os
|
||||||
|
import requests
|
||||||
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
|
from langchain import PromptTemplate
|
||||||
|
from langchain.chains.summarize import load_summarize_chain
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from langchain.chat_models import ChatOpenAI
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import json
|
||||||
|
from autogen import config_list_from_json
|
||||||
|
from autogen.agentchat.contrib.gpt_assistant_agent import GPTAssistantAgent
|
||||||
|
from autogen import UserProxyAgent
|
||||||
|
import autogen
|
||||||
|
|
||||||
|
|
||||||
|
# Pull variables from a local .env file into the process environment, then
# read the API keys used by the helper functions below.
load_dotenv()
# NOTE: the misspelled name is kept as-is because web_scraping() refers to it.
brwoserless_api_key = os.environ.get("BROWSERLESS_API_KEY")
serper_api_key = os.environ.get("SERP_API_KEY")
airtable_api_key = os.environ.get("AIRTABLE_API_KEY")
# LLM configuration handed to every autogen agent created further down.
config_list = config_list_from_json("OAI_CONFIG_LIST")
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------ Create functions ------------------ #
|
||||||
|
|
||||||
|
# Function for google search
|
||||||
|
def google_search(search_keyword):
    """Run a search through the Serper API and return the raw response body.

    Args:
        search_keyword (str): Query sent as the "q" field of the request.

    Returns:
        str: The response text exactly as returned by the endpoint.
    """
    url = "https://google.serper.dev/search"

    payload = json.dumps({
        "q": search_keyword
    })

    headers = {
        'X-API-KEY': serper_api_key,
        'Content-Type': 'application/json'
    }

    # requests has no default timeout; without one a stalled connection
    # would block the calling agent forever.
    response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
    print("RESPONSE:", response.text)
    return response.text
|
||||||
|
|
||||||
|
# Function for summarizing scraped content
|
||||||
|
def summary(objective, content):
    """Summarize *content* with respect to *objective* via a map-reduce chain.

    The content is split into chunks of up to 10000 characters with 500
    characters of overlap, and the same prompt template is used for both
    the map step and the combine step.

    Returns the output of the summarize chain.
    """
    chat_model = ChatOpenAI(temperature = 0, model = "gpt-3.5-turbo-16k-0613")

    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n"],
        chunk_size=10000,
        chunk_overlap=500,
    )
    documents = splitter.create_documents([content])

    map_prompt = """
    Write a summary of the following text for {objective}:
    "{text}"
    SUMMARY:
    """
    prompt_template = PromptTemplate(template=map_prompt, input_variables=["text", "objective"])

    chain = load_summarize_chain(
        llm=chat_model,
        chain_type='map_reduce',
        map_prompt=prompt_template,
        combine_prompt=prompt_template,
        verbose=False,
    )

    return chain.run(input_documents=documents, objective=objective)
|
||||||
|
|
||||||
|
def web_scraping(objective: str, url: str):
    """Fetch a page through browserless and return its text, summarized if long.

    Args:
        objective: The original objective/task the user gave to the agent;
            used to steer the summary when the page is too large.
        url: Address of the website to be scraped.

    Returns:
        The page text, or the output of summary() when the text is longer
        than 10000 characters. None when the HTTP request does not return
        status 200.
    """
    print("Scraping website...")
    # Define the headers for the request
    headers = {
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/json',
    }

    # Request body: the URL to render, as a JSON string
    data_json = json.dumps({"url": url})

    # requests has no default timeout; bound the wait so a stalled page
    # render cannot block the agent forever.
    response = requests.post(
        f"https://chrome.browserless.io/content?token={brwoserless_api_key}",
        headers=headers,
        data=data_json,
        timeout=60,
    )

    if response.status_code != 200:
        print(f"HTTP request failed with status code {response.status_code}")
        return None

    soup = BeautifulSoup(response.content, "html.parser")
    text = soup.get_text()
    print("CONTENTTTTTT:", text)
    if len(text) > 10000:
        return summary(objective, text)
    return text
|
||||||
|
|
||||||
|
|
||||||
|
# Function for getting Airtable records
|
||||||
|
def get_airtable_records(base_id, table_id):
    """Fetch records of an Airtable table and return the decoded JSON body.

    Args:
        base_id: Airtable base identifier used in the request URL.
        table_id: Airtable table identifier used in the request URL.

    Returns:
        The JSON-decoded response of the GET request.
    """
    url = f"https://api.airtable.com/v0/{base_id}/{table_id}"

    headers = {
        'Authorization': f'Bearer {airtable_api_key}',
    }

    # requests has no default timeout; bound the wait on the Airtable API.
    response = requests.request("GET", url, headers=headers, timeout=30)
    data = response.json()
    print(data)
    return data
|
||||||
|
|
||||||
|
|
||||||
|
# Function for updating Airtable records
|
||||||
|
|
||||||
|
def update_single_airtable_record(base_id, table_id, id, fields):
    """Patch one Airtable record and return the decoded JSON body.

    Args:
        base_id: Airtable base identifier used in the request URL.
        table_id: Airtable table identifier used in the request URL.
        id: Identifier of the record to update.
        fields: Field values sent for that record.

    Returns:
        The JSON-decoded response of the PATCH request.
    """
    url = f"https://api.airtable.com/v0/{base_id}/{table_id}"

    headers = {
        'Authorization': f'Bearer {airtable_api_key}',
        "Content-Type": "application/json"
    }

    data = {
        "records": [{
            "id": id,
            "fields": fields
        }]
    }

    # requests has no default timeout; bound the wait on the Airtable API.
    response = requests.patch(url, headers=headers, data=json.dumps(data), timeout=30)
    data = response.json()
    return data
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------ Create agent ------------------ #
|
||||||
|
|
||||||
|
# Create user proxy agent
|
||||||
|
# The user proxy ends the chat when a message contains "TERMINATE".
# `content` can be None (NOTE(review): e.g. for function-call messages --
# confirm), so fall back to an empty string instead of raising TypeError.
user_proxy = UserProxyAgent(name="user_proxy",
    is_termination_msg=lambda msg: "TERMINATE" in (msg.get("content") or ""),
    human_input_mode="ALWAYS",
    max_consecutive_auto_reply=1
    )

# Create researcher agent
researcher = GPTAssistantAgent(
    name = "researcher",
    llm_config = {
        "config_list": config_list,
        "assistant_id": "asst_qyvioid5My8K3SdFClaEnwmB"
    }
)

# The researcher may call the search and scraping helpers defined above.
researcher.register_function(
    function_map={
        "web_scraping": web_scraping,
        "google_search": google_search
    }
)

# Create research manager agent
research_manager = GPTAssistantAgent(
    name="research_manager",
    llm_config = {
        "config_list": config_list,
        "assistant_id": "asst_C1Ta5XmmEcYD6vnOSVflnwG9"
    }
)


# Create director agent
director = GPTAssistantAgent(
    name = "director",
    llm_config = {
        "config_list": config_list,
        "assistant_id": "asst_zVBJGch5mOyCYl9H1J3L9Ime",
    }
)

# The director may read and update Airtable records.
director.register_function(
    function_map={
        "get_airtable_records": get_airtable_records,
        "update_single_airtable_record": update_single_airtable_record
    }
)


# Create group chat
groupchat = autogen.GroupChat(agents=[user_proxy, researcher, research_manager, director], messages=[], max_round=15)
group_chat_manager = autogen.GroupChatManager(groupchat=groupchat, llm_config={"config_list": config_list})


# ------------------ start conversation ------------------ #
message = """
Research the funding stage/amount & pricing for each company in the list: https://airtable.com/appj0J4gFpvLrQWjI/tblF4OmG6oLjYtgZl/viwmFx2ttAVrJm0E3?blocks=hide
"""
user_proxy.initiate_chat(group_chat_manager, message=message)
|
||||||
112
lib/optimize_images_for_upload.py
Normal file
112
lib/optimize_images_for_upload.py
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import tinify
|
||||||
|
from PIL import Image
|
||||||
|
from loguru import logger
|
||||||
|
from tqdm import tqdm
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# load_dotenv() looks for the .env file in the current directory by default.
# If the .env file lives elsewhere, pass its location: load_dotenv("directory_of_.env")
load_dotenv()

# Read the Tinify API key from the environment.
tinify.key = os.environ.get('TINIFY_API_KEY')

# Replace loguru's default sink with a colorized stdout sink.
logger.remove()
logger.add(
    sys.stdout,
    colorize=True,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}",
)
|
||||||
|
|
||||||
|
def compress_image(image_path, quality=45, resize=None, preserve_exif=False):
    """
    Compress and optionally resize an image, and overwrite the original image.

    Errors raised while processing are logged, not propagated.

    Args:
        image_path (str): Path to the original image.
        quality (int): Quality of the output image (1-100).
        resize (tuple): Tuple (width, height) to resize image.
        preserve_exif (bool): Preserve EXIF data if True.

    Returns:
        None. The file at image_path is rewritten in place.
    """
    if not os.path.exists(image_path):
        logger.error(f"Image path does not exist: {image_path}")
        return

    original_size = os.path.getsize(image_path)
    try:
        with Image.open(image_path) as img:
            save_kwargs = {"format": img.format, "quality": quality, "optimize": True}
            # Only pass `exif` when there is data to keep: handing exif=None
            # to save() is not a valid value for the encoders.
            if preserve_exif and 'exif' in img.info:
                save_kwargs["exif"] = img.info['exif']

            if resize:
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name.
                img = img.resize(resize, Image.LANCZOS)

            img.save(image_path, **save_kwargs)

        compressed_size = os.path.getsize(image_path)
        reduction = (1 - (compressed_size / original_size)) * 100
        logger.info(f"Compressed {image_path}, Reduction: {reduction:.2f}%")
    except Exception as e:
        logger.error(f"Error compressing image {image_path}: {e}")
|
||||||
|
|
||||||
|
def is_image_file(filename):
    """
    Tell whether a file name carries a known image extension.

    The comparison is case-insensitive.

    Args:
        filename (str): Name of the file to check.

    Returns:
        bool: True if the file is an image, False otherwise.
    """
    lowered_name = filename.lower()
    for extension in ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'):
        if lowered_name.endswith(extension):
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_webp(image_path):
    """
    Save a WebP copy of an image next to the original file.

    Args:
        image_path (str): Path to the original image.

    Returns:
        str: Path to the WebP image, or None when the source path does not
        exist or the conversion fails (the error is logged).
    """
    if not os.path.exists(image_path):
        logger.error(f"Image path does not exist: {image_path}")
        return None

    try:
        with Image.open(image_path) as source_image:
            stem, _extension = os.path.splitext(image_path)
            webp_path = stem + '.webp'
            source_image.save(webp_path, 'WEBP')
            logger.info(f"Converted {image_path} to WebP")
            return webp_path
    except Exception as e:
        logger.error(f"Error converting image to WebP: {e}")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def compress_image_tinyfy(image_path):
    """
    Compress an image in place through the Tinify API.

    tinify.Error exceptions are logged, not propagated.

    Args:
        image_path (str): Path to the original image.
    """
    path_exists = os.path.exists(image_path)
    if not path_exists:
        logger.error(f"Image path does not exist: {image_path}")
        return None

    try:
        tinify.from_file(image_path).to_file(image_path)
    except tinify.Error as e:
        logger.error(f"Tinyfy API error: {e}")
    else:
        logger.info(f"Compressed {image_path} using Tinyfy API")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def optimize_image(image_path):
    """Convert an image to WebP, then compress the WebP file.

    Args:
        image_path (str): Path to the original image.

    Returns:
        str: Path to the optimized WebP image, or None when the WebP
        conversion did not produce a file.
    """
    webp_path = convert_to_webp(image_path)
    if webp_path is None:
        # convert_to_webp returns None on a missing path or a failed
        # conversion (it logs the reason); passing None on would raise
        # TypeError in os.path.exists.
        return None
    compress_image_tinyfy(webp_path)
    compress_image(webp_path)
    return webp_path
|
||||||
@@ -1,76 +1,101 @@
|
|||||||
def save_blog_to_file(blog_content, blog_title,
|
import sys
|
||||||
blog_meta_desc, blog_tags, blog_categories, main_img_path, file_type="md"):
|
import os
|
||||||
""" Common function to save the generated blog to a file.
|
import re
|
||||||
arg: file_type can be md or html
|
import datetime
|
||||||
|
from textwrap import dedent
|
||||||
|
import logging
|
||||||
|
from zoneinfo import ZoneInfo
|
||||||
|
import nltk
|
||||||
|
from nltk.corpus import stopwords
|
||||||
|
from loguru import logger
|
||||||
|
logger.remove()
|
||||||
|
logger.add(sys.stdout,
|
||||||
|
colorize=True,
|
||||||
|
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def save_blog_to_file(blog_content, blog_title, blog_meta_desc, blog_tags, blog_categories, main_img_path, output_path, file_type="md"):
|
||||||
"""
|
"""
|
||||||
# Convert the spaces in blog_title with dash
|
Saves the provided blog content to a file in the specified format.
|
||||||
logger.info(f"The blog will be saved at: {output_path}")
|
|
||||||
logger.debug(f"Blog Title is: {blog_title}")
|
Args:
|
||||||
blog_title_md = blog_title
|
blog_content (str): The main content of the blog.
|
||||||
regex = re.compile('[^a-zA-Z0-9- ]')
|
blog_title (str): Title of the blog.
|
||||||
blog_title_md = regex.sub('', blog_title_md)
|
blog_meta_desc (str): Meta description of the blog.
|
||||||
blog_title= blog_title.replace(":", "")
|
blog_tags (list): List of tags associated with the blog.
|
||||||
blog_title_md = re.sub('--+', '-', blog_title_md)
|
blog_categories (list): List of categories associated with the blog.
|
||||||
blog_title_md = blog_title_md.replace(' ', '-')
|
main_img_path (str): Path to the main image of the blog.
|
||||||
|
output_path (str): Path to the directory where the blog will be saved.
|
||||||
|
file_type (str, optional): The file format for saving the blog ('md' for Markdown or 'html' for HTML). Defaults to 'md'.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
FileNotFoundError: If the output_path does not exist.
|
||||||
|
Exception: If the blog content cannot be written to the file.
|
||||||
|
"""
|
||||||
|
# Sanitize and prepare the blog title
|
||||||
|
# Remove colon and ampersand
|
||||||
|
blog_title_md = blog_title.replace(":", "").replace("&", "")
|
||||||
|
# Replace spaces with hyphens
|
||||||
|
blog_title_md = blog_title_md.replace(" ", "-")
|
||||||
|
blog_title_md = re.sub('[^A-Za-z0-9-]', '', blog_title_md)
|
||||||
|
# Replace multiple consecutive dashes with a single dash
|
||||||
|
blog_title_md = re.sub('-+', '-', blog_title_md)
|
||||||
blog_title_md = remove_stop_words(blog_title_md)
|
blog_title_md = remove_stop_words(blog_title_md)
|
||||||
|
logger.debug(f"Blog Title is: {blog_title_md}")
|
||||||
|
|
||||||
if ':' in blog_meta_desc:
|
# Check if output path exists
|
||||||
blog_meta_desc = blog_meta_desc.split(':')[1].strip()
|
|
||||||
|
|
||||||
if not os.path.exists(output_path):
|
if not os.path.exists(output_path):
|
||||||
logger.error("Error: Blog output directory is set to {output_path}, which Does Not Exist.")
|
logger.error(f"Error: Blog output directory is set to {output_path}, which does not exist.")
|
||||||
|
raise FileNotFoundError(f"Output directory does not exist: {output_path}")
|
||||||
|
|
||||||
# Different output formats are plaintext, html and markdown.
|
# Handle Markdown file type
|
||||||
if file_type in "md":
|
if file_type == "md":
|
||||||
logger.info(f"Writing/Saving the resultant blog content in Markdown format.")
|
logger.info("Writing/Saving the resultant blog content in Markdown format.")
|
||||||
# fill the Front Matter as below at the top of the post: https://jekyllrb.com/docs/front-matter/
|
dtobj = datetime.datetime.now(ZoneInfo('Asia/Kolkata'))
|
||||||
# date: YYYY-MM-DD HH:MM:SS +/-TTTT
|
formatted_date = dtobj.strftime('%Y-%m-%d %H:%M:%S %z')
|
||||||
from zoneinfo import ZoneInfo
|
blog_title = blog_title.replace(":", "-").replace('"', '')
|
||||||
tz=ZoneInfo('Asia/Kolkata')
|
blog_frontmatter = dedent(f"""\
|
||||||
dtobj = datetime.datetime.now(tz=ZoneInfo('Asia/Kolkata'))
|
|
||||||
formatted_date = f"{dtobj.strftime('%Y-%m-%d %H:%M:%S %z')}"
|
|
||||||
|
|
||||||
blog_frontmatter = f"""\
|
|
||||||
---
|
---
|
||||||
title: {blog_title}
|
title: {blog_title}
|
||||||
date: {formatted_date}
|
date: {formatted_date}
|
||||||
categories: [{blog_categories}]
|
categories: [{blog_categories}]
|
||||||
tags: [{blog_tags}]
|
tags: [{blog_tags}]
|
||||||
description: {blog_meta_desc}
|
description: {blog_meta_desc.replace(":", "-")}
|
||||||
img_path: '/assets/'
|
img_path: '/assets/'
|
||||||
image:
|
image:
|
||||||
path: {os.path.basename(main_img_path)}
|
path: {os.path.basename(main_img_path)}
|
||||||
alt: {blog_title}
|
alt: {blog_title}
|
||||||
---\n\n"""
|
---\n\n""")
|
||||||
|
|
||||||
# Create a new file named YYYY-MM-DD-TITLE.EXTENSION and put it in the _posts of the root directory.
|
|
||||||
# Please note that the EXTENSION must be one of md or markdown
|
|
||||||
blog_output_path = os.path.join(
|
blog_output_path = os.path.join(
|
||||||
output_path,
|
output_path,
|
||||||
f"{datetime.date.today().strftime('%Y-%m-%d')}-{blog_title_md}.md"
|
f"{datetime.date.today().strftime('%Y-%m-%d')}-{blog_title_md}.md"
|
||||||
)
|
)
|
||||||
# Save the generated blog content to a file.
|
|
||||||
|
# Write to the file
|
||||||
try:
|
try:
|
||||||
with open(blog_output_path, "w") as f:
|
with open(blog_output_path, "w") as f:
|
||||||
f.write(dedent(blog_frontmatter))
|
f.write(blog_frontmatter)
|
||||||
f.write(blog_content)
|
f.write(blog_content)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise Exception(f"Failed to write blog content: {e}")
|
raise Exception(f"Failed to write blog content: {e}")
|
||||||
logger.info(f"\nSuccessfully saved and Posted blog at: {blog_output_path,}\n")
|
|
||||||
|
logger.info(f"Successfully saved and posted blog at: {blog_output_path}")
|
||||||
|
|
||||||
|
|
||||||
# Helper function
|
# Helper function
|
||||||
def remove_stop_words(sentence):
|
def remove_stop_words(sentence):
|
||||||
# Tokenize the sentence into words
|
"""
|
||||||
|
Removes stop words from a given sentence.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
sentence (str): The sentence from which to remove stop words.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The sentence after removing stop words.
|
||||||
|
"""
|
||||||
words = nltk.word_tokenize(sentence)
|
words = nltk.word_tokenize(sentence)
|
||||||
|
|
||||||
# Get the list of English stop words
|
|
||||||
stop_words = set(stopwords.words('english'))
|
stop_words = set(stopwords.words('english'))
|
||||||
|
|
||||||
# Remove stop words from the sentence
|
|
||||||
filtered_words = [word for word in words if word.lower() not in stop_words]
|
filtered_words = [word for word in words if word.lower() not in stop_words]
|
||||||
|
return ' '.join(filtered_words)
|
||||||
# Join the filtered words back into a sentence
|
|
||||||
filtered_sentence = ' '.join(filtered_words)
|
|
||||||
|
|
||||||
return filtered_sentence
|
|
||||||
|
|||||||
@@ -1,44 +0,0 @@
|
|||||||
from bs4 import BeautifulSoup
|
|
||||||
import re
|
|
||||||
|
|
||||||
def create_table_of_contents(html_content):
|
|
||||||
"""
|
|
||||||
Create a table of contents for a given HTML content.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
html_content (str): HTML content of the blog post.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: HTML content with a table of contents.
|
|
||||||
"""
|
|
||||||
# Use BeautifulSoup to parse the HTML
|
|
||||||
soup = BeautifulSoup(html_content, 'html.parser')
|
|
||||||
|
|
||||||
# Find all header tags (h1, h2, h3, h4, h5, h6)
|
|
||||||
headers = soup.find_all(re.compile('^h[1-6]$'))
|
|
||||||
|
|
||||||
# Create a table of contents
|
|
||||||
toc = BeautifulSoup('<div id="table-of-contents"><h2>Table of Contents</h2><ul></ul></div>', 'html.parser')
|
|
||||||
toc_ul = toc.find('ul')
|
|
||||||
|
|
||||||
# Loop through headers and add them to the table of contents
|
|
||||||
for i, header in enumerate(headers, start=1):
|
|
||||||
header_id = f"toc_{i}"
|
|
||||||
header['id'] = header_id
|
|
||||||
|
|
||||||
toc_entry = soup.new_tag('li')
|
|
||||||
toc_link = soup.new_tag('a', href=f"#{header_id}")
|
|
||||||
toc_link.string = header.get_text()
|
|
||||||
toc_entry.append(toc_link)
|
|
||||||
toc_ul.append(toc_entry)
|
|
||||||
|
|
||||||
# Insert the table of contents at the beginning of the content
|
|
||||||
soup.insert(0, toc)
|
|
||||||
|
|
||||||
return str(soup)
|
|
||||||
|
|
||||||
# Example usage
|
|
||||||
html_content = "<h1>Title</h1><p>Some text</p><h2>Subtitle 1</h2><p>Text under subtitle 1</p><h2>Subtitle 2</h2><p>Text under subtitle 2</p>"
|
|
||||||
html_with_toc = create_table_of_contents(html_content)
|
|
||||||
print(html_with_toc)
|
|
||||||
|
|
||||||
@@ -7,6 +7,8 @@ import tempfile
|
|||||||
import openai
|
import openai
|
||||||
from html2image import Html2Image
|
from html2image import Html2Image
|
||||||
from tqdm import tqdm, trange
|
from tqdm import tqdm, trange
|
||||||
|
import google.generativeai as genai
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
logger.remove()
|
logger.remove()
|
||||||
logger.add(sys.stdout,
|
logger.add(sys.stdout,
|
||||||
@@ -36,7 +38,8 @@ def youtube_to_blog(video_url):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Summarizing the content of the YouTube video
|
# Summarizing the content of the YouTube video
|
||||||
audio_blog_content = summarize_youtube_video(audio_text)
|
audio_blog_content = summarize_youtube_video_openai(audio_text, "gemini")
|
||||||
|
logger.info("Successfully converted given URL to blog article.")
|
||||||
return audio_blog_content, audio_title
|
return audio_blog_content, audio_title
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in summarize_youtube_video: {e}")
|
logger.error(f"Error in summarize_youtube_video: {e}")
|
||||||
@@ -44,41 +47,51 @@ def youtube_to_blog(video_url):
|
|||||||
return audio_blog_content
|
return audio_blog_content
|
||||||
|
|
||||||
|
|
||||||
def summarize_youtube_video(user_content):
|
def summarize_youtube_video(user_contenti, gpt_providers):
|
||||||
"""Generates a summary of a YouTube video using OpenAI GPT-3 and displays a progress bar.
|
"""Generates a summary of a YouTube video using OpenAI GPT-3 and displays a progress bar.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
video_link: The URL of the YouTube video to summarize.
|
video_link: The URL of the YouTube video to summarize.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A string containing the summary of the video.
|
A string containing the summary of the video.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
logger.info("Start summarize_youtube_video..")
|
logger.info("Start summarize_youtube_video..")
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
You are an expert copywriter specializing in content optimization for SEO.
|
You are an expert copywriter specializing in digital content writing. I will provide you with a transcript.
|
||||||
Your task is to transform a given transcript into a well-structured and engaging blog article. Your objectives include:
|
Your task is to transform a given transcript into a well-structured and informative blog article.
|
||||||
|
Please follow the below objectives:
|
||||||
|
|
||||||
1. Master the Transcript: Understand main ideas, key points, and the core message.
|
1. Master the Transcript: Understand main ideas, key points, and the core message.
|
||||||
2. Sentence Structure: Rephrase while preserving logical flow and coherence. Dont quote anyone from video.
|
2. Sentence Structure: Rephrase while preserving logical flow and coherence. Dont quote anyone from video.
|
||||||
3. Identify Main Keywords: Determine the primary video topic.
|
3. Note: Check if the transcript is about programming, then include code examples and snippets in your article.
|
||||||
4. Keyword Integration: Naturally integrate keywords in headings, subheadings, and body text, avoiding overuse.
|
4. Write Unique Content: Avoid direct copying; rewrite in your own words.
|
||||||
5. Write Unique Content: Avoid direct copying; rewrite in your own words.
|
5. REMEMBER to avoid direct quoting and maintain uniqueness.
|
||||||
REMEMBER to avoid direct quoting and maintain uniqueness.
|
6. Proofread: Check for grammar, spelling, and punctuation errors.
|
||||||
6. Optimize for SEO: Implement meta tags, header tags, and appropriate keyword density.
|
7. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, interjections, and colloquialisms. 8. Avoid repetitive phrases and unnatural sentence structures.
|
||||||
7. Craft Engaging and Informative Article: Provide value and insight to readers.
|
9. Ensure Uniqueness: Guarantee the article is plagiarism-free.
|
||||||
8. Proofread: Check for grammar, spelling, and punctuation errors.
|
10. Punctuation: Use appropriate question marks at the end of questions.
|
||||||
9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures.
|
11. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
|
||||||
10. Structuring: Include a Creative Title, SEO Meta-description, ## Introduction ##, bullet points or numbered lists if appropriate, FAQs, and Conclusion.
|
12. Rephrase words like 'video, youtube, channel' with 'article, blog' and such suitable words.
|
||||||
11. Ensure Uniqueness: Guarantee the article is plagiarism-free.
|
|
||||||
12. Punctuation: Use appropriate question marks at the end of questions.
|
|
||||||
13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
|
|
||||||
14. Rephrase words like 'video, youtube, channel' with 'article, blog' and such suitable words.
|
|
||||||
|
|
||||||
Follow the above guidelines to create a well-optimized, unique, and informative article that will rank well in search engine results and engage readers effectively.
|
Follow the above guidelines to create a well-optimized, unique, and informative article,
|
||||||
Craft a blog content from the following transcript:\n{user_content}
|
that will rank well in search engine results and engage readers effectively.
|
||||||
|
Follow above guidelines to craft a blog content from the following transcript:\n{user_content}
|
||||||
"""
|
"""
|
||||||
try:
|
if 'gemini' in gpt_providers:
|
||||||
response = openai_chatgpt(prompt)
|
try:
|
||||||
return response
|
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
SystemError(f"Error in generating blog summary: {err}")
|
logger.error("Failed in getting GEMINI_API_KEY")
|
||||||
|
# Use gemini-pro model for text and image.
|
||||||
|
model = genai.GenerativeModel('gemini-pro')
|
||||||
|
try:
|
||||||
|
response = model.generate_content(prompt)
|
||||||
|
return response.text
|
||||||
|
except Exception as err:
|
||||||
|
logger.error("Failed to get response from gemini.")
|
||||||
|
elif 'openai' in gpt_providers:
|
||||||
|
try:
|
||||||
|
response = openai_chatgpt(prompt)
|
||||||
|
return response
|
||||||
|
except Exception as err:
|
||||||
|
SystemError(f"Error in generating blog summary: {err}")
|
||||||
|
|||||||
133
requirements.txt
133
requirements.txt
@@ -0,0 +1,133 @@
|
|||||||
|
aiofiles
|
||||||
|
aiohttp
|
||||||
|
aiosignal
|
||||||
|
annotated-types
|
||||||
|
anyio
|
||||||
|
args
|
||||||
|
async-timeout
|
||||||
|
asyncio
|
||||||
|
attrs
|
||||||
|
beautifulsoup4
|
||||||
|
blinker
|
||||||
|
blis
|
||||||
|
Brotli
|
||||||
|
catalogue
|
||||||
|
certifi
|
||||||
|
cffi
|
||||||
|
charset-normalizer
|
||||||
|
chromedriver-autoinstaller
|
||||||
|
click
|
||||||
|
clint
|
||||||
|
cloudpathlib
|
||||||
|
colorama
|
||||||
|
confection
|
||||||
|
cssselect2
|
||||||
|
cymem
|
||||||
|
dataclasses-json
|
||||||
|
decorator
|
||||||
|
distro
|
||||||
|
docopt
|
||||||
|
duckduckgo-search
|
||||||
|
exceptiongroup
|
||||||
|
fastapi
|
||||||
|
Flask
|
||||||
|
fonttools
|
||||||
|
frozenlist
|
||||||
|
greenlet
|
||||||
|
grpcio
|
||||||
|
grpcio-tools
|
||||||
|
h11
|
||||||
|
h2
|
||||||
|
hpack
|
||||||
|
html2image
|
||||||
|
html5lib
|
||||||
|
httpcore
|
||||||
|
httpx
|
||||||
|
hyperframe
|
||||||
|
idna
|
||||||
|
imageio
|
||||||
|
imageio-ffmpeg
|
||||||
|
itsdangerous
|
||||||
|
Jinja2
|
||||||
|
joblib
|
||||||
|
jsonpatch
|
||||||
|
jsonpointer
|
||||||
|
langchain
|
||||||
|
langchain-core
|
||||||
|
langcodes
|
||||||
|
langsmith
|
||||||
|
loguru
|
||||||
|
lxml
|
||||||
|
Markdown
|
||||||
|
markdown2
|
||||||
|
MarkupSafe
|
||||||
|
marshmallow
|
||||||
|
md2pdf
|
||||||
|
moviepy
|
||||||
|
multidict
|
||||||
|
murmurhash
|
||||||
|
mypy-extensions
|
||||||
|
nltk
|
||||||
|
numpy
|
||||||
|
openai
|
||||||
|
outcome
|
||||||
|
packaging
|
||||||
|
param
|
||||||
|
permchain
|
||||||
|
Pillow
|
||||||
|
playwright
|
||||||
|
preshed
|
||||||
|
proglog
|
||||||
|
protobuf
|
||||||
|
pycparser
|
||||||
|
pydantic
|
||||||
|
pydantic_core
|
||||||
|
pydub
|
||||||
|
pydyf
|
||||||
|
pyee
|
||||||
|
pyphen
|
||||||
|
PySocks
|
||||||
|
python-dotenv
|
||||||
|
python-multipart
|
||||||
|
pytube
|
||||||
|
PyYAML
|
||||||
|
regex
|
||||||
|
requests
|
||||||
|
selenium
|
||||||
|
serpapi
|
||||||
|
six
|
||||||
|
smart-open
|
||||||
|
sniffio
|
||||||
|
socksio
|
||||||
|
sortedcontainers
|
||||||
|
soupsieve
|
||||||
|
spacy-legacy
|
||||||
|
spacy-loggers
|
||||||
|
SQLAlchemy
|
||||||
|
srsly
|
||||||
|
stability-sdk
|
||||||
|
starlette
|
||||||
|
tavily-python
|
||||||
|
tenacity
|
||||||
|
thinc
|
||||||
|
tiktoken
|
||||||
|
tinycss2
|
||||||
|
tqdm
|
||||||
|
trio
|
||||||
|
trio-websocket
|
||||||
|
typer
|
||||||
|
typing-inspect
|
||||||
|
typing_extensions
|
||||||
|
urllib3
|
||||||
|
uvicorn
|
||||||
|
wasabi
|
||||||
|
weasel
|
||||||
|
weasyprint
|
||||||
|
webdriver-manager
|
||||||
|
webencodings
|
||||||
|
websocket-client
|
||||||
|
Werkzeug
|
||||||
|
wsproto
|
||||||
|
yarl
|
||||||
|
youtube-transcript-api
|
||||||
|
zopfli
|
||||||
|
|||||||
Reference in New Issue
Block a user