Blogen-V.000.0.1 Added features,Cleanup. WIP
This commit is contained in:
33
lib/blog_proof_reader.py
Normal file
33
lib/blog_proof_reader.py
Normal file
@@ -0,0 +1,33 @@
|
||||
def blog_proof_editor(blog_content, blog_keywords):
    """Proofread and refine a generated blog post using the LLM.

    Args:
        blog_content: The blog text to be edited.
        blog_keywords: Keywords describing the blog topic; they are placed in
            the prompt so the model knows the subject of the content.

    Returns:
        Whatever ``openai_chatgpt`` returns for the editing prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    # This has to be an f-string: as a plain string the keywords and the blog
    # text were sent to the model as literal placeholders.
    prompt = f"""I am looking for detailed editing and enhancement of the given blog post,
with a particular focus on maintaining originality.
The topic of the content is [{blog_keywords}]. Please go through the blog and make direct edits to improve it,
ensuring the final output is both high-quality and original.
Note: There are duplicates headings and corresponding paragraphs, rewrite into one subheading.

Here are the specific areas to focus on:

1). Ensure Originality: Edit any sections that lack originality, replacing them with unique and creative content.
2). Eliminate Repetitive Language: Rewrite repetitive phrases with varied and engaging language.
3). Vocabulary and Grammar Enhancement: Directly correct any grammatical errors and upgrade the
vocabulary for better readability.
4). Improve Sentence Structure: Enhance sentence construction for better clarity and flow.
5). Tone and Brand Alignment: Adjust the tone, voice, personality of given content to make it unique.
6). Optimize Content Structure: Reorganize the content for a more impactful presentation,
including better paragraphing and transitions.
7). Remove Redundancies: Important, Cut out any redundant information or overly complex jargon.
8). Refine Overall Structure: Make structural changes to improve the overall impact of the content.
9). Remember, rewrite all content that repeated, while maintaining the formatting of the given blog text.

Please apply these changes directly to the following blog text and provide the edited version:
[{blog_content}]. """

    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # The exception used to be constructed but never raised, so failures
        # silently returned None to the caller.
        raise SystemError(f"Error Blog Proof Reading: {err}") from err
|
||||
40
lib/combine_research_and_blog.py
Normal file
40
lib/combine_research_and_blog.py
Normal file
@@ -0,0 +1,40 @@
|
||||
def blog_with_research(report, blog):
    """Combine an online research report and a generated blog into one article.

    Args:
        report: Research report text, used as context for the blog.
        blog: The blog content to be improved with the research.

    Returns:
        Whatever ``openai_chatgpt`` returns for the combining prompt (the
        prompt asks the model for a complete HTML response).

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    prompt = f"""
You are an expert copywriter specializing in content optimization for SEO.
I will provide you with a research report and a blog content on the same topic.
Treat the research report as the context for the blog and better it accordingly.
Your task is to transform and combine the given research and blog content into a well-structured, unique
and engaging blog article.
Your objectives include:
1. Master the report and blog content: Understand main ideas, key points, and the core message.
2. Sentence Structure: Rephrase while preserving logical flow and coherence.
3. Identify Main Keyword: Determine the primary topic and combine the articles on the main topic.
4. Keyword Integration: Naturally integrate keywords in headings, subheadings, and body text, avoiding overuse.
5. Write Unique Content: Avoid direct copying from given report and blog; rewrite in your own words and style.
6. Optimize for SEO: Generate high quality informative content.
Implement SEO best practises with appropriate keyword density.
7. Craft Engaging and Informative Article: Provide value and insight to readers.
8. Proofread: Important to Check for grammar, spelling, and punctuation errors.
9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases,
interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures.
10. Structuring: Include an Introduction, subtopics and use bullet points or
numbered lists if appropriate. Important to include FAQs, and Conclusion.
11. Ensure Uniqueness: Guarantee the article is plagiarism-free. Write in unique, informative style.
12. Punctuation: Use appropriate question marks at the end of questions.
13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
14. REMEMBER to give final response as complete HTML.
Follow these guidelines to create a well-optimized, unique, and informative article
that will rank well in search engine results and engage readers effectively.

Create a blog post from the given research report and blog content below.
Research report: {report}
Blog content: {blog}
"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise (and include the cause) instead of building an exception
        # object and discarding it.
        raise SystemError(
            f"Error in combining research report and blog content: {err}"
        ) from err
|
||||
8
lib/config.json
Normal file
8
lib/config.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"wordpress_url": "https://latestaitools.in/",
|
||||
"wordpress_username": "username",
|
||||
"wordpress_password": "password",
|
||||
"image_dir": "path/to/image_dir",
|
||||
"output_path": "path/to/output_path"
|
||||
}
|
||||
|
||||
27
lib/convert_content_to_markdown.py
Normal file
27
lib/convert_content_to_markdown.py
Normal file
@@ -0,0 +1,27 @@
|
||||
def convert_tomarkdown_format(blog_content):
    """Ask the LLM to reformat blog content as markdown for static sites.

    Args:
        blog_content: The blog text to convert.

    Returns:
        Whatever ``openai_chatgpt`` returns for the conversion prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    # "front matter" is the static-site metadata header; the prompt previously
    # said "font matter".
    prompt = f"""
As an expert in markdown language format and front matter, used for static webpages.
Your task is to convert and improve formatting of given blog content.
Do Not modify the content, only modify to convert it into highly readable blog content.

Use below guidelines and include other best practises:
1). Headers for Structure: Use # for main headings and increase the number of # for
subheadings (##, ###, etc.). Organize given content into clear, hierarchical sections.
2). Emphasizing Text: Use single asterisks or underscores for italic (*italic* or _italic_),
double for bold (**bold** or __bold__), and triple for bold italic (***bold italic***).
3). Lists: For unordered lists, use dashes, asterisks, or plus signs (-, *, +).
For ordered lists, use numbers followed by periods (1., 2., etc.).
4). Blockquotes: Use > for blockquotes, and add additional > for nested blockquotes.
5). Code Blocks: Use backticks for inline code (code) and triple backticks for code blocks.
Specify a language for syntax highlighting.
6). Horizontal Lines: Create a horizontal line using three or more asterisks, dashes, or underscores (---, ***).
7). Table Formatting: Use pipes | and dashes - to create tables. Align text with colons.

Convert the given blog content in well organised markdown content: {blog_content}"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Propagate the failure; the exception used to be created but not raised.
        raise SystemError(f"Error in converting to Markdown format: {err}") from err
|
||||
35
lib/convert_markdown_to_html.py
Normal file
35
lib/convert_markdown_to_html.py
Normal file
@@ -0,0 +1,35 @@
|
||||
def convert_markdown_to_html(md_content):
    """Ask the LLM to convert markdown-formatted text to HTML.

    Args:
        md_content: Text in markdown format.

    Returns:
        Whatever ``openai_chatgpt`` returns for the conversion prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    # The image guideline spells out the markdown image syntax explicitly;
    # an empty "()" gave the model nothing to match on.
    prompt = f"""
You are a skilled web developer tasked with converting a Markdown-formatted text to HTML.
You will be given text in markdown format. Follow these steps to perform the conversion:

1. Parse User's Markdown Input: You will receive a Markdown-formatted text as input from the user.
Carefully analyze the provided Markdown text, paying attention to different elements such as headings (#),
lists (unordered and ordered), bold and italic text, links, images, and code blocks.
2. Generate and Validate HTML: Generate corresponding HTML code for each Markdown element following
the conversion guidelines below. Ensure the generated HTML is well-structured and syntactically correct.
3. Preserve Line Breaks: Markdown line breaks (soft breaks) represented by two spaces at the end of a
line should be converted to <br> tags in HTML to preserve the line breaks.
4. REMEMBER to generate complete, valid HTML response only.

Follow below Conversion Guidelines:
- Headers: Convert Markdown headers (#, ##, ###, etc.) to corresponding HTML header tags (<h1>, <h2>, <h3>, etc.).
- Lists: Convert unordered lists (*) and ordered lists (1., 2., 3., etc.) to <ul> and <ol> HTML tags, respectively.
List items should be enclosed in <li> tags.
- Emphasis: Convert bold (**) and italic (*) text to <strong> and <em> HTML tags, respectively.
- Links: Convert Markdown links ([text](url)) to HTML anchor (<a>) tags. Ensure the href attribute contains the correct URL.
- Images: Convert Markdown image tags (![alt text](image_url)) to HTML image (<img>) tags.
Include the alt attribute for accessibility.
- Code: Convert inline code (`code`) to <code> HTML tags. Convert code blocks (```) to <pre> HTML tags
for preserving formatting.
- Blockquotes: Convert blockquotes (>) to <blockquote> HTML tags.
Convert the following Markdown text to HTML: {md_content}
"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Propagate the failure; the exception used to be created but not raised.
        raise SystemError(f"Error in convert to HTML: {err}") from err
|
||||
16
lib/get_blog_category.py
Normal file
16
lib/get_blog_category.py
Normal file
@@ -0,0 +1,16 @@
|
||||
def get_blog_categories(blog_article):
    """Suggest blog categories for the given blog content.

    Args:
        blog_article: The blog text to categorise.

    Returns:
        Whatever ``openai_chatgpt`` returns; the prompt asks for two
        comma separated category names.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    # The stray double quote that used to trail the interpolated blog content
    # has been removed.
    prompt = f"""As an expert SEO and content writer, I will provide you with blog content.
Suggest only 2 blog categories which are most relevant to provided blog content,
by identifying the main topic. Also consider the target audience and the
blog's category taxonomy. Only reply with comma separated values. The blog content is: {blog_article}
"""
    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
    except Exception as err:
        # Raise instead of discarding the exception object, which made the
        # function return None on failure.
        raise SystemError(f"Error in generating blog categories: {err}") from err
    else:
        return response
|
||||
15
lib/get_blog_conclusion.py
Normal file
15
lib/get_blog_conclusion.py
Normal file
@@ -0,0 +1,15 @@
|
||||
def get_blog_conclusion(blog_content):
    """Generate a conclusion for the given blog content.

    Args:
        blog_content: The blog text to conclude.

    Returns:
        Whatever ``openai_chatgpt`` returns for the conclusion prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    prompt = f"""As an expert SEO and blog writer, please conclude the given blog providing vital take aways,
summarise key points (no more than 300 characters) in bullet points. The blog content: {blog_content}
"""
    logger.info(f"Generating blog conclusion iwth prompt: {prompt}")
    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
    except Exception as err:
        # Raise instead of discarding the exception object, which made the
        # function return None on failure.
        raise SystemError(f"Error in generating blog conclusion: {err}") from err
    else:
        return response
|
||||
18
lib/get_blog_content.py
Normal file
18
lib/get_blog_content.py
Normal file
@@ -0,0 +1,18 @@
|
||||
def generate_topic_outline(blog_title, num_subtopics):
    """Generate an outline (list of sub topics) for a blog title.

    Args:
        blog_title: Title of the blog to outline.
        num_subtopics: Number of sub topics to request from the model.

    Returns:
        Whatever ``openai_chatgpt`` returns for the outline prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    # TBD: Remove hardcoding, make dynamic
    prompt = f"""As a SEO expert, suggest only {num_subtopics} beginner-friendly and
insightful sub topics for the blog title: {blog_title}.
Respond with only answer and no description, explanations."""

    # The suggested {num_subtopics} outline should include few long-tailed keywords and most popular questions.
    # TBD: Include --niche
    logger.info(f"Prompt used for blog title Outline :\n{prompt}\n")
    # TBD: Add logic for which_provider and which_model
    try:
        response = openai_chatgpt(prompt)
    except Exception as err:
        # Without the raise, execution fell through to ``return response`` and
        # failed with UnboundLocalError, hiding the real error.
        raise SystemError(f"Error in generating Blog Outline: {err}") from err
    return response
|
||||
16
lib/get_blog_intro.py
Normal file
16
lib/get_blog_intro.py
Normal file
@@ -0,0 +1,16 @@
|
||||
def get_blog_intro(blog_title, blog_topics):
    """Generate a blog introduction from the title and its sub topics.

    Args:
        blog_title: Title of the blog.
        blog_topics: Sub topics (or content) the introduction should cover.

    Returns:
        Whatever ``openai_chatgpt`` returns for the introduction prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    prompt = f"""As a skilled wordsmith, I'll equip you with a blog title and relevant topics, tasking you with crafting an engaging introduction. Your challenge: Create a brief, compelling entry that entices readers to explore the entire post. This introduction must be concise (under 250 characters) yet powerful, clearly stating the blog's purpose and what readers stand to gain. Reply with only the introduction.

Intrigue your audience from the start with vibrant language, employing strong verbs and vivid descriptions. Address a common challenge your readers face, demonstrating empathy and positioning yourself as their go-to expert. Pose thought-provoking questions that prompt reader engagement and contemplation.

Remember, your words matter. This introduction serves as the cornerstone of the blog post. It should not only captivate attention but also encourage deeper exploration. Additionally, strategically integrate relevant keywords to enhance visibility on search engine results pages (SERPs). Your mission: Craft a blog introduction that resonates, leaving readers eager to delve further into the titled piece: '{blog_title}', covering these sub-topics: {blog_topics}."""

    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
    except Exception as err:
        # Without the raise, execution fell through to ``return response`` and
        # failed with UnboundLocalError, hiding the real error.
        raise SystemError(f"Error in generating Blog Introduction: {err}") from err
    return response
|
||||
17
lib/get_blog_meta_desc.py
Normal file
17
lib/get_blog_meta_desc.py
Normal file
@@ -0,0 +1,17 @@
|
||||
def generate_blog_description(blog_content):
    """Generate an SEO meta description for the given blog content.

    Args:
        blog_content: The blog text to describe.

    Returns:
        Whatever ``openai_chatgpt`` returns for the meta description prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    prompt = f"""As an expert SEO and blog writer, Compose a compelling meta description for the given blog content,
adhering to SEO best practices. Keep it between 150-160 characters, incorporating active verbs,
avoiding all caps and excessive punctuation. Ensure relevance, engage users, and encourage clicks.
Use keywords naturally and provide a glimpse of the content's value to entice readers.
Respond with only one of your best effort and do not include your explanations.
Blog Content: {blog_content}"""

    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise instead of discarding the exception object, which made the
        # function return None on failure.
        raise SystemError(f"Error in generating blog description: {err}") from err
|
||||
18
lib/get_blog_outline.py
Normal file
18
lib/get_blog_outline.py
Normal file
@@ -0,0 +1,18 @@
|
||||
def generate_topic_outline(blog_title, num_subtopics):
    """Generate an outline (list of sub topics) for a blog title.

    Args:
        blog_title: Title of the blog to outline.
        num_subtopics: Number of sub topics to request from the model.

    Returns:
        Whatever ``openai_chatgpt`` returns for the outline prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    # TBD: Remove hardcoding, make dynamic
    prompt = f"""As a SEO expert, suggest only {num_subtopics} beginner-friendly and
insightful sub topics for the blog title: {blog_title}.
Respond with only answer and no description, explanations."""

    # The suggested {num_subtopics} outline should include few long-tailed keywords and most popular questions.
    # TBD: Include --niche
    logger.info(f"Prompt used for blog title Outline :\n{prompt}\n")
    # TBD: Add logic for which_provider and which_model
    try:
        response = openai_chatgpt(prompt)
    except Exception as err:
        # Without the raise, execution fell through to ``return response`` and
        # failed with UnboundLocalError, hiding the real error.
        raise SystemError(f"Error in generating Blog Outline: {err}") from err
    return response
|
||||
16
lib/get_blog_tags.py
Normal file
16
lib/get_blog_tags.py
Normal file
@@ -0,0 +1,16 @@
|
||||
def get_blog_tags(blog_article):
    """Suggest tags for the given blog content.

    Args:
        blog_article: The blog text to tag.

    Returns:
        Whatever ``openai_chatgpt`` returns; the prompt asks for two
        comma separated tags.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    # Suggest at least 5 tags for the following blog post [Enter your blog post text here].
    prompt = f"""As an expert SEO and blog writer, suggest only 2 relevant and specific blog tags
for the given blog content. Only reply with comma separated values.
Blog content: {blog_article}."""

    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
    except Exception as err:
        # Raise instead of discarding the exception object, which made the
        # function return None on failure.
        raise SystemError(f"Error in generating blog tags: {err}") from err
    else:
        return response
|
||||
20
lib/get_blog_title.py
Normal file
20
lib/get_blog_title.py
Normal file
@@ -0,0 +1,20 @@
|
||||
def generate_blog_title(blog_meta_desc):
    """Generate a blog title from the blog's meta description.

    Args:
        blog_meta_desc: Meta description of the blog.

    Returns:
        Whatever ``openai_chatgpt`` returns for the title prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    # TBD: Remove hardcoding, make dynamic
    prompt = f"""As a SEO expert and content writer, I will provide you with meta description of blog.
Your task is write a SEO optimized, call to action and engaging blog title for it.
Follows SEO best practises to suggest the blog title.
Please keep the titles concise, not exceeding 60 words, and ensure to maintain their meaning.
Respond with only one title and no description or keyword like Title:
Generate blog title for this given blog content: {blog_meta_desc}
"""
    # The suggested {num_subtopics} outline should include few long-tailed keywords and most popular questions.
    # TBD: Include --niche
    logger.info(f"Prompt used for blog title :{prompt}")
    try:
        response = openai_chatgpt(prompt)
    except Exception as err:
        # Without the raise, execution fell through to ``return response`` and
        # failed with UnboundLocalError, hiding the real error.
        raise SystemError(f"Error in generating Blog Title: {err}") from err
    return response
|
||||
47
lib/get_blog_topics.py
Normal file
47
lib/get_blog_topics.py
Normal file
@@ -0,0 +1,47 @@
|
||||
def generate_blog_topics(blog_keywords, num_blogs, niche):
    """Generate SEO-friendly blog topics for the given keywords.

    Args:
        blog_keywords: Keywords the blog topics should be about.
        num_blogs: Number of blog topics to request. When greater than 5,
            related keywords are fetched and added to the prompt.
        niche: Passed through to ``get_related_keywords`` to choose between
            long-tailed and popular keywords.

    Returns:
        Whatever ``openai_chatgpt`` returns for the topics prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    prompt = f"""As an SEO specialist and blog writer, write {num_blogs} catchy
and SEO-friendly blog topics on {blog_keywords}. The blog title must be less than 80 characters.
The blog titles must follow best SEO practises, be engaging and invite/tempt users to read full blog.
Do not include descriptions, explanations. Do not number the result."""

    # Beware of keywords stuffing, clustering, semantic should help avoid.
    if num_blogs > 5:
        # Get more keywords, based on user given keywords.
        more_keywords = get_related_keywords(num_blogs, blog_keywords, niche)
        # Only extend the prompt when keywords actually came back. This must
        # be an f-string, otherwise the literal text "{more_keywords}" is sent
        # to the model. The leading space separates it from the last sentence.
        if more_keywords:
            prompt = prompt + (
                " Use the following keywords wisely, without keyword stuffing: "
                f"{more_keywords}"
            )

    logger.info(f"Prompt used for generating blog topics: \n{prompt}\n")
    try:
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise instead of discarding the exception object, which made the
        # function return None on failure.
        raise SystemError(f"Error in generating blog topics: {err}") from err
|
||||
|
||||
|
||||
def get_related_keywords(num_blogs, keywords, niche):
    """Ask the LLM for keywords related to the given keywords.

    Args:
        num_blogs: Number of related keywords to request.
        keywords: The user supplied keywords / topic.
        niche: When truthy, long-tailed keywords are requested; otherwise
            popular keywords are requested.

    Returns:
        Whatever ``openai_chatgpt`` returns for the keyword prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    # Check if niche: use long tailed, else use popular keywords.
    keyword_kind = "long-tailed keywords" if niche else "keywords"
    # One shared template keeps the spacing right for both variants; the niche
    # variant used to join its pieces as "semanticallyrelated" and
    # "used inhigh-quality".
    prompt = (
        f"Generate a list without description of the top {num_blogs} most popular and"
        f" semantically related {keyword_kind} and entities for the topic of {keywords}"
        " that are used in high-quality content and relevant to my competitors."
    )
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise instead of discarding the exception object, which made the
        # function return None on failure.
        raise SystemError(f"Error in getting related keywords: {err}") from err
|
||||
@@ -22,11 +22,13 @@ nltk.download('punkt', quiet=True)
|
||||
from nltk.corpus import stopwords
|
||||
nltk.download('stopwords', quiet=True)
|
||||
|
||||
from .gpt_providers.openai_gpt_provider import openai_chatgpt, gen_new_from_given_img
|
||||
from .gpt_providers.openai_gpt_provider import analyze_and_extract_details_from_image
|
||||
from .gpt_providers.openai_gpt_provider import gen_new_from_given_img
|
||||
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from .gpt_providers.gpt_vision_img_details import analyze_and_extract_details_from_image
|
||||
from .generate_image_from_prompt import generate_image
|
||||
from .write_blogs_from_youtube_videos import youtube_to_blog
|
||||
from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
|
||||
from .gpt_online_researcher import do_online_research
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
@@ -35,33 +37,69 @@ logger.add(sys.stdout,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
# Load configuration
|
||||
#with open('config.json') as config_file:
|
||||
# config = json.load(config_file)
|
||||
|
||||
#wordpress_url = config['wordpress_url']
|
||||
# fixme: Remove the hardcoding, need add another option OR in config ?
|
||||
image_dir = "pseo_website/assets/"
|
||||
image_dir = "blog_images"
|
||||
image_dir = os.path.join(os.getcwd(), image_dir)
|
||||
# TBD: This can come from config file.
|
||||
output_path = "pseo_website/_posts/"
|
||||
output_path = "blogs"
|
||||
output_path = os.path.join(os.getcwd(), output_path)
|
||||
# WordPress connection settings are read from the environment so that secrets
# never live in source control. Each one falls back to an empty string when
# its variable is not set.
# NOTE(review): a username and application password were previously hard-coded
# here; treat that password as leaked and revoke it.
wordpress_url = os.environ.get("WORDPRESS_URL", "")
wordpress_username = os.environ.get("WORDPRESS_USERNAME", "")
wordpress_password = os.environ.get("WORDPRESS_PASSWORD", "")
|
||||
|
||||
|
||||
def generate_youtube_blog(yt_url_list):
|
||||
def generate_youtube_blog(yt_url_list, output_format="markdown"):
|
||||
"""Takes a list of youtube videos and generates blog for each one of them.
|
||||
"""
|
||||
# Use to store the blog in a string, to save in a *.md file.
|
||||
blog_markdown_str = ""
|
||||
for a_yt_url in yt_url_list:
|
||||
try:
|
||||
yt_img_path, yt_blog = youtube_to_blog(a_yt_url)
|
||||
logger.info(f"Starting to write blog on URL: {a_yt_url}")
|
||||
yt_blog, yt_title = youtube_to_blog(a_yt_url)
|
||||
if not yt_title or not yt_blog:
|
||||
logger.error("No content or title for audio to proceed.")
|
||||
sys.exit(1)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in youtube_to_blog: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
logger.info(f"Starting with online research for URL title: {yt_title}")
|
||||
research_report = do_online_research(yt_title)
|
||||
if not research_report:
|
||||
logger.error(f"Error in do_online_research returned no report: {e}")
|
||||
sys.exit(1)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in do_online_research: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
# Note: Check if the order of input matters for your function
|
||||
logger.info("Preparing a blog content from audio script and online research content...")
|
||||
blog_with_research(research_report, yt_blog)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in blog_with_research: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
# Get the title and meta description of the blog.
|
||||
title = generate_blog_title(yt_blog)
|
||||
blog_meta_desc = generate_blog_description(yt_blog)
|
||||
title = generate_blog_title(blog_meta_desc)
|
||||
logger.info(f"Title is {title} and description is {blog_meta_desc}")
|
||||
#blog_markdown_str = "# " + title.replace('"', '') + "\n\n"
|
||||
# Generate an introduction for the blog
|
||||
blog_markdown_str = "# " + title.replace('"', '') + "\n\n"
|
||||
# Get blog tags and categories.
|
||||
blog_tags = get_blog_tags(blog_meta_desc)
|
||||
logger.info(f"Blog tags are: {blog_tags}")
|
||||
blog_categories = get_blog_categories(blog_meta_desc)
|
||||
logger.info(f"Blog categories are: {blog_categories}")
|
||||
|
||||
# Generate an introduction for the blog
|
||||
blog_intro = get_blog_intro(title, yt_blog)
|
||||
logger.info(f"The Blog intro is:\n {blog_intro}")
|
||||
blog_markdown_str = blog_markdown_str + "\n\n" + f"{blog_intro}" + "\n\n"
|
||||
@@ -86,20 +124,23 @@ def generate_youtube_blog(yt_url_list):
|
||||
blog_conclusion = get_blog_conclusion(blog_markdown_str)
|
||||
# TBD: Add another image.
|
||||
blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n\n" + f"{blog_conclusion}" + "\n"
|
||||
print(f"Conclusion: {blog_markdown_str}")
|
||||
|
||||
# Get blog tags and categories.
|
||||
blog_tags = get_blog_tags(yt_blog)
|
||||
logger.info(f"Blog tags are: {blog_tags}")
|
||||
blog_categories = get_blog_categories(yt_blog)
|
||||
logger.info(f"Blog categories are: {blog_categories}")
|
||||
|
||||
save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
|
||||
# Proofread the blog, edit and remove dubplicates and refine it further.
|
||||
# Presently, fixing the blog keywords to be tags and categories.
|
||||
blog_keywords = f"{blog_tags} + {blog_categories}"
|
||||
blog_markdown_str = blog_proof_editor(blog_markdown_str, blog_keywords)
|
||||
|
||||
# Check the type of blog format needed by the user.
|
||||
if 'html' in output_format:
|
||||
blog_markdown_str = convert_tomarkdown_format(blog_markdown_str)
|
||||
elif 'markdown' in output_path:
|
||||
blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
|
||||
|
||||
save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
|
||||
#print(html_blog)
|
||||
# Try to save the blog content in a file, in whichever format. Just dump it.
|
||||
try:
|
||||
save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
|
||||
except Exception as err:
|
||||
logger.error("Failed to Save blog content: {blog_markdown_str}")
|
||||
|
||||
except Exception as e:
|
||||
# raise assertionerror
|
||||
@@ -108,7 +149,7 @@ def generate_youtube_blog(yt_url_list):
|
||||
|
||||
|
||||
def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
|
||||
wordpress=False, output_format="HTML"):
|
||||
wordpress=False, research_online=False, output_format="HTML"):
|
||||
"""
|
||||
This function will take a blog Topic to first generate sections for it
|
||||
and then generate content for each section.
|
||||
@@ -136,7 +177,7 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
|
||||
blog_intro = get_blog_intro(a_blog_topic, tpc_outlines)
|
||||
logger.info(f"The intro is:\n{blog_intro}")
|
||||
blog_markdown_str = blog_markdown_str + "### Introduction" + "\n\n" + f"{blog_intro}" + "\n\n"
|
||||
print(f"\n\n 1 -- BLOG_STR : {blog_markdown_str}\n\n")
|
||||
|
||||
# Now, for each blog we have sub topic. Generate content for each of the sub topic.
|
||||
for a_outline in tpc_outlines:
|
||||
a_outline = a_outline.replace('"', '')
|
||||
@@ -145,7 +186,6 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
|
||||
# a_outline is sub topic heading, hence part ToC also.
|
||||
#blog_markdown_str = blog_markdown_str + "\n\n" + f"### {a_outline}" + "\n\n"
|
||||
blog_markdown_str = blog_markdown_str + "\n" + f"\n {sub_topic_content}" + "\n\n"
|
||||
print(f"\n\n 3 -- BLOG_STR : {blog_markdown_str}\n\n")
|
||||
|
||||
# Get the Conclusion of the blog, by passing the generated blog.
|
||||
blog_conclusion = get_blog_conclusion(blog_markdown_str)
|
||||
@@ -154,6 +194,11 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
|
||||
# logger.info/check the final blog content.
|
||||
logger.info(f"Final blog content: {blog_markdown_str}")
|
||||
|
||||
#if research_online:
|
||||
# # Call on the got-researcher, tavily apis for this. So many apis floating around.
|
||||
# report = do_online_research_on(blog_keywords)
|
||||
# blog_markdown_str = blog_with_research(report, blog_markdown_str)
|
||||
|
||||
blog_meta_desc = generate_blog_description(blog_markdown_str)
|
||||
logger.info(f"\nThe blog meta description is:{blog_meta_desc}\n")
|
||||
|
||||
@@ -162,10 +207,10 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
|
||||
main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
|
||||
|
||||
blog_tags = get_blog_tags(blog_markdown_str)
|
||||
logger.info(f"\nBlog tags for generated content: {blog_tags}")
|
||||
logger.info(f"\nBlog tags for generated content: {blog_tags}\n")
|
||||
|
||||
blog_categories = get_blog_categories(blog_markdown_str)
|
||||
logger.info(f"Generated blog categories: {blog_categories}")
|
||||
logger.info(f"Generated blog categories: {blog_categories}\n")
|
||||
|
||||
# Use chatgpt to convert the text into HTML or markdown.
|
||||
if 'html' in output_format:
|
||||
@@ -178,6 +223,7 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
|
||||
# Similar tags and categories will be missed.
|
||||
# blog_categories =
|
||||
# blog_tags =
|
||||
logger.info("Uploading the blog to wordpress.\n")
|
||||
main_img_path = compress_image(main_img_path, quality=85)
|
||||
try:
|
||||
img_details = analyze_and_extract_details_from_image(main_img_path)
|
||||
@@ -247,14 +293,16 @@ def generate_blog_title(blog_meta_desc):
|
||||
Given a blog title generate an outline for it
|
||||
"""
|
||||
# TBD: Remove hardcoding, make dynamic
|
||||
prompt = f"""As a SEO expert and content writer, I will provide you with blog. Your task is write title for it.
|
||||
prompt = f"""As a SEO expert and content writer, I will provide you with meta description of blog.
|
||||
Your task is write a SEO optimized, call to action and engaging blog title for it.
|
||||
Follows SEO best practises to suggest the blog title.
|
||||
Please keep the titles concise, not exceeding 60 words, and ensure to maintain their meaning.
|
||||
Respond with only one title and no description, for this given blog content: {blog_meta_desc}
|
||||
Respond with only one title and no description or keyword like Title:
|
||||
Generate blog title for this given meta description: {blog_meta_desc}
|
||||
"""
|
||||
# The suggested {num_subtopics} outline should include few long-tailed keywords and most popular questions.
|
||||
# TBD: Include --niche
|
||||
logger.debug(f"Prompt used for blog title :{prompt}")
|
||||
logger.info(f"Prompt used for blog title :{prompt}")
|
||||
try:
|
||||
response = openai_chatgpt(prompt)
|
||||
except Exception as err:
|
||||
@@ -494,6 +542,35 @@ def remove_stop_words(sentence):
|
||||
return filtered_sentence
|
||||
|
||||
|
||||
def convert_tomarkdown_format(blog_content):
    """Ask the LLM to reformat blog content as markdown for static sites.

    Args:
        blog_content: The blog text to convert.

    Returns:
        Whatever ``openai_chatgpt`` returns for the conversion prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails.
    """
    # "front matter" is the static-site metadata header; the prompt previously
    # said "font matter".
    prompt = f"""
As an expert in markdown language format and front matter, used for static webpages.
Your task is to convert and improve formatting of given blog content.
Do Not modify the content, only modify to convert it into highly readable blog content.

Use below guidelines and include other best practises:
1). Headers for Structure: Use # for main headings and increase the number of # for
subheadings (##, ###, etc.). Organize given content into clear, hierarchical sections.
2). Emphasizing Text: Use single asterisks or underscores for italic (*italic* or _italic_),
double for bold (**bold** or __bold__), and triple for bold italic (***bold italic***).
3). Lists: For unordered lists, use dashes, asterisks, or plus signs (-, *, +).
For ordered lists, use numbers followed by periods (1., 2., etc.).
4). Blockquotes: Use > for blockquotes, and add additional > for nested blockquotes.
5). Code Blocks: Use backticks for inline code (code) and triple backticks for code blocks.
Specify a language for syntax highlighting.
6). Horizontal Lines: Create a horizontal line using three or more asterisks, dashes, or underscores (---, ***).
7). Table Formatting: Use pipes | and dashes - to create tables. Align text with colons.

Convert the given blog content in well organised markdown content: {blog_content}"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Propagate the failure; the exception used to be created but not raised.
        raise SystemError(f"Error in converting to Markdown format: {err}") from err
|
||||
|
||||
|
||||
def convert_markdown_to_html(md_content):
|
||||
""" Helper function to convert given text to HTML
|
||||
"""
|
||||
@@ -527,5 +604,86 @@ def convert_markdown_to_html(md_content):
|
||||
# TBD: Add logic for which_provider and which_model
|
||||
response = openai_chatgpt(prompt)
|
||||
return response
|
||||
except Exception as err:
|
||||
SystemError(f"Error in convert to HTML")
|
||||
|
||||
|
||||
def blog_with_research(report, blog):
    """Combine the given online research and gpt blog content.

    Builds a prompt asking the model to merge the research report and the
    blog draft into one article returned as HTML, then sends it to
    ``openai_chatgpt``.

    Args:
        report (str): Research report used as context for the blog.
        blog (str): Blog content to be combined with the report.

    Returns:
        The value returned by ``openai_chatgpt`` for the prompt.

    Raises:
        SystemError: If the ``openai_chatgpt`` call raises an ``Exception``.
    """

    prompt = f"""
You are an expert copywriter specializing in content optimization for SEO.
I will provide you with a research report and a blog content on the same topic.
Treat the research report as the context for the blog and better it accordingly.
Your task is to transform and combine the given research and blog content into a well-structured, unique
and engaging blog article.
Your objectives include:
1. Master the report and blog content: Understand main ideas, key points, and the core message.
2. Sentence Structure: Rephrase while preserving logical flow and coherence.
3. Identify Main Keyword: Determine the primary topic and combine the articles on the main topic.
4. Keyword Integration: Naturally integrate keywords in headings, subheadings, and body text, avoiding overuse.
5. Write Unique Content: Avoid direct copying from given report and blog; rewrite in your own words and style.
6. Optimize for SEO: Generate high quality informative content.
Implement SEO best practises with appropriate keyword density.
7. Craft Engaging and Informative Article: Provide value and insight to readers.
8. Proofread: Important to Check for grammar, spelling, and punctuation errors.
9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases,
interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures.
10. Structuring: Include an Introduction, subtopics and use bullet points or
numbered lists if appropriate. Important to include FAQs, and Conclusion.
11. Ensure Uniqueness: Guarantee the article is plagiarism-free. Write in unique, informative style.
12. Punctuation: Use appropriate question marks at the end of questions.
13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
14. REMEMBER to give final response as complete HTML.
Follow these guidelines to create a well-optimized, unique, and informative article
that will rank well in search engine results and engage readers effectively.

Create a blog post from the given research report and blog content below.
Research report: {report}
Blog content: {blog}
"""
    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
        return response
    except Exception as err:
        # The exception used to be constructed but never raised, and its text
        # ("related keywords") was copied from another helper.
        raise SystemError("Error in combining research report and blog content.") from err
|
||||
|
||||
|
||||
def blog_proof_editor(blog_content, blog_keywords):
    """Proofread and rewrite a blog post for originality and quality.

    Args:
        blog_content (str): Blog text to be edited. Must be non-empty.
        blog_keywords (str): Topic/keywords of the blog, embedded in the prompt.

    Returns:
        The value returned by ``openai_chatgpt`` for the editing prompt.

    Raises:
        SystemExit: With exit status 1 when ``blog_content`` is empty.
        SystemError: If the ``openai_chatgpt`` call raises an ``Exception``.
    """
    # The guard used `and`, so an empty blog accompanied by keywords was still
    # sent for proofreading. Only the content is required; empty keywords are
    # still allowed, as before.
    if not blog_content:
        logger.error("Blog proof reader has no content to proofread.")
        # `exit()` is injected by the `site` module and is not guaranteed to
        # exist; raising SystemExit directly is equivalent.
        raise SystemExit(1)

    prompt = f"""I am looking for detailed editing and enhancement of the given blog post,
with a particular focus on maintaining originality.
The topic of the content is [{blog_keywords}]. Please go through the blog and make direct edits to improve it,
ensuring the final output is both high-quality and original.
Note: There are duplicates headings and corresponding paragraphs, rewrite into one subheading.

Here are the specific areas to focus on:

1). Ensure Originality: Edit any sections that lack originality, replacing them with unique and creative content.
2). Eliminate Repetitive Language: Rewrite repetitive phrases with varied and engaging language.
3). Vocabulary and Grammar Enhancement: Directly correct any grammatical errors and upgrade the
vocabulary for better readability.
4). Improve Sentence Structure: Enhance sentence construction for better clarity and flow.
5). Tone and Brand Alignment: Adjust the tone, voice, personality of given content to make it unique.
6). Optimize Content Structure: Reorganize the content for a more impactful presentation,
including better paragraphing and transitions.
7). Remove Redundancies: Important, Cut out any redundant information or overly complex jargon.
8). Refine Overall Structure: Make structural changes to improve the overall impact of the content.
9). Remember, rewrite all content that repeated, while maintaining the formatting of the given blog text.

Please apply these changes directly to the following blog text and provide the edited version:
[{blog_content}]. """

    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
        return response
    except Exception as err:
        # The exception used to be constructed but never raised, which made a
        # failure look like a successful call returning None.
        raise SystemError(f"Error Blog Proof Reading: {err}") from err
|
||||
|
||||
19
lib/get_topic_content.py
Normal file
19
lib/get_topic_content.py
Normal file
@@ -0,0 +1,19 @@
|
||||
def generate_topic_content(blog_keywords, sub_topic):
    """Generate markdown content for one sub-topic of a blog.

    Args:
        blog_keywords (str): Main topic of the blog, used to frame the prompt.
        sub_topic (str): Sub-topic to write content for.

    Returns:
        The value returned by ``openai_chatgpt`` for the prompt.

    Raises:
        SystemError: If the ``openai_chatgpt`` call raises an ``Exception``.
    """
    # NOTE(review): no import of `logger` or `openai_chatgpt` is visible in
    # this module - confirm they are brought into scope, otherwise the calls
    # below fail with NameError.
    # The outline should contain various subheadings and include the starting sentence for each section.
    # TBD: Depending on the usecase 'Voice and style' will change to professional etc.
    prompt = f"""As a professional blogger and topic authority on {blog_keywords},
craft factual (no more than 200 characters) subtopic content on {sub_topic}.
Your response should reflect Experience, Expertise, Authoritativeness and Trustworthiness from content.
Voice and style guide: Write in a professional manner, giving enlightening details and reasons.
Use natural language and phrases that a real person would use: in normal conversations.
Format your response using markdown. REMEMBER Not to include introduction or conclusion in your response.
Use headings(h3 to h6 only), subheadings, bullet points, and bold to organize the information."""
    logger.info(f"Generate topic content using prompt:\n{prompt}\n")
    try:
        response = openai_chatgpt(prompt)
        return response
    except Exception as err:
        # The exception used to be constructed but never raised, so failures
        # silently returned None.
        raise SystemError(f"Error in generating topic content: {err}") from err
|
||||
108
lib/google_search_gpt_vision.py
Normal file
108
lib/google_search_gpt_vision.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import re #additional import for regex
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
from openai import OpenAI
|
||||
|
||||
# Script: fetch a Google results page, strip the bulky markup, and ask a GPT
# model (via function/tool calling) to extract the organic results and the
# "people also ask" section.

# The key is read from OPENAI_API_KEY, the name used by the other modules in
# this project; the hyphenated 'OPENAI-API-KEY' was never set.
client = OpenAI(
    api_key=os.getenv('OPENAI_API_KEY')
)

# Target URL can be a website url or it can google search
query = "kedarkanta trek"
target_url = "https://www.google.com/search"
# Pass the query through `params` so requests URL-encodes spaces and special
# characters instead of interpolating them raw into the URL.
response = requests.get(target_url, params={"q": query, "gl": "us"}, timeout=30)
# Fail early rather than paying tokens to "scrape" an HTTP error page.
response.raise_for_status()
html_text = response.text

# Remove unnecessary part to prevent HUGE TOKEN cost!
# Remove everything between <head> and </head>
html_text = re.sub(r'<head.*?>.*?</head>', '', html_text, flags=re.DOTALL)
# Remove all occurrences of content between <script> and </script>
html_text = re.sub(r'<script.*?>.*?</script>', '', html_text, flags=re.DOTALL)
# Remove all occurrences of content between <style> and </style>
html_text = re.sub(r'<style.*?>.*?</style>', '', html_text, flags=re.DOTALL)

completion = client.chat.completions.create(
    model="gpt-4-1106-preview",
    messages=[
        {"role": "system", "content": "You are a master at scraping Google results data. Scrape two things: 1st. Scrape top 10 organic results data and 2nd. Scrape people_also_ask section from Google search result page."},
        {"role": "user", "content": html_text}
    ],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "parse_organic_results",
                "description": "Parse organic results from Google SERP raw HTML data nicely",
                "parameters": {
                    'type': 'object',
                    'properties': {
                        'data': {
                            'type': 'array',
                            'items': {
                                'type': 'object',
                                'properties': {
                                    'title': {'type': 'string'},
                                    'original_url': {'type': 'string'},
                                    'snippet': {'type': 'string'},
                                    'position': {'type': 'integer'}
                                }
                            }
                        }
                    }
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "parse_people_also_ask_section",
                "description": "Parse `people also ask` section from Google SERP raw HTML",
                "parameters": {
                    'type': 'object',
                    'properties': {
                        'data': {
                            'type': 'array',
                            'items': {
                                'type': 'object',
                                'properties': {
                                    'question': {'type': 'string'},
                                    'original_url': {'type': 'string'},
                                    'answer': {'type': 'string'},
                                }
                            }
                        }
                    }
                }
            }
        }
    ],
    tool_choice="auto"
)

# With tool_choice="auto" the model may return zero, one or both tool calls in
# any order, so look them up by function name instead of by position.
results_by_tool = {}
for tool_call in completion.choices[0].message.tool_calls or []:
    arguments = json.loads(tool_call.function.arguments)
    results_by_tool[tool_call.function.name] = arguments.get('data', [])

# Organic_results
organic_results = results_by_tool.get('parse_organic_results', [])

print('Organic results:')
for result in organic_results:
    # The tool schema marks no field as required, so any key may be absent.
    print(f"Blog Title: {result.get('title', 'N/A')}")
    print(f"Blog URL: {result.get('original_url', 'N/A')}")
    print(f"Blog Snippet: {result.get('snippet', 'N/A')}")
    print(f"Blog Position: {result.get('position', 'N/A')}")
    print('---')

# People also ask
people_also_ask = results_by_tool.get('parse_people_also_ask_section', [])

print('People also ask:')
for result in people_also_ask:
    print(f"People_Also_Ask: Question: {result.get('question', 'N/A')}")
    print(f"People_Also_Ask: URL: {result.get('original_url', 'N/A')}")
    # The `f` prefix was missing here, so the placeholder text itself was printed.
    print(f"People_Also_Ask: Answer: {result.get('answer', 'N/A')}")
    print('---')
|
||||
38
lib/google_search_serpapi.py
Normal file
38
lib/google_search_serpapi.py
Normal file
@@ -0,0 +1,38 @@
|
||||
# Not using it, as they wanted phone verification done.
|
||||
|
||||
import os
|
||||
import serpapi
|
||||
import csv
|
||||
|
||||
from dotenv import load_dotenv
|
||||
# Script: run one Google search through SerpApi, print parts of the response
# and dump the organic results to output.csv.
load_dotenv()
api_key = os.getenv('SERPAPI_KEY')

client = serpapi.Client(api_key=api_key)
result = client.search(
    q="Retrieval Augumented Generation RAG",
    engine="google",
    location="Austin, Texas",
    hl="en",
    gl="us",
)

print(result)
print(result['organic_results'])
print(result["search_information"]["total_results"])  # Get number of results available
print(result["related_questions"])  # Get all the related questions


organic_results = result["organic_results"]
# Explicit UTF-8 so non-ASCII titles/snippets are written the same way on
# every platform instead of depending on the locale's default encoding.
with open('output.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)

    # Write the headers
    csv_writer.writerow(["Title", "Link", "Snippet"])

    # Write the data
    # A separate loop name keeps the search response in `result` intact.
    # NOTE(review): assumes each organic result is dict-like and may omit a
    # field such as "snippet"; missing fields become empty cells - confirm.
    for row in organic_results:
        csv_writer.writerow([row.get("title", ""), row.get("link", ""), row.get("snippet", "")])


print('Done writing to CSV file.')
|
||||
@@ -16,54 +16,68 @@
|
||||
#
|
||||
##############################################################
|
||||
|
||||
# import and connect
|
||||
import os
|
||||
import logging
|
||||
from tavily import TavilyClient
|
||||
from langchain.adapters.openai import convert_openai_messages
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
|
||||
def do_research_on(research_query):
|
||||
"""
|
||||
Basically sending in the blog title to do research on.
|
||||
gpt-researcher API version to do extensive web research for given keywords.
|
||||
"""
|
||||
# $ export TAVILY_API_KEY={Your Tavily API Key here}
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s-%(levelname)s-%(module)s-%(lineno)d-%(message)s')
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
|
||||
def do_online_research(query):
|
||||
try:
|
||||
client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
|
||||
except Exception as err:
|
||||
SystemExit(f"Failed to create TavilyClient: {err}")
|
||||
# Retrieve API keys
|
||||
api_key = os.getenv('TAVILY_API_KEY')
|
||||
openai_api_key = os.getenv('OPENAI_API_KEY')
|
||||
if not api_key or not openai_api_key:
|
||||
raise ValueError("API keys for Tavily or OpenAI are not set.")
|
||||
|
||||
try:
|
||||
# run tavily search
|
||||
research_content = client.search(
|
||||
research_query,
|
||||
search_depth="advanced",
|
||||
include_answer=True,
|
||||
max_results=10)["results"]
|
||||
except Exception as err:
|
||||
SystemExit(f"Unable to do tavily search: {err}")
|
||||
# Initialize Tavily client
|
||||
try:
|
||||
client = TavilyClient(api_key=api_key)
|
||||
except Exception as err:
|
||||
logging.error("Failed to create Tavily client. Check TAVILY_API_KEY")
|
||||
exit(1)
|
||||
|
||||
# setup prompt
|
||||
prompt = [{
|
||||
"role": "system",
|
||||
"content": f'You are an AI critical thinker research assistant. '\
|
||||
f'Your sole purpose is to write well written, critically acclaimed,'\
|
||||
f'objective and structured reports on given text.'
|
||||
# Run tavily search
|
||||
logging.info(f"Running Tavily search on: {query}")
|
||||
try:
|
||||
content = client.search(query, search_depth="advanced")["results"]
|
||||
except Exception as err:
|
||||
logging.error(f"Failed to do Tavily Research: {err}")
|
||||
exit(1)
|
||||
|
||||
# Setup prompt for GPT-4
|
||||
prompt = [{
|
||||
"role": "system",
|
||||
"content": ('You are an AI critical thinker research assistant. '
|
||||
'Your sole purpose is to write well written, critically acclaimed, '
|
||||
'objective and structured reports on given text.')
|
||||
}, {
|
||||
"role": "user",
|
||||
"content": f'Information: """{research_content}"""\n\n' \
|
||||
f'Using the above information, answer the following'\
|
||||
f'query: "{research_query}" in a detailed report --'\
|
||||
f'Please use MLA format and markdown syntax.'
|
||||
"role": "user",
|
||||
"content": (f'Information: """{content}"""\n\n'
|
||||
f'Using the above information, answer the following '
|
||||
f'query: "{query}" in a detailed report --'
|
||||
f'Please use MLA format and markdown syntax.')
|
||||
}]
|
||||
|
||||
# run gpt-4
|
||||
try:
|
||||
# Run GPT-4
|
||||
logging.info("Generating report with GPT-4...")
|
||||
lc_messages = convert_openai_messages(prompt)
|
||||
research_report = ChatOpenAI(
|
||||
model='gpt-4',
|
||||
openai_api_key=openai_api_key
|
||||
).invoke(lc_messages).content
|
||||
except Exception as err:
|
||||
SystemExit(f"Failed to convert OpenAI message and get response.")
|
||||
try:
|
||||
report = ChatOpenAI(model='gpt-4', openai_api_key=openai_api_key).invoke(lc_messages).content
|
||||
logging.info(f"\n Below is the online research report for given keywords/title: \n\n{report}")
|
||||
return report
|
||||
except Exception as err:
|
||||
logging.error("Failed to generate do_online_research with ChatOpenAI")
|
||||
exit(1)
|
||||
|
||||
# print report
|
||||
print(research_report)
|
||||
return research_report
|
||||
except Exception as e:
|
||||
logging.error(f"Failed in online research: {e}")
|
||||
exit(1)
|
||||
|
||||
@@ -1,19 +1,11 @@
|
||||
gpt_providers are companies providing commercial/free GPT pre-trained models as saas.
|
||||
These include OpenAI, Azure, Google, FB, Anthropic, etc.
|
||||
# OpenAI ChatGPT Integration for Enhanced Blog Generation
|
||||
|
||||
- If you want to use chatgpt and its models, then use openai as gpt_provider
|
||||
- We plan to integrate the most accurate, widely used models as gpt providers.
|
||||
- These will also include text to image and video generations as blogging artifacts.
|
||||
|
||||
gpt_provider=openai
|
||||
|
||||
------------------------------------
|
||||
|
||||
Here are some tips for using LLMs to generate ideas:
|
||||
|
||||
- Be as specific as possible in your prompts. The more specific you are, the better the LLM will
|
||||
be able to understand what you are asking for.
|
||||
- Use keywords in your prompts. This will help the LLM to generate ideas that are relevant to your topic.
|
||||
- Try different temperatures and top_p values. These parameters control the creativity and diversity of the generated ideas.
|
||||
- Experiment with different prompts and settings to see what works best for you.
|
||||
## Introduction
|
||||
This toolkit, written in Python, integrates OpenAI's ChatGPT and other AI services for comprehensive blog generation. It allows for selecting and fine-tuning OpenAI models to suit various content creation needs, including text generation, image analysis, and speech-to-text conversion.
|
||||
|
||||
## Key Features
|
||||
- **AI-Powered Text Generation**: Leverages OpenAI's ChatGPT for creating engaging and contextually relevant text based on user inputs.
|
||||
- **Image Analysis and Detail Extraction**: Utilizes OpenAI's Vision API to analyze images and extract important details like Alt Text, Description, Title, and Caption.
|
||||
- **Dynamic Image Generation**: Generates images from textual descriptions using DALL-E 2 and DALL-E 3 models, enhancing blog visual content.
|
||||
- **Speech-to-Text Transcription**: Converts audio from YouTube videos to text, enabling easy content repurposing for blogs.
|
||||
- **Image Variation Creation**: Produces variations of existing images, offering creative flexibility and maintaining topical relevance.
|
||||
|
||||
56
lib/gpt_providers/gen_dali2_images.py
Normal file
56
lib/gpt_providers/gen_dali2_images.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from openai import OpenAI
|
||||
from loguru import logger
|
||||
import sys
|
||||
|
||||
from .save_image import save_generated_image
|
||||
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd", n=1):
    """
    Generates images using the DALL-E 3 model based on a given text prompt.

    NOTE(review): this copy lives in gen_dali2_images.py but requests the
    "dall-e-3" model - confirm whether a DALL-E 2 variant was intended.

    Args:
        img_prompt (str): Text prompt to generate the image.
        image_dir (str): Directory where the generated image will be saved.
        size (str, optional): Size of the generated images. Defaults to "1024x1024".
        quality (str, optional): Quality of the generated images. Defaults to "hd".
        n (int, optional): Number of images to generate. Defaults to 1.

    Returns:
        The value returned by ``save_generated_image`` (documented by the
        original author as the path to the saved image).

    Raises:
        SystemExit: If an error occurs in image generation or saving.
    """
    # Imported locally: the module only imports the `OpenAI` class, so the bare
    # name `openai` in the old `except openai.OpenAIError` clause was undefined
    # and raised NameError as soon as any error reached it.
    from openai import OpenAIError

    try:
        logger.info("Generating Dall-e-3 image for the blog.")
        client = OpenAI()

        img_generation_response = client.images.generate(
            model="dall-e-3",
            prompt=img_prompt,
            size=size,
            quality=quality,
            n=n
        )
        # Save the generated image locally.
        try:
            img_path = save_generated_image(img_generation_response, image_dir)
            return img_path
        except Exception as err:
            logger.error(f"Failed to Save generated image: {err}")
            # A failed save used to fall through and return None. Exit as the
            # docstring promises; SystemExit is not caught by the handlers below.
            sys.exit("Exiting due to a failure saving the generated image.")

    except OpenAIError as e:
        # Only HTTP status errors carry `status_code`; the base error class has
        # neither `http_status` nor `error`, so read it defensively.
        status = getattr(e, "status_code", "n/a")
        logger.error(f"Dalle-3 image generation error: HTTP Status {status}, Error: {e}")
        sys.exit("Exiting due to Dalle-3 image generation error.")

    except Exception as e:
        logger.error(f"Failed to generate images with Dalle3: {e}")
        sys.exit("Exiting due to a general error in image generation.")
|
||||
61
lib/gpt_providers/gen_dali3_images.py
Normal file
61
lib/gpt_providers/gen_dali3_images.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from openai import OpenAI
|
||||
from loguru import logger
|
||||
import sys
|
||||
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
from .save_image import save_generated_image
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd", n=1):
    """
    Generates images using the DALL-E 3 model based on a given text prompt.

    Args:
        img_prompt (str): Text prompt to generate the image.
        image_dir (str): Directory where the generated image will be saved.
        size (str, optional): Size of the generated images. Defaults to "1024x1024".
        quality (str, optional): Quality of the generated images. Defaults to "hd".
        n (int, optional): Number of images to generate. Defaults to 1.

    Returns:
        The value returned by ``save_generated_image`` (documented by the
        original author as the path to the saved image).

    Raises:
        SystemExit: If an error occurs in image generation or saving.
    """
    # Imported locally: the module only imports the `OpenAI` class, so the bare
    # name `openai` in the old `except openai.OpenAIError` clause was undefined
    # and raised NameError as soon as any error reached it.
    from openai import OpenAIError

    # NOTE(review): every failure below is turned into SystemExit, which the
    # @retry decorator presumably does not retry - confirm intended behaviour.
    try:
        logger.info("Generating Dall-e-3 image for the blog.")
        client = OpenAI()

        img_generation_response = client.images.generate(
            model="dall-e-3",
            prompt=img_prompt,
            size=size,
            quality=quality,
            n=n
        )

        img_path = save_generated_image(img_generation_response, image_dir)
        return img_path

    except OpenAIError as e:
        # Only HTTP status errors carry `status_code`; the base error class has
        # neither `http_status` nor `error`, so read it defensively.
        status = getattr(e, "status_code", "n/a")
        logger.error(f"Dalle-3 image generation error: HTTP Status {status}, Error: {e}")
        sys.exit("Exiting due to Dalle-3 image generation error.")

    except Exception as e:
        logger.error(f"Failed to generate images with Dalle3: {e}")
        sys.exit("Exiting due to a general error in image generation.")
|
||||
|
||||
# Example usage
# Runs only when the module is executed directly. The directory below is a
# placeholder and must be replaced with a real path.
if __name__ == "__main__":
    try:
        image_path = generate_dalle3_images("A futuristic cityscape", "/path/to/image/dir")
        print(f"Image generated and saved at: {image_path}")
    except SystemExit as e:
        # generate_dalle3_images reports failures through sys.exit(); print the
        # exit message here instead of letting it propagate.
        print(f"Terminated: {e}")
|
||||
51
lib/gpt_providers/gen_variation_img.py
Normal file
51
lib/gpt_providers/gen_variation_img.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from loguru import logger
|
||||
import sys
|
||||
from PIL import Image
|
||||
from openai import OpenAI
|
||||
|
||||
def gen_new_from_given_img(img_path, image_dir, num_img=1, img_size="1024x1024", response_format="url"):
    """
    Generates variations of a given image using OpenAI's image variation API.

    The source image is first flattened onto a white RGBA background and
    written back to ``img_path`` as PNG (the original file is overwritten),
    then uploaded to the variation endpoint.

    Args:
        img_path (str): Path to the original image file.
        image_dir (str): Directory where the generated images will be saved.
        num_img (int, optional): Number of image variations to generate. Defaults to 1.
        img_size (str, optional): Size of the generated images. Defaults to "1024x1024".
        response_format (str, optional): Format in which the generated images are returned. Defaults to "url".

    Returns:
        The value returned by ``save_generated_image`` (documented by the
        original author as the path to the saved image variation).

    Raises:
        SystemExit: If a critical error occurs that prevents successful execution.
    """
    # `save_generated_image` was called without ever being imported in this
    # module; the sibling image generators import it from `.save_image`.
    from .save_image import save_generated_image

    try:
        logger.info(f"Starting image variation generation for: {img_path}")

        # Convert and prepare the image
        with Image.open(img_path) as source_img:
            png = source_img.convert('RGBA')
        background = Image.new('RGBA', png.size, (255, 255, 255))
        alpha_composite = Image.alpha_composite(background, png)
        alpha_composite.save(img_path, 'PNG', quality=80)
        logger.info("Image prepared for variation generation.")

        client = OpenAI()
        # Use a context manager so the upload handle is closed even when the
        # API call fails; it was previously left open.
        with open(img_path, "rb") as image_file:
            variation_response = client.images.create_variation(
                image=image_file,
                n=num_img,
                size=img_size,
                response_format=response_format
            )

        # Saving the generated image
        generated_image_path = save_generated_image(variation_response, image_dir)
        logger.info(f"Image variation generated and saved to: {generated_image_path}")
        return generated_image_path

    except Exception as e:
        logger.error(f"Error occurred during image variation generation: {e}")
        sys.exit(f"Exiting due to critical error: {e}")
|
||||
106
lib/gpt_providers/gpt_vision_img_details.py
Normal file
106
lib/gpt_providers/gpt_vision_img_details.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import requests
|
||||
import re
|
||||
import base64
|
||||
import os
|
||||
import sys
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def analyze_and_extract_details_from_image(image_path):
    """
    Analyzes an image using OpenAI's Vision API to extract Alt Text, Description, Title, and Caption.

    The image is base64-encoded, posted to the chat completions endpoint, and
    the reply text is parsed with regular expressions.

    Args:
        image_path (str): Path to the image file.

    Returns:
        dict: Keys 'alt_text', 'description', 'title' and 'caption'. A field
        that cannot be found in the reply is set to "N/A".

    Raises:
        SystemExit: On any request failure or unexpected error.
    """
    # This module never defined `logger`, so the first log call raised
    # NameError and the function could not succeed. Bind a stdlib logger locally.
    import logging
    logger = logging.getLogger(__name__)

    try:
        logger.info("Starting image analysis using OpenAI's Vision API.")

        def encode_image(path):
            """ Encodes an image to a base64 string. """
            with open(path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode('utf-8')

        base64_image = encode_image(image_path)
        logger.info("Image encoded to base64 successfully.")

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
        }

        payload = {
            "model": "gpt-4-vision-preview",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Analyze the given image and suggest the following: Alternative text(Alt Text), description, title, caption."
                        },
                        {
                            "type": "image_url",
                            # NOTE(review): the data URL always declares image/jpeg,
                            # whatever the actual file type is - confirm acceptable.
                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                        }
                    ]
                }
            ],
            "max_tokens": 300
        }

        # Without a timeout a stalled connection would block forever.
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=120,
        )
        response.raise_for_status()

        assistant_message = response.json()['choices'][0]['message']['content']
        logger.info("Received response from OpenAI API.")

        # Extracting details using regular expressions
        alt_text_match = re.search(r'Alt Text: "(.*?)"', assistant_message)
        # Also accept a description that ends the message; it used to match
        # only when followed by a blank line.
        description_match = re.search(r'Description: (.*?)(?:\n\n|$)', assistant_message)
        title_match = re.search(r'Title: "(.*?)"', assistant_message)
        caption_match = re.search(r'Caption: "(.*?)"', assistant_message)

        image_details = {
            'alt_text': alt_text_match.group(1) if alt_text_match else "N/A",
            'description': description_match.group(1) if description_match else "N/A",
            'title': title_match.group(1) if title_match else "N/A",
            'caption': caption_match.group(1) if caption_match else "N/A"
        }

        logger.info("Image analysis completed successfully.")
        return image_details

    except requests.RequestException as e:
        logger.error(f"GPT-Vision API communication failure. Error: {e}")
        sys.exit(f"Exiting due to GPT-Vision API communication failure: {e}")

    except Exception as e:
        logger.error(f"Unexpected error occurred during image analysis: {e}")
        sys.exit(f"Exiting due to an unexpected error: {e}")
|
||||
|
||||
# Example usage
# Runs only when the module is executed directly. The image path below is a
# placeholder and must be replaced with a real file.
if __name__ == "__main__":
    image_path = "path/to/your/image.jpg"
    try:
        details = analyze_and_extract_details_from_image(image_path)
        if details:
            print(f"Extracted image details: {details}")
        else:
            print("No details extracted from the image.")
    except SystemExit as e:
        # The analysis function reports failures through sys.exit(); print the
        # exit message here instead of letting it propagate.
        print(f"Terminated: {e}")
|
||||
63
lib/gpt_providers/openai_chat_completion.py
Normal file
63
lib/gpt_providers/openai_chat_completion.py
Normal file
@@ -0,0 +1,63 @@
|
||||
import time
|
||||
import logging
|
||||
import openai
|
||||
import os
|
||||
|
||||
# Configure standard logging
|
||||
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def openai_chatgpt(prompt, model="gpt-4-1106-preview", temperature=0.2, max_tokens=4096, top_p=0.9, n=1):
    """
    Wrapper function for OpenAI's ChatGPT completion.

    Args:
        prompt (str): The input text to generate completion for.
        model (str, optional): Model to be used for the completion. Defaults to "gpt-4-1106-preview".
        temperature (float, optional): Controls randomness. Lower values make responses more deterministic. Defaults to 0.2.
        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 4096.
        top_p (float, optional): Controls diversity. Defaults to 0.9.
        n (int, optional): Number of completions to generate. Defaults to 1.

    Returns:
        str: The content of the first generated completion.

    Raises:
        SystemExit: If an API error, connection error, rate limit error or any
            other exception occurs.
    """
    # NOTE(review): every failure below is turned into SystemExit, which the
    # @retry decorator presumably does not retry - confirm intended behaviour.
    # Wait for 10 seconds to comply with rate limits
    for _ in range(10):
        time.sleep(1)

    try:
        client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            # `temperature` was accepted and documented but never sent.
            temperature=temperature,
            max_tokens=max_tokens,
            n=n,
            top_p=top_p
            # Additional parameters can be included here
        )
        return response.choices[0].message.content

    # The more specific errors are listed first: both derive from
    # openai.APIError, so their handlers were unreachable while it came first.
    except openai.RateLimitError as e:
        logger.error(f"Rate limit exceeded on OpenAI API request: {e}")
        raise SystemExit from e
    except openai.APIConnectionError as e:
        logger.error(f"Failed to connect to OpenAI API: {e}")
        raise SystemExit from e
    except openai.APIError as e:
        logger.error(f"OpenAI API Error: {e}")
        raise SystemExit from e
    except Exception as err:
        logger.error(f"OpenAI error: {err}")
        # This handler binds `err`; the old `from e` raised NameError here.
        raise SystemExit from err
|
||||
53
lib/gpt_providers/openai_chat_completion_streaming.py
Normal file
53
lib/gpt_providers/openai_chat_completion_streaming.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import sys
|
||||
import logging
|
||||
import openai
|
||||
|
||||
# Configure standard logging
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def openai_chatgpt_streaming_text(user_prompt):
    """
    Uses streaming functionality to get real-time output from OpenAI's GPT model.

    Each text fragment is echoed to stdout as soon as it arrives and the
    fragments are also accumulated into the returned string.

    Args:
        user_prompt (str): The prompt to send to the model.

    Returns:
        str: The complete text generated by the model in response to the prompt.

    Raises:
        SystemExit: If an error occurs in connecting to the OpenAI API or during streaming.
    """
    try:
        client = openai.OpenAI()
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-16k",
            messages=[{"role": "user", "content": user_prompt}],
            max_tokens=8192,
            temperature=0.9,
            n=1,
            stream=True
        )

        text_parts = []

        logger.info("Starting to receive streaming responses...")
        for chunk in response:
            # Skip chunks that carry no text: delta.content may be None, and
            # concatenating or writing None would raise TypeError.
            if not chunk.choices:
                continue
            event_text = chunk.choices[0].delta.content
            if not event_text:
                continue
            text_parts.append(event_text)
            sys.stdout.write(event_text)
            sys.stdout.flush()

        logger.info("Completed receiving streaming responses.")
        return ''.join(text_parts)

    except openai.OpenAIError as e:
        logger.error(f"OpenAI API Error: {e}")
        sys.exit("Exiting due to OpenAI API error.")

    except Exception as e:
        logger.error(f"Unexpected error during streaming: {e}")
        sys.exit("Exiting due to an unexpected error.")
|
||||
@@ -20,7 +20,12 @@ import tempfile
|
||||
from html2image import Html2Image
|
||||
import datetime
|
||||
from PIL import Image
|
||||
import moviepy.editor as mp
|
||||
import requests
|
||||
from moviepy.editor import AudioFileClip
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from ..gpt_online_researcher import do_online_research
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
@@ -29,8 +34,6 @@ logger.add(sys.stdout,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
|
||||
def analyze_and_extract_details_from_image(image_path):
|
||||
"""
|
||||
Analyzes an image using OpenAI's Vision API and extracts Alt Text, Description, Title, and Caption.
|
||||
@@ -103,12 +106,14 @@ def analyze_and_extract_details_from_image(image_path):
|
||||
return image_details
|
||||
|
||||
except requests.RequestException as e:
|
||||
sys.exit(f"Error: Failed to communicate with OpenAI API. Error: {e}")
|
||||
#sys.exit(f"Error: GPT-Vision: Failed to communicate with OpenAI API. Error: {e}")
|
||||
logger.error(f"Error: GPT-Vision: Failed to communicate with OpenAI API. Error: {e}")
|
||||
except Exception as e:
|
||||
sys.exit(f"Error occurred: {e}")
|
||||
#sys.exit(f"Error occurred- GPT-Vision: {e}")
|
||||
logger.error(f"Error occurred- GPT-Vision: {e}")
|
||||
|
||||
|
||||
def openai_chatgpt(prompt, model="gpt-3.5-turbo-16k", temperature=0.2, max_tokens=8192, top_p=0.9, n=1):
|
||||
def openai_chatgpt(prompt, model="gpt-4-1106-preview", temperature=0.2, max_tokens=4096, top_p=0.9, n=1):
|
||||
"""
|
||||
Wrapper function for openai chat Completion
|
||||
"""
|
||||
@@ -119,6 +124,10 @@ def openai_chatgpt(prompt, model="gpt-3.5-turbo-16k", temperature=0.2, max_token
|
||||
|
||||
try:
|
||||
client = OpenAI()
|
||||
except Exception as err:
|
||||
print("Error: OpenAI Client.")
|
||||
exit(1)
|
||||
try:
|
||||
# using OpenAI's Completion module that helps execute any tasks involving text
|
||||
response = client.chat.completions.create(
|
||||
# model name used, there are many other models available under the umbrella of GPT-3
|
||||
@@ -142,6 +151,8 @@ def openai_chatgpt(prompt, model="gpt-3.5-turbo-16k", temperature=0.2, max_token
|
||||
except openai.RateLimitError as e:
|
||||
#Handle rate limit error (we recommend using exponential backoff)
|
||||
SystemError(f"OpenAI API request exceeded rate limit: {e}")
|
||||
except Exception as err:
|
||||
SystemError(f"OpenAI client Error: {err}")
|
||||
|
||||
return response.choices[0].message.content
|
||||
|
||||
@@ -231,39 +242,57 @@ def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd"
|
||||
return img_path
|
||||
|
||||
|
||||
def speech_to_text(video_url):
|
||||
""" Common openai function for speech to text. """
|
||||
client = OpenAI()
|
||||
|
||||
def speech_to_text(video_url, output_path='.'):
|
||||
""" Transcribes speech to text from a YouTube video URL. """
|
||||
try:
|
||||
# Download YouTube video
|
||||
logger.info(f"Download YouTube video: {video_url}")
|
||||
# Create a YouTube object
|
||||
print(f"Accessing YouTube URL: {video_url}")
|
||||
yt = YouTube(video_url)
|
||||
stream = yt.streams.filter(only_audio=True).first()
|
||||
|
||||
# Save the video in a temporary file
|
||||
logger.info(f"Finished Downloading, Saving video for transcription.")
|
||||
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
|
||||
temp_file_name = temp_file.name
|
||||
# Select the highest quality audio stream
|
||||
print("Fetching audio stream. Select the highest quality audio stream")
|
||||
audio_stream = yt.streams.filter(only_audio=True).first()
|
||||
|
||||
stream.download(output_path=os.path.dirname(temp_file_name), filename=os.path.basename(temp_file_name))
|
||||
try:
|
||||
# Transcribe the video using OpenAI's Whisper API
|
||||
logger.info(f"Transcribe the video using OpenAI's Whisper API")
|
||||
with open(temp_file_name, "rb") as audio_file:
|
||||
if audio_stream is None:
|
||||
print("No audio stream found for this video.")
|
||||
return
|
||||
else:
|
||||
# Download the audio stream
|
||||
print(f"Downloading audio for: {yt.title}")
|
||||
audio_file = audio_stream.download(output_path)
|
||||
print(f"Downloaded: {yt.title} to {output_path}")
|
||||
|
||||
try:
|
||||
# Check if the audio file size is less than 24MB
|
||||
max_file_size = 24 * 1024 * 1024 # 24MB in bytes
|
||||
file_size = os.path.getsize(audio_file)
|
||||
if file_size > max_file_size:
|
||||
print("Error: File size exceeds 24MB limit.")
|
||||
exit(1)
|
||||
|
||||
# File uploads are currently limited to 25 MB and the following input
|
||||
# file types are supported: mp3, mp4, mpeg, mpga, m4a, wav, and webm.
|
||||
try:
|
||||
client = OpenAI()
|
||||
except Exception as err:
|
||||
SystemExit("Unable to get openai client object: {err}")
|
||||
|
||||
print("Transcribing using Openai whisper.")
|
||||
transcript = client.audio.transcriptions.create(
|
||||
model="whisper-1",
|
||||
file=audio_file
|
||||
model="whisper-1",
|
||||
file=open(audio_file, "rb"),
|
||||
response_format="text"
|
||||
)
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to transcribe using whisper model: {err}")
|
||||
|
||||
logger.info("Finished Transcribing. Creating a blog from the transcript.")
|
||||
# Remove the temporary file after transcription
|
||||
os.remove(temp_file_name)
|
||||
return(transcript)
|
||||
return transcript
|
||||
except Exception as err:
|
||||
print(f"Failed in whisper transcription: {err}")
|
||||
exit(1)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error: speech-to-text, Failed to transcribe url: {video_url} with error: {e}")
|
||||
print(f"YT video download, An error occurred: {e}")
|
||||
exit(1)
|
||||
os.remove(audio_file)
|
||||
|
||||
|
||||
# The idea is to download images from other blogs and recreate from it.
|
||||
|
||||
35
lib/gpt_providers/save_image.py
Normal file
35
lib/gpt_providers/save_image.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import datetime
|
||||
import os
|
||||
import requests
|
||||
from PIL import Image
|
||||
import logging
|
||||
|
||||
def save_generated_image(img_generation_response, image_dir):
    """
    Save generated images for blog, ensuring unique names for SEO.

    Args:
        img_generation_response: Image-generation response object; only
            ``.data[0].url`` is read from it.
        image_dir (str): Directory the PNG is written into (created if missing).

    Returns:
        str | None: Path of the saved image, or None when the download or the
        write to disk fails.
    """
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Timestamped file name (second resolution) keeps successive images apart.
    generated_image_name = f"generated_image_{datetime.datetime.now():%Y-%m-%d-%H-%M-%S}.png"
    generated_image_filepath = os.path.join(image_dir, generated_image_name)
    generated_image_url = img_generation_response.data[0].url

    logger.info(f"Fetch the image from url: {generated_image_url}")
    try:
        if image_dir:
            os.makedirs(image_dir, exist_ok=True)
        # A timeout keeps a stalled download from hanging the whole pipeline;
        # the context manager releases the connection when done.
        with requests.get(generated_image_url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(generated_image_filepath, "wb") as image_file:
                # Honour stream=True: write in chunks instead of buffering it all.
                for chunk in response.iter_content(chunk_size=8192):
                    image_file.write(chunk)
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to get generated image content: {e}")
        return None
    except OSError as e:
        logger.error(f"Failed to save generated image at {generated_image_filepath}: {e}")
        return None

    logger.info(f"Saved image at path: {generated_image_filepath}")

    if os.environ.get('DISPLAY', ''):  # Check if display is supported
        with Image.open(generated_image_filepath) as img:
            img.show()

    return generated_image_filepath
|
||||
|
||||
88
lib/gpt_providers/stt_audio_blog.py
Normal file
88
lib/gpt_providers/stt_audio_blog.py
Normal file
@@ -0,0 +1,88 @@
|
||||
from pytube import YouTube
|
||||
import os
|
||||
import sys
|
||||
from loguru import logger
|
||||
from openai import OpenAI
|
||||
from tqdm import tqdm
|
||||
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
|
||||
def progress_function(stream, chunk, bytes_remaining):
    """Download-progress callback: move the shared progress bar to the fraction downloaded so far."""
    total_bytes = stream.filesize
    fraction_done = (total_bytes - bytes_remaining) / total_bytes
    # update() takes an increment, so pass the distance from the bar's
    # current position rather than the absolute fraction.
    progress_bar.update(fraction_done - progress_bar.n)
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def speech_to_text(video_url, output_path='.'):
    """
    Transcribes speech to text from a YouTube video URL using OpenAI's Whisper model.

    Args:
        video_url (str): URL of the YouTube video to transcribe.
        output_path (str, optional): Directory where the audio file will be saved. Defaults to '.'.

    Returns:
        tuple[str, str] | None: ``(transcript, video_title)``, or None when the
        video has no audio stream.

    Raises:
        SystemExit: If the audio exceeds the 24MB limit, the transcription
            fails, or the YouTube download/processing fails.
    """
    global progress_bar

    # Bound up front so the cleanup in `finally` is safe on every exit path,
    # including the early "no audio stream" return and failures before download.
    audio_file = None
    try:
        logger.info(f"Accessing YouTube URL: {video_url}")
        yt = YouTube(video_url, on_progress_callback=progress_function)

        logger.info("Fetching the highest quality audio stream")
        audio_stream = yt.streams.filter(only_audio=True).first()

        if audio_stream is None:
            logger.warning("No audio stream found for this video.")
            return None

        # progress_function() reads this module-level bar while downloading.
        progress_bar = tqdm(total=1.0, unit='iB', unit_scale=True, desc=yt.title)
        try:
            audio_file = audio_stream.download(output_path)
        finally:
            progress_bar.close()
        logger.info(f"Audio downloaded: {yt.title} to {output_path}")

        # Checking file size
        max_file_size = 24 * 1024 * 1024  # 24MB
        file_size = os.path.getsize(audio_file)
        file_size_MB = file_size / (1024 * 1024)  # Convert bytes to MB for logging
        logger.info(f"Downloaded Audio Size is: {file_size_MB:.2f} MB")
        if file_size > max_file_size:
            logger.error("File size exceeds 24MB limit.")
            sys.exit("File size limit exceeded.")

        try:
            logger.info("Initializing OpenAI client for transcription.")
            client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

            logger.info("Transcribing using OpenAI's Whisper model.")
            # Close the handle before the file is deleted in `finally`.
            with open(audio_file, "rb") as audio_handle:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_handle,
                    response_format="text"
                )
            logger.info(f"\nYouTube video transcription:\n\n{transcript}\n")
            return transcript, yt.title

        except Exception as e:
            logger.error(f"Failed in Whisper transcription: {e}")
            sys.exit("Transcription failure.")

    except Exception as e:
        logger.error(f"An error occurred during YouTube video processing: {e}")
        sys.exit("Video processing failure.")

    finally:
        if audio_file and os.path.exists(audio_file):
            os.remove(audio_file)
            logger.info("Temporary audio file removed.")
|
||||
74
lib/gpt_providers/stt_audio_blog.py.bk
Normal file
74
lib/gpt_providers/stt_audio_blog.py.bk
Normal file
@@ -0,0 +1,74 @@
|
||||
from pytube import YouTube
|
||||
import os
|
||||
import sys
|
||||
from loguru import logger
|
||||
from openai import OpenAI
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def speech_to_text(video_url, output_path='.'):
    """
    Transcribes speech to text from a YouTube video URL using OpenAI's Whisper model.

    Args:
        video_url (str): URL of the YouTube video to transcribe.
        output_path (str, optional): Directory where the audio file will be saved. Defaults to '.'.

    Returns:
        tuple[str, str] | None: ``(transcript, video_title)``, or None when the
        video has no audio stream.

    Raises:
        SystemExit: If a critical error occurs that prevents successful execution.
    """
    # Bound before the try so the `finally` cleanup never sees an unbound name.
    audio_file = None
    try:
        logger.info(f"Accessing YouTube URL: {video_url}")
        yt = YouTube(video_url)

        logger.info("Fetching the highest quality audio stream")
        audio_stream = yt.streams.filter(only_audio=True).first()

        if audio_stream is None:
            logger.warning("No audio stream found for this video.")
            return None

        logger.info(f"Downloading audio for: {yt.title}")
        audio_file = audio_stream.download(output_path)
        logger.info(f"Audio downloaded: {yt.title} to {output_path}")

        # Checking file size
        max_file_size = 24 * 1024 * 1024  # 24MB
        file_size = os.path.getsize(audio_file)
        # Log the size of the download itself, not the limit constant.
        logger.info(f"Downloaded Audio Size is: {file_size / (1024 * 1024):.2f} MB")
        if file_size > max_file_size:
            logger.error("File size exceeds 24MB limit.")
            sys.exit("File size limit exceeded.")

        try:
            logger.info("Initializing OpenAI client for transcription.")
            client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

            logger.info("Transcribing using OpenAI's Whisper model.")
            # Close the handle before the file is deleted in `finally`.
            with open(audio_file, "rb") as audio_handle:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_handle,
                    response_format="text"
                )
            return transcript, yt.title

        except Exception as e:
            logger.error(f"Failed in Whisper transcription: {e}")
            sys.exit("Transcription failure.")

    except Exception as e:
        logger.error(f"An error occurred during YouTube video processing: {e}")
        sys.exit("Video processing failure.")

    finally:
        if audio_file and os.path.exists(audio_file):
            os.remove(audio_file)
            logger.info("Temporary audio file removed.")
|
||||
124
lib/main_audio_to_blog.py
Normal file
124
lib/main_audio_to_blog.py
Normal file
@@ -0,0 +1,124 @@
|
||||
import json
|
||||
import os
|
||||
import datetime #I wish
|
||||
import sys
|
||||
|
||||
import openai
|
||||
from tqdm import tqdm, trange
|
||||
import time
|
||||
import re
|
||||
from textwrap import dedent
|
||||
import nltk
|
||||
nltk.download('punkt', quiet=True)
|
||||
from nltk.corpus import stopwords
|
||||
nltk.download('stopwords', quiet=True)
|
||||
|
||||
from .write_blogs_from_youtube_videos import youtube_to_blog
|
||||
from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
|
||||
from .gpt_online_researcher import do_online_research
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
# fixme: Remove the hardcoding, need add another option OR in config ?
image_dir = os.path.join(os.getcwd(), "blog_images")
# TBD: This can come from config file.
output_path = os.path.join(os.getcwd(), "blogs")

# WordPress connection settings come from the environment so that credentials
# are never committed to the repository.
# SECURITY: the application password that used to be hard-coded here is in
# version-control history and should be revoked and regenerated.
wordpress_url = os.getenv('WORDPRESS_URL', 'https://latestaitools.in/')
wordpress_username = os.getenv('WORDPRESS_USERNAME', 'upaudel750')
wordpress_password = os.getenv('WORDPRESS_PASSWORD', '')
|
||||
|
||||
|
||||
def generate_youtube_blog(yt_url_list, output_format="markdown"):
    """Takes a list of youtube videos and generates blog for each one of them.

    For every URL: build the blog body from the video, run online research,
    generate title / description / tags / categories / intro / image /
    conclusion, proofread the assembled markdown, convert it to the requested
    format and save it to a file.

    Args:
        yt_url_list (list[str]): YouTube video URLs to turn into blog posts.
        output_format (str, optional): Requested output format; checked
            case-insensitively for 'html' or 'markdown'. Defaults to "markdown".

    Raises:
        SystemExit: If any generation step fails for a URL.
    """
    for a_yt_url in yt_url_list:
        try:
            logger.info(f"Starting to write blog on URL: {a_yt_url}")
            yt_blog = youtube_to_blog(a_yt_url)
        except Exception as e:
            logger.error(f"Error in youtube_to_blog: {e}")
            sys.exit(1)

        try:
            logger.info("Starting with online research for URL title.")
            research_report = do_online_research(yt_blog)
        except Exception as e:
            logger.error(f"Error in do_online_research: {e}")
            sys.exit(1)

        try:
            # Note: Check if the order of input matters for your function
            # NOTE(review): the return value is discarded, so the research does
            # not reach the saved blog unless blog_with_research mutates its
            # inputs - confirm the intended use.
            logger.info("Preparing a blog content from audio script and online research content...")
            blog_with_research(research_report, yt_blog)
        except Exception as e:
            logger.error(f"Error in blog_with_research: {e}")
            sys.exit(1)

        try:
            # Get the title and meta description of the blog.
            blog_meta_desc = generate_blog_description(yt_blog)
            title = generate_blog_title(blog_meta_desc)
            logger.info(f"Title is {title} and description is {blog_meta_desc}")
            # Holds the blog as one markdown string, later saved to a file.
            blog_markdown_str = "# " + title.replace('"', '') + "\n\n"

            # Get blog tags and categories.
            blog_tags = get_blog_tags(blog_meta_desc)
            logger.info(f"Blog tags are: {blog_tags}")
            blog_categories = get_blog_categories(blog_meta_desc)
            logger.info(f"Blog categories are: {blog_categories}")

            # Generate an introduction for the blog
            blog_intro = get_blog_intro(title, yt_blog)
            logger.info(f"The Blog intro is:\n {blog_intro}")
            blog_markdown_str = blog_markdown_str + "\n\n" + f"{blog_intro}" + "\n\n"

            # Generate an image based on meta description
            logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
            main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")

            # TBD: embed a variation of the video screenshot / a stable
            # diffusion image in the body once those generators are wired in.

            # Add the body of the blog content.
            blog_markdown_str = blog_markdown_str + "\n\n" + f'{yt_blog}' + "\n\n"

            # Get the Conclusion of the blog, by passing the generated blog.
            blog_conclusion = get_blog_conclusion(blog_markdown_str)
            # TBD: Add another image.
            blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n\n" + f"{blog_conclusion}" + "\n"

            # Proofread the blog, edit and remove duplicates and refine it further.
            # Presently, fixing the blog keywords to be tags and categories.
            blog_keywords = f"{blog_tags} + {blog_categories}"
            blog_markdown_str = blog_proof_editor(blog_markdown_str, blog_keywords)

            # Check the type of blog format needed by the user. The previous
            # code tested `output_path` and had the two converters swapped.
            requested_format = output_format.lower()
            if 'html' in requested_format:
                blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
            elif 'markdown' in requested_format:
                blog_markdown_str = convert_tomarkdown_format(blog_markdown_str)

            # Try to save the blog content in a file, in whichever format. Just dump it.
            try:
                save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
            except Exception as err:
                # Log the cause and the content so the generated blog is not lost.
                logger.error(f"Failed to Save blog content: {err}\n{blog_markdown_str}")

        except Exception as e:
            logger.error(f"Error: Failed to generate_youtube_blog: {e}")
            sys.exit(1)
|
||||
113
lib/main_keywords_to_blog.py
Normal file
113
lib/main_keywords_to_blog.py
Normal file
@@ -0,0 +1,113 @@
|
||||
|
||||
|
||||
def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
                           wordpress=False, research_online=False, output_format="HTML"):
    """
    This function will take a blog Topic to first generate sections for it
    and then generate content for each section.

    Args:
        num_blogs: Number of blog topics to request.
        blog_keywords: Keywords the topics and section content are generated from.
        niche: Passed through to topic generation.
        num_subtopics: Number of outline entries to request per topic.
        wordpress (bool, optional): Upload each finished blog to WordPress.
        research_online (bool, optional): Currently unused (online research is TBD).
        output_format (str, optional): Checked case-insensitively for 'html';
            when present the markdown is converted to HTML before saving.

    Raises:
        SystemExit: If image analysis, media upload or post upload fails.
    """
    # The default is "HTML", so the check must be case-insensitive to match it.
    wants_html = 'html' in output_format.lower()

    # TBD: Check if the generated topics are equal to what user asked.
    blog_topic_arr = generate_blog_topics(blog_keywords, num_blogs, niche)
    logger.info(f"Generated Blog Topics:---- \n{blog_topic_arr}\n")
    # Split the string at newlines
    blog_topic_arr = blog_topic_arr.split('\n')

    # For each of blog topic, generate content.
    for a_blog_topic in blog_topic_arr:
        a_blog_topic = a_blog_topic.replace('"', '')
        # Strip list numbering such as "1. " from the start of the topic.
        a_blog_topic = re.sub(r'^[\d.\s]+', '', a_blog_topic)
        if not a_blog_topic.strip():
            # Blank lines in the model output are not topics.
            continue
        # Holds the blog as one markdown string, later saved to a file.
        blog_markdown_str = "# " + a_blog_topic + "\n\n"

        # Get the introduction specific to blog title and sub topics.
        tpc_outlines = generate_topic_outline(a_blog_topic, num_subtopics)
        tpc_outlines = [line for line in tpc_outlines.split("\n") if line.strip()]

        blog_intro = get_blog_intro(a_blog_topic, tpc_outlines)
        logger.info(f"The intro is:\n{blog_intro}")
        blog_markdown_str = blog_markdown_str + "### Introduction" + "\n\n" + f"{blog_intro}" + "\n\n"

        # Now, for each blog we have sub topic. Generate content for each of the sub topic.
        for a_outline in tpc_outlines:
            a_outline = a_outline.replace('"', '')
            logger.info(f"Generating content for sub-topic: {a_outline}")
            sub_topic_content = generate_topic_content(blog_keywords, a_outline)
            # a_outline is sub topic heading, hence part ToC also.
            blog_markdown_str = blog_markdown_str + "\n" + f"\n {sub_topic_content}" + "\n\n"

        # Get the Conclusion of the blog, by passing the generated blog.
        blog_conclusion = get_blog_conclusion(blog_markdown_str)
        blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n" + f"{blog_conclusion}" + "\n"

        logger.info(f"Final blog content: {blog_markdown_str}")

        # TBD: when research_online is set, merge an online research report
        # into the blog content here.

        blog_meta_desc = generate_blog_description(blog_markdown_str)
        logger.info(f"\nThe blog meta description is:{blog_meta_desc}\n")

        # Generate an image based on meta description
        logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
        main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")

        blog_tags = get_blog_tags(blog_markdown_str)
        logger.info(f"\nBlog tags for generated content: {blog_tags}\n")

        blog_categories = get_blog_categories(blog_markdown_str)
        logger.info(f"Generated blog categories: {blog_categories}\n")

        # Use chatgpt to convert the text into HTML or markdown.
        # Track the conversion so the WordPress path does not convert twice.
        converted_to_html = False
        if wants_html:
            blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
            converted_to_html = True

        # Check if blog needs to be posted on wordpress.
        if wordpress:
            # Fixme: Fetch all tags and categories to check, if present ones are present and
            # use them else create new ones. Its better to use chatgpt than string comparison.
            # Similar tags and categories will be missed.
            logger.info("Uploading the blog to wordpress.\n")
            main_img_path = compress_image(main_img_path, quality=85)
            try:
                img_details = analyze_and_extract_details_from_image(main_img_path)
                alt_text = img_details.get('alt_text')
                img_description = img_details.get('description')
                img_title = img_details.get('title')
                caption = img_details.get('caption')
            except Exception as e:
                sys.exit(f"Error occurred in analyze_and_extract_details_from_image: {e}")

            try:
                media = upload_media(wordpress_url, wordpress_username, wordpress_password,
                                     main_img_path, alt_text, img_description, img_title, caption)
            except Exception as err:
                sys.exit(f"Error occurred in upload_media: {err}")

            # Then create the post with the uploaded media as the featured image
            media_id = media['id']
            if not converted_to_html:
                blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
                converted_to_html = True
            try:
                upload_blog_post(wordpress_url, wordpress_username, wordpress_password, a_blog_topic,
                                 blog_markdown_str, media_id, blog_meta_desc, blog_categories, blog_tags,
                                 status='publish')
            except Exception as err:
                sys.exit(f"Failed to upload blog to wordpress.Error: {err}")

        # TBD: Save the blog content as a .md file. Markdown or HTML ?
        save_blog_to_file(blog_markdown_str,
                          a_blog_topic,
                          blog_meta_desc, blog_tags,
                          blog_categories, main_img_path)
|
||||
|
||||
# Now, we need to perform some basic checks on the blog content, such as:
|
||||
# is_content_ai_generated.py, plagiarism_checker_from_known_sources.py
|
||||
# seo_analyzer.py . These are present in the lib folder.
|
||||
# prompt: Rewrite, improve and paraphrase [text] and use headings and subheadings
|
||||
# to break up the content and make it easier to read using the keyword [keyword].
|
||||
76
lib/save_blog_to_file.py
Normal file
76
lib/save_blog_to_file.py
Normal file
@@ -0,0 +1,76 @@
|
||||
def save_blog_to_file(blog_content, blog_title,
                      blog_meta_desc, blog_tags, blog_categories, main_img_path, file_type="md"):
    """ Common function to save the generated blog to a file.

    Writes ``YYYY-MM-DD-<slug>.md`` into the module-level ``output_path`` with a
    Jekyll front-matter header followed by the blog content.

    Args:
        blog_content (str): Body of the blog.
        blog_title (str): Title; used in the front matter and for the file name.
        blog_meta_desc (str): Meta description; a leading "label:" is stripped.
        blog_tags: Rendered inside ``tags: [...]``.
        blog_categories: Rendered inside ``categories: [...]``.
        main_img_path (str): Path of the main image; only its base name is stored.
        file_type (str, optional): "md"/"markdown". Other types (e.g. "html")
            are not implemented yet and are skipped with a warning.

    Raises:
        Exception: If the blog file cannot be written.
    """
    logger.info(f"The blog will be saved at: {output_path}")
    logger.debug(f"Blog Title is: {blog_title}")

    # Build the file-name slug. Stop words must be removed while the words are
    # still space separated; once joined by dashes the title is one token.
    slug_source = re.sub('[^a-zA-Z0-9- ]', '', blog_title)
    slug_source = remove_stop_words(slug_source)
    blog_title_md = re.sub(r'\s+', '-', slug_source.strip())
    blog_title_md = re.sub('-+', '-', blog_title_md).strip('-') or "untitled"

    # A colon in the title would break the YAML front matter.
    blog_title = blog_title.replace(":", "")

    # Drop a leading "label:" prefix but keep any later colons in the text.
    if ':' in blog_meta_desc:
        blog_meta_desc = blog_meta_desc.split(':', 1)[1].strip()

    if not os.path.exists(output_path):
        logger.error(f"Error: Blog output directory is set to {output_path}, which Does Not Exist.")

    # Different output formats are plaintext, html and markdown.
    if file_type.lower().lstrip('.') not in ("md", "markdown"):
        logger.warning(f"Saving blogs as '{file_type}' is not supported yet; nothing was written.")
        return

    logger.info("Writing/Saving the resultant blog content in Markdown format.")
    # fill the Front Matter as below at the top of the post: https://jekyllrb.com/docs/front-matter/
    # date: YYYY-MM-DD HH:MM:SS +/-TTTT
    from zoneinfo import ZoneInfo
    dtobj = datetime.datetime.now(tz=ZoneInfo('Asia/Kolkata'))
    formatted_date = dtobj.strftime('%Y-%m-%d %H:%M:%S %z')
    image_name = os.path.basename(main_img_path) if main_img_path else ''

    blog_frontmatter = "\n".join([
        "---",
        f"title: {blog_title}",
        f"date: {formatted_date}",
        f"categories: [{blog_categories}]",
        f"tags: [{blog_tags}]",
        f"description: {blog_meta_desc}",
        "img_path: '/assets/'",
        "image:",
        f"  path: {image_name}",
        f"  alt: {blog_title}",
        "---",
    ]) + "\n\n"

    # Create a new file named YYYY-MM-DD-TITLE.EXTENSION and put it in the _posts of the root directory.
    # Please note that the EXTENSION must be one of md or markdown
    # The file-name date comes from the same timestamp as the front matter.
    blog_output_path = os.path.join(
        output_path,
        f"{dtobj.strftime('%Y-%m-%d')}-{blog_title_md}.md"
    )
    # Save the generated blog content to a file.
    try:
        with open(blog_output_path, "w", encoding="utf-8") as f:
            f.write(blog_frontmatter)
            f.write(blog_content)
    except Exception as e:
        raise Exception(f"Failed to write blog content: {e}") from e
    logger.info(f"\nSuccessfully saved and Posted blog at: {blog_output_path}\n")
|
||||
|
||||
|
||||
# Helper function
|
||||
def remove_stop_words(sentence):
    """Return *sentence* with English stop words removed.

    The text is tokenized with NLTK; tokens whose lowercase form is in NLTK's
    English stop-word list are dropped and the rest are joined with single
    spaces.
    """
    tokens = nltk.word_tokenize(sentence)
    english_stop_words = set(stopwords.words('english'))

    kept_tokens = []
    for token in tokens:
        if token.lower() in english_stop_words:
            continue
        kept_tokens.append(token)

    return ' '.join(kept_tokens)
|
||||
@@ -5,63 +5,62 @@ from PIL import Image
|
||||
from stability_sdk import client
|
||||
import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
|
||||
|
||||
# Our Host URL should not be prepended with "https" nor should it have a trailing slash.
|
||||
# Set the host URL environment variable. Ensure it doesn't have 'https' or a trailing slash.
|
||||
os.environ['STABILITY_HOST'] = 'grpc.stability.ai:443'
|
||||
|
||||
# Sign up for an account at the following link to get an API Key.
|
||||
# Ensure you sign up for an account to obtain an API key:
|
||||
# https://platform.stability.ai/
|
||||
# Click on the following link once you have created an account to be taken to your API Key.
|
||||
# Your API key can be found here after account creation:
|
||||
# https://platform.stability.ai/account/keys
|
||||
|
||||
|
||||
def generate_stable_diffusion_image(prompt, image_dir):
    """
    Generate images using Stable Diffusion API based on a given prompt.

    Each image artifact returned by the API is opened, displayed with
    ``img.show()`` and saved as ``<seed>.png`` inside ``image_dir``.

    Args:
        prompt (str): The prompt to generate the image.
        image_dir (str): The directory where the image will be saved.

    Warns:
        UserWarning: If an artifact's finish reason is ``generation.FILTER``
            (the request tripped the API's safety filters).

    Raises:
        Exception: For any issues during image generation or saving; the
            original error is chained as ``__cause__``.
    """
    try:
        # Initialize the StabilityInference client with the API key and other settings.
        # Available engines: https://platform.stability.ai/docs/features/api-parameters#engine
        stability_api = client.StabilityInference(
            key=os.environ['STABILITY_KEY'],  # Reference to the API key.
            verbose=True,  # Enable verbose mode for debug messages.
            engine="stable-diffusion-xl-1024-v1-0",  # Engine used for generation.
        )

        # Generating the image with specified parameters. The caller's prompt
        # is used as-is; it must not be overwritten here.
        answers = stability_api.generate(
            prompt=prompt,
            seed=4253978046,  # Deterministic seed for reproducible results.
            steps=50,  # Number of inference steps.
            cfg_scale=7.0,  # Strength of prompt matching.
            width=1024,
            height=1024,  # Image dimensions.
            samples=1,  # Number of images to generate.
            sampler=generation.SAMPLER_K_DPMPP_2M,  # Denoising sampler selection.
        )

        # Process responses and save images.
        for resp in answers:
            for artifact in resp.artifacts:
                if artifact.finish_reason == generation.FILTER:
                    warnings.warn(
                        "Request activated safety filters. Modify the prompt and retry."
                    )
                if artifact.type == generation.ARTIFACT_IMAGE:
                    img = Image.open(io.BytesIO(artifact.binary))
                    img_name = os.path.join(image_dir, f"{artifact.seed}.png")
                    img.show()
                    img.save(img_name)  # Save the image with the seed in the filename.

    except Exception as e:
        # Chain the original exception so the real traceback is not lost.
        raise Exception(f"Error during image generation or saving: {e}") from e


# Example usage:
# generate_stable_diffusion_image("A futuristic cityscape", "/path/to/save/images/")

# NOTE(review): the statements below run on import and call the paid API with a
# machine-specific output directory; consider moving them under
# `if __name__ == "__main__":`.
prompt = "An image of a digital marketing campaign with various elements such as social media ads, email marketing, data analysis, and customer interaction. The image should depict the integration of generative AI technologies, such as machine learning algorithms and neural networks, into the digital marketing process. It should showcase how these technologies revolutionize the field by enhancing efficiency, personalization, creativity, decision making, and customer experience. The image should also illustrate the potential for better return on investment (ROI) and hyper-personalization through generative AI in digital marketing."
image_dir = '/home/ajsingh/pseo_experiments/lib'
generate_stable_diffusion_image(prompt, image_dir)
|
||||
|
||||
44
lib/toc_example.py
Normal file
44
lib/toc_example.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
|
||||
def create_table_of_contents(html_content):
    """
    Create a table of contents for a given HTML content.

    Every ``h1``-``h6`` element becomes one entry in a
    ``<div id="table-of-contents">`` block that is inserted at the very start
    of the document. A heading that already has a non-empty ``id`` keeps it
    (so existing in-page links keep working); any other heading is assigned
    ``toc_<n>``, where ``n`` is its 1-based position among all headings.

    Args:
        html_content (str): HTML content of the blog post.

    Returns:
        str: HTML content with a table of contents.
    """
    # Use BeautifulSoup to parse the HTML
    soup = BeautifulSoup(html_content, 'html.parser')

    # Find all header tags (h1, h2, h3, h4, h5, h6)
    headers = soup.find_all(re.compile('^h[1-6]$'))

    # Build the TOC skeleton in its own soup so its <h2> is not picked up as
    # one of the document headings above.
    toc = BeautifulSoup('<div id="table-of-contents"><h2>Table of Contents</h2><ul></ul></div>', 'html.parser')
    toc_ul = toc.find('ul')

    # Loop through headers and add them to the table of contents
    for i, header in enumerate(headers, start=1):
        # Reuse an existing anchor instead of clobbering it.
        header_id = header.get('id') or f"toc_{i}"
        header['id'] = header_id

        toc_entry = soup.new_tag('li')
        toc_link = soup.new_tag('a', href=f"#{header_id}")
        toc_link.string = header.get_text()
        toc_entry.append(toc_link)
        toc_ul.append(toc_entry)

    # Insert the table of contents at the beginning of the content
    soup.insert(0, toc)

    return str(soup)
|
||||
|
||||
# Demo: build a small document with one title and two subtitles, add a table
# of contents to it, and print the resulting HTML.
html_content = (
    "<h1>Title</h1><p>Some text</p>"
    "<h2>Subtitle 1</h2><p>Text under subtitle 1</p>"
    "<h2>Subtitle 2</h2><p>Text under subtitle 2</p>"
)
html_with_toc = create_table_of_contents(html_content)
print(html_with_toc)
|
||||
|
||||
@@ -14,23 +14,34 @@ logger.add(sys.stdout,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
from .gpt_providers.openai_gpt_provider import openai_chatgpt, openai_chatgpt_streaming_text, speech_to_text
|
||||
from .gpt_providers.stt_audio_blog import speech_to_text
|
||||
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
|
||||
|
||||
def youtube_to_blog(video_url):
    """Transcribe the video at ``video_url`` and turn the transcript into blog content.

    Args:
        video_url (str): URL of the YouTube video; passed unchanged to
            ``speech_to_text``.

    Returns:
        tuple: ``(audio_blog_content, audio_title)`` - the result of
        ``summarize_youtube_video`` on the transcript, and the title returned
        by ``speech_to_text``.

    Note:
        If either step raises, the error is logged and the process exits with
        status 1 via ``sys.exit``.
    """
    # The Html2Image thumbnail screenshot is disabled, so the video id is no
    # longer needed. It used to be parsed with video_url.split("=")[1], which
    # raised IndexError for URLs without an "=" before any work was done.

    try:
        # Starting the speech-to-text process
        logger.info("Starting with Speech to Text.")
        audio_text, audio_title = speech_to_text(video_url)
    except Exception as e:
        logger.error(f"Error in speech_to_text: {e}")
        sys.exit(1)  # Exit the program due to error in speech_to_text

    try:
        # Summarizing the content of the YouTube video
        audio_blog_content = summarize_youtube_video(audio_text)
    except Exception as e:
        logger.error(f"Error in summarize_youtube_video: {e}")
        sys.exit(1)  # Exit the program due to error in summarize_youtube_video

    return audio_blog_content, audio_title
|
||||
|
||||
|
||||
def summarize_youtube_video(user_content):
|
||||
@@ -42,6 +53,7 @@ def summarize_youtube_video(user_content):
|
||||
Returns:
|
||||
A string containing the summary of the video.
|
||||
"""
|
||||
logger.info("Start summarize_youtube_video..")
|
||||
prompt = f"""
|
||||
You are an expert copywriter specializing in content optimization for SEO.
|
||||
Your task is to transform a given transcript into a well-structured and engaging blog article. Your objectives include:
|
||||
@@ -65,6 +77,8 @@ def summarize_youtube_video(user_content):
|
||||
Follow the above guidelines to create a well-optimized, unique, and informative article that will rank well in search engine results and engage readers effectively.
|
||||
Craft a blog content from the following transcript:\n{user_content}
|
||||
"""
|
||||
#completion_text = openai_chatgpt_streaming_text(prompt)
|
||||
completion_text = openai_chatgpt(prompt)
|
||||
return completion_text
|
||||
try:
|
||||
response = openai_chatgpt(prompt)
|
||||
return response
|
||||
except Exception as err:
|
||||
SystemError(f"Error in generating blog summary: {err}")
|
||||
|
||||
Reference in New Issue
Block a user