Blogen-V.000.0.1 Added features,Cleanup. WIP

This commit is contained in:
AjaySi
2023-12-09 18:07:18 +05:30
parent edc468f4aa
commit eaf13c2d16
164 changed files with 1859 additions and 71990 deletions

33
lib/blog_proof_reader.py Normal file
View File

@@ -0,0 +1,33 @@
def blog_proof_editor(blog_content, blog_keywords):
    """Ask the chat model to proofread and refine the given blog text.

    Args:
        blog_content: The blog text to be edited; substituted at the end of
            the prompt.
        blog_keywords: Topic keywords of the blog; substituted into the prompt.

    Returns:
        Whatever ``openai_chatgpt`` returns for the proofreading prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    # The prompt must be an f-string: otherwise the model receives the literal
    # placeholders instead of the keywords and the blog text.
    prompt = f"""I am looking for detailed editing and enhancement of the given blog post,
with a particular focus on maintaining originality.
The topic of the content is [{blog_keywords}]. Please go through the blog and make direct edits to improve it,
ensuring the final output is both high-quality and original.
Note: There are duplicates headings and corresponding paragraphs, rewrite into one subheading.
Here are the specific areas to focus on:
1). Ensure Originality: Edit any sections that lack originality, replacing them with unique and creative content.
2). Eliminate Repetitive Language: Rewrite repetitive phrases with varied and engaging language.
3). Vocabulary and Grammar Enhancement: Directly correct any grammatical errors and upgrade the
vocabulary for better readability.
4). Improve Sentence Structure: Enhance sentence construction for better clarity and flow.
5). Tone and Brand Alignment: Adjust the tone, voice, personality of given content to make it unique.
6). Optimize Content Structure: Reorganize the content for a more impactful presentation,
including better paragraphing and transitions.
7). Remove Redundancies: Important, Cut out any redundant information or overly complex jargon.
8). Refine Overall Structure: Make structural changes to improve the overall impact of the content.
9). Remember, rewrite all content that repeated, while maintaining the formatting of the given blog text.
Please apply these changes directly to the following blog text and provide the edited version:
[{blog_content}]. """
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # The exception has to be raised; merely constructing it hides the failure.
        raise SystemError(f"Error Blog Proof Reading: {err}") from err

View File

@@ -0,0 +1,40 @@
def blog_with_research(report, blog):
    """Combine an online research report with generated blog content.

    Args:
        report: Research report text, used as context for the blog.
        blog: Blog content to be merged with the report.

    Returns:
        Whatever ``openai_chatgpt`` returns for the combining prompt (the
        prompt asks for a complete HTML article).

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    prompt = f"""
You are an expert copywriter specializing in content optimization for SEO.
I will provide you with a research report and a blog content on the same topic.
Treat the research report as the context for the blog and better it accordingly.
Your task is to transform and combine the given research and blog content into a well-structured, unique
and engaging blog article.
Your objectives include:
1. Master the report and blog content: Understand main ideas, key points, and the core message.
2. Sentence Structure: Rephrase while preserving logical flow and coherence.
3. Identify Main Keyword: Determine the primary topic and combine the articles on the main topic.
4. Keyword Integration: Naturally integrate keywords in headings, subheadings, and body text, avoiding overuse.
5. Write Unique Content: Avoid direct copying from given report and blog; rewrite in your own words and style.
6. Optimize for SEO: Generate high quality informative content.
Implement SEO best practises with appropriate keyword density.
7. Craft Engaging and Informative Article: Provide value and insight to readers.
8. Proofread: Important to Check for grammar, spelling, and punctuation errors.
9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases,
interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures.
10. Structuring: Include an Introduction, subtopics and use bullet points or
numbered lists if appropriate. Important to include FAQs, and Conclusion.
11. Ensure Uniqueness: Guarantee the article is plagiarism-free. Write in unique, informative style.
12. Punctuation: Use appropriate question marks at the end of questions.
13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
14. REMEMBER to give final response as complete HTML.
Follow these guidelines to create a well-optimized, unique, and informative article
that will rank well in search engine results and engage readers effectively.
Create a blog post from the given research report and blog content below.
Research report: {report}
Blog content: {blog}
"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise (not just construct) the error and keep the underlying cause.
        raise SystemError(
            f"Error in combining research report and blog content: {err}"
        ) from err

8
lib/config.json Normal file
View File

@@ -0,0 +1,8 @@
{
"wordpress_url": "https://latestaitools.in/",
"wordpress_username": "username",
"wordpress_password": "password",
"image_dir": "path/to/image_dir",
"output_path": "path/to/output_path"
}

View File

@@ -0,0 +1,27 @@
def convert_tomarkdown_format(blog_content):
    """Ask the chat model to reformat blog content as markdown.

    Args:
        blog_content: Blog text to convert; appended to the prompt.

    Returns:
        Whatever ``openai_chatgpt`` returns for the conversion prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    prompt = f"""
As an expert in markdown language format and font matter, used for static webpages.
Your task is to convert and improve formatting of given blog content.
Do Not modify the content, only modify to convert it into highly readable blog content.
Use below guidelines and include other best practises:
1). Headers for Structure: Use # for main headings and increase the number of # for
subheadings (##, ###, etc.). Organize given content into clear, hierarchical sections.
2). Emphasizing Text: Use single asterisks or underscores for italic (*italic* or _italic_),
double for bold (**bold** or __bold__), and triple for bold italic (***bold italic***).
3). Lists: For unordered lists, use dashes, asterisks, or plus signs (-, *, +).
For ordered lists, use numbers followed by periods (1., 2., etc.).
4). Blockquotes: Use > for blockquotes, and add additional > for nested blockquotes.
5). Code Blocks: Use backticks for inline code (code) and triple backticks for code blocks.
Specify a language for syntax highlighting.
6). Horizontal Lines: Create a horizontal line using three or more asterisks, dashes, or underscores (---, ***).
7). Table Formatting: Use pipes | and dashes - to create tables. Align text with colons.
Convert the given blog content in well organised markdown content: {blog_content}"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise (not just construct) the error and keep the underlying cause.
        raise SystemError(f"Error in converting to Markdown format: {err}") from err

View File

@@ -0,0 +1,35 @@
def convert_markdown_to_html(md_content):
    """Ask the chat model to convert markdown text to HTML.

    Args:
        md_content: Markdown-formatted text; appended to the prompt.

    Returns:
        Whatever ``openai_chatgpt`` returns for the conversion prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    prompt = f"""
You are a skilled web developer tasked with converting a Markdown-formatted text to HTML.
You will be given text in markdown format. Follow these steps to perform the conversion:
1. Parse User's Markdown Input: You will receive a Markdown-formatted text as input from the user.
Carefully analyze the provided Markdown text, paying attention to different elements such as headings (#),
lists (unordered and ordered), bold and italic text, links, images, and code blocks.
2. Generate and Validate HTML: Generate corresponding HTML code for each Markdown element following
the conversion guidelines below. Ensure the generated HTML is well-structured and syntactically correct.
3. Preserve Line Breaks: Markdown line breaks (soft breaks) represented by two spaces at the end of a
line should be converted to <br> tags in HTML to preserve the line breaks.
4. REMEMBER to generate complete, valid HTML response only.
Follow below Conversion Guidelines:
- Headers: Convert Markdown headers (#, ##, ###, etc.) to corresponding HTML header tags (<h1>, <h2>, <h3>, etc.).
- Lists: Convert unordered lists (*) and ordered lists (1., 2., 3., etc.) to <ul> and <ol> HTML tags, respectively.
List items should be enclosed in <li> tags.
- Emphasis: Convert bold (**) and italic (*) text to <strong> and <em> HTML tags, respectively.
- Links: Convert Markdown links ([text](url)) to HTML anchor (<a>) tags. Ensure the href attribute contains the correct URL.
- Images: Convert Markdown image tags (![alt text](image_url)) to HTML image (<img>) tags.
Include the alt attribute for accessibility.
- Code: Convert inline code (`code`) to <code> HTML tags. Convert code blocks (```) to <pre> HTML tags
for preserving formatting.
- Blockquotes: Convert blockquotes (>) to <blockquote> HTML tags.
Convert the following Markdown text to HTML: {md_content}
"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise (not just construct) the error and keep the underlying cause.
        raise SystemError(f"Error in convert to HTML: {err}") from err

16
lib/get_blog_category.py Normal file
View File

@@ -0,0 +1,16 @@
def get_blog_categories(blog_article):
    """Ask the chat model to suggest categories for the given blog content.

    Args:
        blog_article: Blog content; appended to the prompt.

    Returns:
        Whatever ``openai_chatgpt`` returns (the prompt asks for two
        comma-separated categories).

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    # The stray double quote that used to trail the content has been dropped;
    # it was an unbalanced character sent to the model.
    prompt = f"""As an expert SEO and content writer, I will provide you with blog content.
Suggest only 2 blog categories which are most relevant to provided blog content,
by identifying the main topic. Also consider the target audience and the
blog's category taxonomy. Only reply with comma separated values. The blog content is: {blog_article}
"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise (not just construct) the error so callers do not silently get None.
        raise SystemError(f"Error in generating blog categories: {err}") from err

View File

@@ -0,0 +1,15 @@
def get_blog_conclusion(blog_content):
    """Ask the chat model to write a conclusion for the given blog content.

    Args:
        blog_content: Blog text to conclude; appended to the prompt.

    Returns:
        Whatever ``openai_chatgpt`` returns for the conclusion prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    prompt = f"""As an expert SEO and blog writer, please conclude the given blog providing vital take aways,
summarise key points (no more than 300 characters) in bullet points. The blog content: {blog_content}
"""
    logger.info(f"Generating blog conclusion iwth prompt: {prompt}")
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise (not just construct) the error so callers do not silently get None.
        raise SystemError(f"Error in generating blog conclusion: {err}") from err

18
lib/get_blog_content.py Normal file
View File

@@ -0,0 +1,18 @@
def generate_topic_outline(blog_title, num_subtopics):
    """Ask the chat model for sub-topics (an outline) for a blog title.

    Args:
        blog_title: Title of the blog to outline.
        num_subtopics: Number of sub-topics requested in the prompt.

    Returns:
        Whatever ``openai_chatgpt`` returns for the outline prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    # TBD: Remove hardcoding, make dynamic
    prompt = f"""As a SEO expert, suggest only {num_subtopics} beginner-friendly and
insightful sub topics for the blog title: {blog_title}.
Respond with only answer and no description, explanations."""
    # TBD: Include --niche (long-tailed keywords, most popular questions).
    logger.info(f"Prompt used for blog title Outline :\n{prompt}\n")
    # TBD: Add logic for which_provider and which_model
    try:
        return openai_chatgpt(prompt)
    except Exception as err:
        # Previously the error was built but never raised, and the following
        # `return response` then failed on an unbound local variable.
        raise SystemError(f"Error in generating blog outline: {err}") from err

16
lib/get_blog_intro.py Normal file
View File

@@ -0,0 +1,16 @@
def get_blog_intro(blog_title, blog_topics):
    """Ask the chat model for a short introduction to a blog.

    Args:
        blog_title: Title of the blog; quoted inside the prompt.
        blog_topics: Sub-topics the blog covers; appended to the prompt.

    Returns:
        Whatever ``openai_chatgpt`` returns for the introduction prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    prompt = f"""As a skilled wordsmith, I'll equip you with a blog title and relevant topics, tasking you with crafting an engaging introduction. Your challenge: Create a brief, compelling entry that entices readers to explore the entire post. This introduction must be concise (under 250 characters) yet powerful, clearly stating the blog's purpose and what readers stand to gain. Reply with only the introduction.
Intrigue your audience from the start with vibrant language, employing strong verbs and vivid descriptions. Address a common challenge your readers face, demonstrating empathy and positioning yourself as their go-to expert. Pose thought-provoking questions that prompt reader engagement and contemplation.
Remember, your words matter. This introduction serves as the cornerstone of the blog post. It should not only captivate attention but also encourage deeper exploration. Additionally, strategically integrate relevant keywords to enhance visibility on search engine results pages (SERPs). Your mission: Craft a blog introduction that resonates, leaving readers eager to delve further into the titled piece: '{blog_title}', covering these sub-topics: {blog_topics}."""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Previously the error was built but never raised, and the following
        # `return response` then failed on an unbound local variable.
        raise SystemError(f"Error in generating Blog Introduction: {err}") from err

17
lib/get_blog_meta_desc.py Normal file
View File

@@ -0,0 +1,17 @@
def generate_blog_description(blog_content):
    """Ask the chat model for an SEO meta description of the blog content.

    Args:
        blog_content: Blog text to describe; appended to the prompt.

    Returns:
        Whatever ``openai_chatgpt`` returns for the meta-description prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    prompt = f"""As an expert SEO and blog writer, Compose a compelling meta description for the given blog content,
adhering to SEO best practices. Keep it between 150-160 characters, incorporating active verbs,
avoiding all caps and excessive punctuation. Ensure relevance, engage users, and encourage clicks.
Use keywords naturally and provide a glimpse of the content's value to entice readers.
Respond with only one of your best effort and do not include your explanations.
Blog Content: {blog_content}"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise (not just construct) the error so callers do not silently get None.
        raise SystemError(f"Error in generating blog description: {err}") from err

18
lib/get_blog_outline.py Normal file
View File

@@ -0,0 +1,18 @@
def generate_topic_outline(blog_title, num_subtopics):
    """Ask the chat model for sub-topics (an outline) for a blog title.

    Args:
        blog_title: Title of the blog to outline.
        num_subtopics: Number of sub-topics requested in the prompt.

    Returns:
        Whatever ``openai_chatgpt`` returns for the outline prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    # TBD: Remove hardcoding, make dynamic
    prompt = f"""As a SEO expert, suggest only {num_subtopics} beginner-friendly and
insightful sub topics for the blog title: {blog_title}.
Respond with only answer and no description, explanations."""
    # TBD: Include --niche (long-tailed keywords, most popular questions).
    logger.info(f"Prompt used for blog title Outline :\n{prompt}\n")
    # TBD: Add logic for which_provider and which_model
    try:
        return openai_chatgpt(prompt)
    except Exception as err:
        # Previously the error was built but never raised, and the following
        # `return response` then failed on an unbound local variable.
        raise SystemError(f"Error in generating blog outline: {err}") from err

16
lib/get_blog_tags.py Normal file
View File

@@ -0,0 +1,16 @@
def get_blog_tags(blog_article):
    """Ask the chat model to suggest tags for the given blog content.

    Args:
        blog_article: Blog content; appended to the prompt.

    Returns:
        Whatever ``openai_chatgpt`` returns (the prompt asks for two
        comma-separated tags).

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    prompt = f"""As an expert SEO and blog writer, suggest only 2 relevant and specific blog tags
for the given blog content. Only reply with comma separated values.
Blog content: {blog_article}."""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise (not just construct) the error so callers do not silently get None.
        raise SystemError(f"Error in generating blog tags: {err}") from err

20
lib/get_blog_title.py Normal file
View File

@@ -0,0 +1,20 @@
def generate_blog_title(blog_meta_desc):
    """Ask the chat model for a blog title based on a meta description.

    Args:
        blog_meta_desc: Meta description of the blog; appended to the prompt.

    Returns:
        Whatever ``openai_chatgpt`` returns for the title prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    # TBD: Remove hardcoding, make dynamic
    prompt = f"""As a SEO expert and content writer, I will provide you with meta description of blog.
Your task is write a SEO optimized, call to action and engaging blog title for it.
Follows SEO best practises to suggest the blog title.
Please keep the titles concise, not exceeding 60 words, and ensure to maintain their meaning.
Respond with only one title and no description or keyword like Title:
Generate blog title for this given blog content: {blog_meta_desc}
"""
    # TBD: Include --niche
    logger.info(f"Prompt used for blog title :{prompt}")
    try:
        return openai_chatgpt(prompt)
    except Exception as err:
        # Previously the error was built but never raised, and the following
        # `return response` then failed on an unbound local variable.
        raise SystemError(f"Error in generating Blog Title: {err}") from err

47
lib/get_blog_topics.py Normal file
View File

@@ -0,0 +1,47 @@
def generate_blog_topics(blog_keywords, num_blogs, niche):
    """Ask the chat model for SEO-friendly blog topics on the given keywords.

    When more than five topics are requested, related keywords are fetched
    via ``get_related_keywords`` and appended to the prompt.

    Args:
        blog_keywords: Keywords the blog topics should be about.
        num_blogs: Number of blog topics to request.
        niche: Passed through to ``get_related_keywords``.

    Returns:
        Whatever ``openai_chatgpt`` returns for the topics prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    prompt = f"""As an SEO specialist and blog writer, write {num_blogs} catchy
and SEO-friendly blog topics on {blog_keywords}. The blog title must be less than 80 characters.
The blog titles must follow best SEO practises, be engaging and invite/tempt users to read full blog.
Do not include descriptions, explanations. Do not number the result."""
    # Beware of keywords stuffing, clustering, semantic should help avoid.
    if num_blogs > 5:
        # Get more keywords, based on user given keywords.
        more_keywords = get_related_keywords(num_blogs, blog_keywords, niche)
        # Must be an f-string (the keywords were previously sent as the literal
        # text "{more_keywords}"), and needs a separating space so the sentence
        # does not run into the previous one.
        prompt += f" Use the following keywords wisely, without keyword stuffing: {more_keywords}"
    logger.info(f"Prompt used for generating blog topics: \n{prompt}\n")
    try:
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise (not just construct) the error so callers do not silently get None.
        raise SystemError(f"Error in generating blog topics: {err}") from err
def get_related_keywords(num_blogs, keywords, niche):
    """Ask the chat model for keywords related to the given ones.

    Args:
        num_blogs: Number of keywords requested in the prompt.
        keywords: Topic the related keywords should be about.
        niche: If truthy, long-tailed keywords are requested; otherwise
            popular keywords are requested.

    Returns:
        Whatever ``openai_chatgpt`` returns for the keyword prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    # Check if niche: use long tailed, else use popular keywords.
    if niche:
        # Each continued fragment starts with a space; without it the adjacent
        # literals fused into "semanticallyrelated" and "used inhigh-quality".
        prompt = (
            f"Generate a list without description of the top {num_blogs} most popular and semantically"
            f" related long-tailed keywords and entities for the topic of {keywords} that are used in"
            " high-quality content and relevant to my competitors."
        )
    else:
        prompt = (
            f"Generate a list without description of the top {num_blogs} most popular and"
            f" semantically related keywords and entities for the topic of {keywords} that are used"
            " in high-quality content and relevant to my competitors."
        )
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise (not just construct) the error and keep the underlying cause.
        raise SystemError(f"Error in getting related keywords: {err}") from err

View File

@@ -22,11 +22,13 @@ nltk.download('punkt', quiet=True)
from nltk.corpus import stopwords
nltk.download('stopwords', quiet=True)
from .gpt_providers.openai_gpt_provider import openai_chatgpt, gen_new_from_given_img
from .gpt_providers.openai_gpt_provider import analyze_and_extract_details_from_image
from .gpt_providers.openai_gpt_provider import gen_new_from_given_img
from .gpt_providers.openai_chat_completion import openai_chatgpt
from .gpt_providers.gpt_vision_img_details import analyze_and_extract_details_from_image
from .generate_image_from_prompt import generate_image
from .write_blogs_from_youtube_videos import youtube_to_blog
from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
from .gpt_online_researcher import do_online_research
from loguru import logger
logger.remove()
@@ -35,33 +37,69 @@ logger.add(sys.stdout,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
# Load configuration
# with open('config.json') as config_file:
#     config = json.load(config_file)
# wordpress_url = config['wordpress_url']

# fixme: Remove the hardcoding, need add another option OR in config ?
# Directory (under the current working directory) for generated blog images.
image_dir = os.path.join(os.getcwd(), "blog_images")

# TBD: This can come from config file.
# Directory (under the current working directory) for generated blog files.
output_path = os.path.join(os.getcwd(), "blogs")

# WordPress connection settings. Credentials must never be committed to source
# control: they are read from the environment and default to empty strings
# when the variables are not set.
wordpress_url = os.environ.get('WORDPRESS_URL', '')
wordpress_username = os.environ.get('WORDPRESS_USERNAME', '')
wordpress_password = os.environ.get('WORDPRESS_PASSWORD', '')
def generate_youtube_blog(yt_url_list):
def generate_youtube_blog(yt_url_list, output_format="markdown"):
"""Takes a list of youtube videos and generates blog for each one of them.
"""
# Use to store the blog in a string, to save in a *.md file.
blog_markdown_str = ""
for a_yt_url in yt_url_list:
try:
yt_img_path, yt_blog = youtube_to_blog(a_yt_url)
logger.info(f"Starting to write blog on URL: {a_yt_url}")
yt_blog, yt_title = youtube_to_blog(a_yt_url)
if not yt_title or not yt_blog:
logger.error("No content or title for audio to proceed.")
sys.exit(1)
except Exception as e:
logger.error(f"Error in youtube_to_blog: {e}")
sys.exit(1)
try:
logger.info(f"Starting with online research for URL title: {yt_title}")
research_report = do_online_research(yt_title)
if not research_report:
logger.error(f"Error in do_online_research returned no report: {e}")
sys.exit(1)
except Exception as e:
logger.error(f"Error in do_online_research: {e}")
sys.exit(1)
try:
# Note: Check if the order of input matters for your function
logger.info("Preparing a blog content from audio script and online research content...")
blog_with_research(research_report, yt_blog)
except Exception as e:
logger.error(f"Error in blog_with_research: {e}")
sys.exit(1)
try:
# Get the title and meta description of the blog.
title = generate_blog_title(yt_blog)
blog_meta_desc = generate_blog_description(yt_blog)
title = generate_blog_title(blog_meta_desc)
logger.info(f"Title is {title} and description is {blog_meta_desc}")
#blog_markdown_str = "# " + title.replace('"', '') + "\n\n"
# Generate an introduction for the blog
blog_markdown_str = "# " + title.replace('"', '') + "\n\n"
# Get blog tags and categories.
blog_tags = get_blog_tags(blog_meta_desc)
logger.info(f"Blog tags are: {blog_tags}")
blog_categories = get_blog_categories(blog_meta_desc)
logger.info(f"Blog categories are: {blog_categories}")
# Generate an introduction for the blog
blog_intro = get_blog_intro(title, yt_blog)
logger.info(f"The Blog intro is:\n {blog_intro}")
blog_markdown_str = blog_markdown_str + "\n\n" + f"{blog_intro}" + "\n\n"
@@ -86,20 +124,23 @@ def generate_youtube_blog(yt_url_list):
blog_conclusion = get_blog_conclusion(blog_markdown_str)
# TBD: Add another image.
blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n\n" + f"{blog_conclusion}" + "\n"
print(f"Conclusion: {blog_markdown_str}")
# Get blog tags and categories.
blog_tags = get_blog_tags(yt_blog)
logger.info(f"Blog tags are: {blog_tags}")
blog_categories = get_blog_categories(yt_blog)
logger.info(f"Blog categories are: {blog_categories}")
save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
# Proofread the blog, edit and remove dubplicates and refine it further.
# Presently, fixing the blog keywords to be tags and categories.
blog_keywords = f"{blog_tags} + {blog_categories}"
blog_markdown_str = blog_proof_editor(blog_markdown_str, blog_keywords)
# Check the type of blog format needed by the user.
if 'html' in output_format:
blog_markdown_str = convert_tomarkdown_format(blog_markdown_str)
elif 'markdown' in output_path:
blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
#print(html_blog)
# Try to save the blog content in a file, in whichever format. Just dump it.
try:
save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
except Exception as err:
logger.error("Failed to Save blog content: {blog_markdown_str}")
except Exception as e:
# raise assertionerror
@@ -108,7 +149,7 @@ def generate_youtube_blog(yt_url_list):
def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
wordpress=False, output_format="HTML"):
wordpress=False, research_online=False, output_format="HTML"):
"""
This function will take a blog Topic to first generate sections for it
and then generate content for each section.
@@ -136,7 +177,7 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
blog_intro = get_blog_intro(a_blog_topic, tpc_outlines)
logger.info(f"The intro is:\n{blog_intro}")
blog_markdown_str = blog_markdown_str + "### Introduction" + "\n\n" + f"{blog_intro}" + "\n\n"
print(f"\n\n 1 -- BLOG_STR : {blog_markdown_str}\n\n")
# Now, for each blog we have sub topic. Generate content for each of the sub topic.
for a_outline in tpc_outlines:
a_outline = a_outline.replace('"', '')
@@ -145,7 +186,6 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
# a_outline is sub topic heading, hence part ToC also.
#blog_markdown_str = blog_markdown_str + "\n\n" + f"### {a_outline}" + "\n\n"
blog_markdown_str = blog_markdown_str + "\n" + f"\n {sub_topic_content}" + "\n\n"
print(f"\n\n 3 -- BLOG_STR : {blog_markdown_str}\n\n")
# Get the Conclusion of the blog, by passing the generated blog.
blog_conclusion = get_blog_conclusion(blog_markdown_str)
@@ -154,6 +194,11 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
# logger.info/check the final blog content.
logger.info(f"Final blog content: {blog_markdown_str}")
#if research_online:
# # Call on the got-researcher, tavily apis for this. So many apis floating around.
# report = do_online_research_on(blog_keywords)
# blog_markdown_str = blog_with_research(report, blog_markdown_str)
blog_meta_desc = generate_blog_description(blog_markdown_str)
logger.info(f"\nThe blog meta description is:{blog_meta_desc}\n")
@@ -162,10 +207,10 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
blog_tags = get_blog_tags(blog_markdown_str)
logger.info(f"\nBlog tags for generated content: {blog_tags}")
logger.info(f"\nBlog tags for generated content: {blog_tags}\n")
blog_categories = get_blog_categories(blog_markdown_str)
logger.info(f"Generated blog categories: {blog_categories}")
logger.info(f"Generated blog categories: {blog_categories}\n")
# Use chatgpt to convert the text into HTML or markdown.
if 'html' in output_format:
@@ -178,6 +223,7 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
# Similar tags and categories will be missed.
# blog_categories =
# blog_tags =
logger.info("Uploading the blog to wordpress.\n")
main_img_path = compress_image(main_img_path, quality=85)
try:
img_details = analyze_and_extract_details_from_image(main_img_path)
@@ -247,14 +293,16 @@ def generate_blog_title(blog_meta_desc):
Given a blog title generate an outline for it
"""
# TBD: Remove hardcoding, make dynamic
prompt = f"""As a SEO expert and content writer, I will provide you with blog. Your task is write title for it.
prompt = f"""As a SEO expert and content writer, I will provide you with meta description of blog.
Your task is write a SEO optimized, call to action and engaging blog title for it.
Follows SEO best practises to suggest the blog title.
Please keep the titles concise, not exceeding 60 words, and ensure to maintain their meaning.
Respond with only one title and no description, for this given blog content: {blog_meta_desc}
Respond with only one title and no description or keyword like Title:
Generate blog title for this given meta description: {blog_meta_desc}
"""
# The suggested {num_subtopics} outline should include few long-tailed keywords and most popular questions.
# TBD: Include --niche
logger.debug(f"Prompt used for blog title :{prompt}")
logger.info(f"Prompt used for blog title :{prompt}")
try:
response = openai_chatgpt(prompt)
except Exception as err:
@@ -494,6 +542,35 @@ def remove_stop_words(sentence):
return filtered_sentence
def convert_tomarkdown_format(blog_content):
    """Ask the chat model to reformat blog content as markdown.

    Args:
        blog_content: Blog text to convert; appended to the prompt.

    Returns:
        Whatever ``openai_chatgpt`` returns for the conversion prompt.

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    prompt = f"""
As an expert in markdown language format and font matter, used for static webpages.
Your task is to convert and improve formatting of given blog content.
Do Not modify the content, only modify to convert it into highly readable blog content.
Use below guidelines and include other best practises:
1). Headers for Structure: Use # for main headings and increase the number of # for
subheadings (##, ###, etc.). Organize given content into clear, hierarchical sections.
2). Emphasizing Text: Use single asterisks or underscores for italic (*italic* or _italic_),
double for bold (**bold** or __bold__), and triple for bold italic (***bold italic***).
3). Lists: For unordered lists, use dashes, asterisks, or plus signs (-, *, +).
For ordered lists, use numbers followed by periods (1., 2., etc.).
4). Blockquotes: Use > for blockquotes, and add additional > for nested blockquotes.
5). Code Blocks: Use backticks for inline code (code) and triple backticks for code blocks.
Specify a language for syntax highlighting.
6). Horizontal Lines: Create a horizontal line using three or more asterisks, dashes, or underscores (---, ***).
7). Table Formatting: Use pipes | and dashes - to create tables. Align text with colons.
Convert the given blog content in well organised markdown content: {blog_content}"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise (not just construct) the error and keep the underlying cause.
        raise SystemError(f"Error in converting to Markdown format: {err}") from err
def convert_markdown_to_html(md_content):
""" Helper function to convert given text to HTML
"""
@@ -527,5 +604,86 @@ def convert_markdown_to_html(md_content):
# TBD: Add logic for which_provider and which_model
response = openai_chatgpt(prompt)
return response
except Exception as err:
SystemError(f"Error in convert to HTML")
def blog_with_research(report, blog):
    """Combine an online research report with generated blog content.

    Args:
        report: Research report text, used as context for the blog.
        blog: Blog content to be merged with the report.

    Returns:
        Whatever ``openai_chatgpt`` returns for the combining prompt (the
        prompt asks for a complete HTML article).

    Raises:
        SystemError: If the call to ``openai_chatgpt`` fails. The original
            exception is chained as the cause.
    """
    prompt = f"""
You are an expert copywriter specializing in content optimization for SEO.
I will provide you with a research report and a blog content on the same topic.
Treat the research report as the context for the blog and better it accordingly.
Your task is to transform and combine the given research and blog content into a well-structured, unique
and engaging blog article.
Your objectives include:
1. Master the report and blog content: Understand main ideas, key points, and the core message.
2. Sentence Structure: Rephrase while preserving logical flow and coherence.
3. Identify Main Keyword: Determine the primary topic and combine the articles on the main topic.
4. Keyword Integration: Naturally integrate keywords in headings, subheadings, and body text, avoiding overuse.
5. Write Unique Content: Avoid direct copying from given report and blog; rewrite in your own words and style.
6. Optimize for SEO: Generate high quality informative content.
Implement SEO best practises with appropriate keyword density.
7. Craft Engaging and Informative Article: Provide value and insight to readers.
8. Proofread: Important to Check for grammar, spelling, and punctuation errors.
9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases,
interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures.
10. Structuring: Include an Introduction, subtopics and use bullet points or
numbered lists if appropriate. Important to include FAQs, and Conclusion.
11. Ensure Uniqueness: Guarantee the article is plagiarism-free. Write in unique, informative style.
12. Punctuation: Use appropriate question marks at the end of questions.
13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
14. REMEMBER to give final response as complete HTML.
Follow these guidelines to create a well-optimized, unique, and informative article
that will rank well in search engine results and engage readers effectively.
Create a blog post from the given research report and blog content below.
Research report: {report}
Blog content: {blog}
"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # The old message ("getting related keywords") was copied from another
        # helper and was never raised; report the real operation and cause.
        raise SystemError(
            f"Error in combining research report and blog content: {err}"
        ) from err
def blog_proof_editor(blog_content, blog_keywords):
    """
    Proofread and rewrite a blog post through the LLM.

    Args:
        blog_content (str): The blog text to be edited.
        blog_keywords (str): Topic/keywords of the blog; embedded in the prompt.

    Returns:
        str: The edited blog text returned by `openai_chatgpt`.

    Raises:
        SystemExit: If there is no blog content to proofread.
        SystemError: If the LLM call fails.
    """
    # The content is what gets proofread; without it there is nothing to send,
    # regardless of whether keywords were supplied.
    if not blog_content:
        logger.error("Blog proof reader has no content to proofread.")
        raise SystemExit(1)

    prompt = f"""I am looking for detailed editing and enhancement of the given blog post,
with a particular focus on maintaining originality.
The topic of the content is [{blog_keywords}]. Please go through the blog and make direct edits to improve it,
ensuring the final output is both high-quality and original.
Note: There are duplicates headings and corresponding paragraphs, rewrite into one subheading.
Here are the specific areas to focus on:
1). Ensure Originality: Edit any sections that lack originality, replacing them with unique and creative content.
2). Eliminate Repetitive Language: Rewrite repetitive phrases with varied and engaging language.
3). Vocabulary and Grammar Enhancement: Directly correct any grammatical errors and upgrade the
vocabulary for better readability.
4). Improve Sentence Structure: Enhance sentence construction for better clarity and flow.
5). Tone and Brand Alignment: Adjust the tone, voice, personality of given content to make it unique.
6). Optimize Content Structure: Reorganize the content for a more impactful presentation,
including better paragraphing and transitions.
7). Remove Redundancies: Important, Cut out any redundant information or overly complex jargon.
8). Refine Overall Structure: Make structural changes to improve the overall impact of the content.
9). Remember, rewrite all content that repeated, while maintaining the formatting of the given blog text.
Please apply these changes directly to the following blog text and provide the edited version:
[{blog_content}]. """
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # The exception must actually be raised; merely constructing it
        # silently swallowed the failure and returned None.
        raise SystemError(f"Error Blog Proof Reading: {err}") from err

19
lib/get_topic_content.py Normal file
View File

@@ -0,0 +1,19 @@
def generate_topic_content(blog_keywords, sub_topic):
    """
    Generate content for one sub-topic of a blog.

    Args:
        blog_keywords (str): Main topic of the blog; sets the author persona in the prompt.
        sub_topic (str): The sub-topic to write content for.

    Returns:
        str: Markdown content returned by `openai_chatgpt`.

    Raises:
        SystemError: If the LLM call fails.
    """
    # The outline should contain various subheadings and include the starting sentence for each section.
    # TBD: Depending on the usecase 'Voice and style' will change to professional etc.
    prompt = f"""As a professional blogger and topic authority on {blog_keywords},
craft factual (no more than 200 characters) subtopic content on {sub_topic}.
Your response should reflect Experience, Expertise, Authoritativeness and Trustworthiness from content.
Voice and style guide: Write in a professional manner, giving enlightening details and reasons.
Use natural language and phrases that a real person would use: in normal conversations.
Format your response using markdown. REMEMBER Not to include introduction or conclusion in your response.
Use headings(h3 to h6 only), subheadings, bullet points, and bold to organize the information."""
    logger.info(f"Generate topic content using prompt:\n{prompt}\n")
    try:
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise instead of only constructing the exception, so the caller
        # does not receive a silent None on failure.
        raise SystemError(f"Error in generating topic content: {err}") from err

View File

@@ -0,0 +1,108 @@
import re #additional import for regex
import os
import json
import requests
from openai import OpenAI
# Script: fetch a Google results page, strip the bulky parts of the HTML, and
# ask an OpenAI model (via function/tool calling) to extract the organic
# results and the "people also ask" section as structured data.
client = OpenAI(
    # Prefer the conventional OPENAI_API_KEY variable; fall back to the legacy
    # hyphenated name so existing setups keep working.
    api_key=os.getenv('OPENAI_API_KEY') or os.getenv('OPENAI-API-KEY')
)

# Target URL can be a website url or it can google search
query = "kedarkanta trek"
target_url = "https://www.google.com/search"
# Pass the query through `params` so requests URL-encodes spaces and
# reserved characters instead of splicing raw text into the URL.
response = requests.get(target_url, params={"q": query, "gl": "us"})
# Fail fast on an HTTP error rather than paying to send an error page to the model.
response.raise_for_status()
html_text = response.text

# Remove unnecessary part to prevent HUGE TOKEN cost!
# Remove everything between <head> and </head>
html_text = re.sub(r'<head.*?>.*?</head>', '', html_text, flags=re.DOTALL)
# Remove all occurrences of content between <script> and </script>
html_text = re.sub(r'<script.*?>.*?</script>', '', html_text, flags=re.DOTALL)
# Remove all occurrences of content between <style> and </style>
html_text = re.sub(r'<style.*?>.*?</style>', '', html_text, flags=re.DOTALL)

completion = client.chat.completions.create(
    model="gpt-4-1106-preview",
    messages=[
        {"role": "system", "content": "You are a master at scraping Google results data. Scrape two things: 1st. Scrape top 10 organic results data and 2nd. Scrape people_also_ask section from Google search result page."},
        {"role": "user", "content": html_text}
    ],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "parse_organic_results",
                "description": "Parse organic results from Google SERP raw HTML data nicely",
                "parameters": {
                    'type': 'object',
                    'properties': {
                        'data': {
                            'type': 'array',
                            'items': {
                                'type': 'object',
                                'properties': {
                                    'title': {'type': 'string'},
                                    'original_url': {'type': 'string'},
                                    'snippet': {'type': 'string'},
                                    'position': {'type': 'integer'}
                                }
                            }
                        }
                    }
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "parse_people_also_ask_section",
                "description": "Parse `people also ask` section from Google SERP raw HTML",
                "parameters": {
                    'type': 'object',
                    'properties': {
                        'data': {
                            'type': 'array',
                            'items': {
                                'type': 'object',
                                'properties': {
                                    'question': {'type': 'string'},
                                    'original_url': {'type': 'string'},
                                    'answer': {'type': 'string'},
                                }
                            }
                        }
                    }
                }
            }
        }
    ],
    tool_choice="auto"
)

# With tool_choice="auto" the model may return zero, one or both tool calls,
# in any order. Index them by function name instead of assuming positions.
parsed_by_tool = {}
for tool_call in completion.choices[0].message.tool_calls or []:
    arguments = json.loads(tool_call.function.arguments)
    parsed_by_tool[tool_call.function.name] = arguments.get('data', [])

# Organic_results
organic_results = parsed_by_tool.get('parse_organic_results', [])
print('Organic results:')
for result in organic_results:
    # The schema marks no field as required, so read defensively.
    print(f"Blog Title: {result.get('title')}")
    print(f"Blog URL: {result.get('original_url')}")
    print(f"Blog Snippet: {result.get('snippet')}")
    print(f"Blog Position: {result.get('position')}")
    print('---')

# People also ask
people_also_ask = parsed_by_tool.get('parse_people_also_ask_section', [])
print('People also ask:')
for result in people_also_ask:
    print(f"People_Also_Ask: Question: {result.get('question')}")
    print(f"People_Also_Ask: URL: {result.get('original_url')}")
    # The f-prefix was missing here, so the placeholder was printed literally.
    print(f"People_Also_Ask: Answer: {result.get('answer')}")
    print('---')

View File

@@ -0,0 +1,38 @@
# Not using it, as they wanted phone verification done.
import os
import serpapi
import csv
from dotenv import load_dotenv
load_dotenv()
# Script: run one Google search through SerpApi, print parts of the response
# and export the organic results to output.csv.
api_key = os.getenv('SERPAPI_KEY')
client = serpapi.Client(api_key=api_key)

# Search parameters kept in one place so they are easy to adjust.
search_params = {
    "q": "Retrieval Augumented Generation RAG",
    "engine": "google",
    "location": "Austin, Texas",
    "hl": "en",
    "gl": "us",
}
result = client.search(**search_params)

print(result)
print(result['organic_results'])
# Get number of results available
print(result["search_information"]["total_results"])
# Get all the related questions
print(result["related_questions"])

organic_results = result["organic_results"]
csv_header = ["Title", "Link", "Snippet"]
csv_fields = ("title", "link", "snippet")
with open('output.csv', 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)
    # Header row first, then one row per organic result.
    csv_writer.writerow(csv_header)
    csv_writer.writerows(
        [entry[field] for field in csv_fields] for entry in organic_results
    )
print('Done writing to CSV file.')

View File

@@ -16,54 +16,68 @@
#
##############################################################
# import and connect
import os
import logging
from tavily import TavilyClient
from langchain.adapters.openai import convert_openai_messages
from langchain.chat_models import ChatOpenAI
def do_research_on(research_query):
"""
Basically sending in the blog title to do research on.
gpt-researcher API version to do extensive web research for given keywords.
"""
# $ export TAVILY_API_KEY={Your Tavily API Key here}
logging.basicConfig(level=logging.INFO, format='%(asctime)s-%(levelname)s-%(module)s-%(lineno)d-%(message)s')
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def do_online_research(query):
try:
client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
except Exception as err:
SystemExit(f"Failed to create TavilyClient: {err}")
# Retrieve API keys
api_key = os.getenv('TAVILY_API_KEY')
openai_api_key = os.getenv('OPENAI_API_KEY')
if not api_key or not openai_api_key:
raise ValueError("API keys for Tavily or OpenAI are not set.")
try:
# run tavily search
research_content = client.search(
research_query,
search_depth="advanced",
include_answer=True,
max_results=10)["results"]
except Exception as err:
SystemExit(f"Unable to do tavily search: {err}")
# Initialize Tavily client
try:
client = TavilyClient(api_key=api_key)
except Exception as err:
logging.error("Failed to create Tavily client. Check TAVILY_API_KEY")
exit(1)
# setup prompt
prompt = [{
"role": "system",
"content": f'You are an AI critical thinker research assistant. '\
f'Your sole purpose is to write well written, critically acclaimed,'\
f'objective and structured reports on given text.'
# Run tavily search
logging.info(f"Running Tavily search on: {query}")
try:
content = client.search(query, search_depth="advanced")["results"]
except Exception as err:
logging.error(f"Failed to do Tavily Research: {err}")
exit(1)
# Setup prompt for GPT-4
prompt = [{
"role": "system",
"content": ('You are an AI critical thinker research assistant. '
'Your sole purpose is to write well written, critically acclaimed, '
'objective and structured reports on given text.')
}, {
"role": "user",
"content": f'Information: """{research_content}"""\n\n' \
f'Using the above information, answer the following'\
f'query: "{research_query}" in a detailed report --'\
f'Please use MLA format and markdown syntax.'
"role": "user",
"content": (f'Information: """{content}"""\n\n'
f'Using the above information, answer the following '
f'query: "{query}" in a detailed report --'
f'Please use MLA format and markdown syntax.')
}]
# run gpt-4
try:
# Run GPT-4
logging.info("Generating report with GPT-4...")
lc_messages = convert_openai_messages(prompt)
research_report = ChatOpenAI(
model='gpt-4',
openai_api_key=openai_api_key
).invoke(lc_messages).content
except Exception as err:
SystemExit(f"Failed to convert OpenAI message and get response.")
try:
report = ChatOpenAI(model='gpt-4', openai_api_key=openai_api_key).invoke(lc_messages).content
logging.info(f"\n Below is the online research report for given keywords/title: \n\n{report}")
return report
except Exception as err:
logging.error("Failed to generate do_online_research with ChatOpenAI")
exit(1)
# print report
print(research_report)
return research_report
except Exception as e:
logging.error(f"Failed in online research: {e}")
exit(1)

View File

@@ -1,19 +1,11 @@
gpt_providers are companies providing commercial/free pre-trained GPT models as SaaS.
These include OpenAI, Azure, Google, FB, Anthropic, etc.
# OpenAI ChatGPT Integration for Enhanced Blog Generation
- If you want to use chatgpt and its models, then use openai as gpt_provider
- We plan to integrate the most accurate, widely used models as gpt providers.
- These will also include text to image and video generations as blogging artifacts.
gpt_provider=openai
------------------------------------
Here are some tips for using LLMs to generate ideas:
- Be as specific as possible in your prompts. The more specific you are, the better the LLM will
be able to understand what you are asking for.
- Use keywords in your prompts. This will help the LLM to generate ideas that are relevant to your topic.
- Try different temperatures and top_p values. These parameters control the creativity and diversity of the generated ideas.
- Experiment with different prompts and settings to see what works best for you.
## Introduction
This toolkit, written in Python, integrates OpenAI's ChatGPT and other AI services for comprehensive blog generation. It allows for selecting and fine-tuning OpenAI models to suit various content creation needs, including text generation, image analysis, and speech-to-text conversion.
## Key Features
- **AI-Powered Text Generation**: Leverages OpenAI's ChatGPT for creating engaging and contextually relevant text based on user inputs.
- **Image Analysis and Detail Extraction**: Utilizes OpenAI's Vision API to analyze images and extract important details like Alt Text, Description, Title, and Caption.
- **Dynamic Image Generation**: Generates images from textual descriptions using DALL-E 2 and DALL-E 3 models, enhancing blog visual content.
- **Speech-to-Text Transcription**: Converts audio from YouTube videos to text, enabling easy content repurposing for blogs.
- **Image Variation Creation**: Produces variations of existing images, offering creative flexibility and maintaining topical relevance.

View File

@@ -0,0 +1,56 @@
from openai import OpenAI
from loguru import logger
import sys
from .save_image import save_generated_image
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd", n=1):
    """
    Generates images using the DALL-E 3 model based on a given text prompt.

    Args:
        img_prompt (str): Text prompt to generate the image.
        image_dir (str): Directory where the generated image will be saved.
        size (str, optional): Size of the generated images. Defaults to "1024x1024".
        quality (str, optional): Quality of the generated images. Defaults to "hd".
        n (int, optional): Number of images to generate. Defaults to 1.

    Returns:
        str | None: Path to the saved image, or None when the image was
        generated but saving it failed (the failure is logged).

    Raises:
        SystemExit: If the image generation request itself fails.
    """
    # Imported locally: this module only imports the `OpenAI` client class, so
    # the bare name `openai` previously used in the handler was undefined and
    # turned every API error into a NameError.
    from openai import OpenAIError

    try:
        logger.info("Generating Dall-e-3 image for the blog.")
        client = OpenAI()
        img_generation_response = client.images.generate(
            model="dall-e-3",
            prompt=img_prompt,
            size=size,
            quality=quality,
            n=n
        )
    except OpenAIError as e:
        # Only HTTP-status errors carry `status_code`; connection errors do not.
        status = getattr(e, "status_code", "n/a")
        logger.error(f"Dalle-3 image generation error: HTTP Status {status}, Error: {e}")
        sys.exit("Exiting due to Dalle-3 image generation error.")
    except Exception as e:
        logger.error(f"Failed to generate images with Dalle3: {e}")
        sys.exit("Exiting due to a general error in image generation.")

    # Save the generated image locally. Saving is best-effort: a failure is
    # logged and reported to the caller as None.
    try:
        return save_generated_image(img_generation_response, image_dir)
    except Exception as err:
        logger.error(f"Failed to Save generated image: {err}")
        return None

View File

@@ -0,0 +1,61 @@
from openai import OpenAI
from loguru import logger
import sys
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
from .save_image import save_generated_image
@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd", n=1):
    """
    Generates images using the DALL-E 3 model based on a given text prompt.

    Args:
        img_prompt (str): Text prompt to generate the image.
        image_dir (str): Directory where the generated image will be saved.
        size (str, optional): Size of the generated images. Defaults to "1024x1024".
        quality (str, optional): Quality of the generated images. Defaults to "hd".
        n (int, optional): Number of images to generate. Defaults to 1.

    Returns:
        str: Path to the saved image, as returned by `save_generated_image`.

    Raises:
        SystemExit: If an error occurs in image generation or saving.
    """
    # Imported locally: this module only imports the `OpenAI` client class, so
    # the bare name `openai` previously used in the handler was undefined and
    # turned every API error into a NameError.
    from openai import OpenAIError

    try:
        logger.info("Generating Dall-e-3 image for the blog.")
        client = OpenAI()
        img_generation_response = client.images.generate(
            model="dall-e-3",
            prompt=img_prompt,
            size=size,
            quality=quality,
            n=n
        )
        return save_generated_image(img_generation_response, image_dir)
    except OpenAIError as e:
        # Only HTTP-status errors carry `status_code`; connection errors do not.
        status = getattr(e, "status_code", "n/a")
        logger.error(f"Dalle-3 image generation error: HTTP Status {status}, Error: {e}")
        sys.exit("Exiting due to Dalle-3 image generation error.")
    except Exception as e:
        logger.error(f"Failed to generate images with Dalle3: {e}")
        sys.exit("Exiting due to a general error in image generation.")
# Demo entry point: generate one image and report where it was stored.
if __name__ == "__main__":
    demo_prompt = "A futuristic cityscape"
    demo_image_dir = "/path/to/image/dir"
    try:
        image_path = generate_dalle3_images(demo_prompt, demo_image_dir)
    except SystemExit as exit_reason:
        # The generator signals fatal errors via SystemExit; report it here.
        print(f"Terminated: {exit_reason}")
    else:
        print(f"Image generated and saved at: {image_path}")

View File

@@ -0,0 +1,51 @@
from loguru import logger
import sys
from PIL import Image
from openai import OpenAI
def gen_new_from_given_img(img_path, image_dir, num_img=1, img_size="1024x1024", response_format="url"):
    """
    Generates variations of a given image using OpenAI's image variation API.

    The source image is flattened onto a white background, re-saved as PNG
    over `img_path` (the original file is overwritten in place), and then
    uploaded to the variation endpoint.

    Args:
        img_path (str): Path to the original image file. Overwritten with the prepared PNG.
        image_dir (str): Directory where the generated images will be saved.
        num_img (int, optional): Number of image variations to generate. Defaults to 1.
        img_size (str, optional): Size of the generated images. Defaults to "1024x1024".
        response_format (str, optional): Format in which the generated images are returned. Defaults to "url".

    Returns:
        str: Path to the saved image variation.

    Raises:
        SystemExit: If a critical error occurs that prevents successful execution.
    """
    try:
        # NOTE(review): this module never imported `save_generated_image`, so the
        # call below always failed with NameError. Sibling modules import it from
        # `.save_image`; confirm this module lives in the same package.
        from .save_image import save_generated_image

        logger.info(f"Starting image variation generation for: {img_path}")

        # Convert and prepare the image. The source handle is closed before the
        # file is overwritten below.
        with Image.open(img_path) as source_img:
            png = source_img.convert('RGBA')
        background = Image.new('RGBA', png.size, (255, 255, 255))
        alpha_composite = Image.alpha_composite(background, png)
        alpha_composite.save(img_path, 'PNG', quality=80)
        logger.info("Image prepared for variation generation.")

        client = OpenAI()
        # Context manager so the upload handle is closed even if the request fails.
        with open(img_path, "rb") as image_file:
            variation_response = client.images.create_variation(
                image=image_file,
                n=num_img,
                size=img_size,
                response_format=response_format
            )

        # Saving the generated image
        generated_image_path = save_generated_image(variation_response, image_dir)
        logger.info(f"Image variation generated and saved to: {generated_image_path}")
        return generated_image_path
    except Exception as e:
        logger.error(f"Error occurred during image variation generation: {e}")
        sys.exit(f"Exiting due to critical error: {e}")

View File

@@ -0,0 +1,106 @@
import requests
import re
import base64
import os
import sys
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def analyze_and_extract_details_from_image(image_path):
    """
    Analyzes an image using OpenAI's Vision API to extract Alt Text, Description, Title, and Caption.

    The image is base64-encoded, sent to the chat completions endpoint as a
    data URL, and the textual answer is parsed with regular expressions.

    Args:
        image_path (str): Path to the image file.

    Returns:
        dict: Keys 'alt_text', 'description', 'title' and 'caption'. A field
        the response does not contain in the expected format is "N/A".

    Raises:
        SystemExit: If the API request fails or any other error occurs.
    """
    # Local imports/bindings: this module's import list defines neither a
    # `logger` nor `mimetypes`; without a logger the first log call raised
    # NameError before any work was done.
    import logging
    import mimetypes
    logger = logging.getLogger(__name__)

    try:
        logger.info("Starting image analysis using OpenAI's Vision API.")

        # Encode the image to a base64 string for embedding in a data URL.
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')
        logger.info("Image encoded to base64 successfully.")

        # Label the data URL with the file's real type; fall back to the
        # previously hard-coded JPEG when the extension is unknown.
        mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
        }
        payload = {
            "model": "gpt-4-vision-preview",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Analyze the given image and suggest the following: Alternative text(Alt Text), description, title, caption."
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
                        }
                    ]
                }
            ],
            "max_tokens": 300
        }

        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
        response.raise_for_status()
        assistant_message = response.json()['choices'][0]['message']['content']
        logger.info("Received response from OpenAI API.")

        # Extracting details using regular expressions. These depend on the
        # model answering in a `Label: "value"` layout; anything else is "N/A".
        alt_text_match = re.search(r'Alt Text: "(.*?)"', assistant_message)
        description_match = re.search(r'Description: (.*?)\n\n', assistant_message)
        title_match = re.search(r'Title: "(.*?)"', assistant_message)
        caption_match = re.search(r'Caption: "(.*?)"', assistant_message)

        image_details = {
            'alt_text': alt_text_match.group(1) if alt_text_match else "N/A",
            'description': description_match.group(1) if description_match else "N/A",
            'title': title_match.group(1) if title_match else "N/A",
            'caption': caption_match.group(1) if caption_match else "N/A"
        }
        logger.info("Image analysis completed successfully.")
        return image_details
    except requests.RequestException as e:
        logger.error(f"GPT-Vision API communication failure. Error: {e}")
        sys.exit(f"Exiting due to GPT-Vision API communication failure: {e}")
    except Exception as e:
        logger.error(f"Unexpected error occurred during image analysis: {e}")
        sys.exit(f"Exiting due to an unexpected error: {e}")
# Demo entry point: analyze a single image and print what was extracted.
if __name__ == "__main__":
    image_path = "path/to/your/image.jpg"
    try:
        details = analyze_and_extract_details_from_image(image_path)
    except SystemExit as exit_reason:
        # The analyzer signals fatal errors via SystemExit; report it here.
        print(f"Terminated: {exit_reason}")
    else:
        outcome = (
            f"Extracted image details: {details}"
            if details
            else "No details extracted from the image."
        )
        print(outcome)

View File

@@ -0,0 +1,63 @@
import time
import logging
import openai
import os
# Configure standard logging
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
logger = logging.getLogger(__name__)
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def openai_chatgpt(prompt, model="gpt-4-1106-preview", temperature=0.2, max_tokens=4096, top_p=0.9, n=1):
    """
    Wrapper function for OpenAI's ChatGPT completion.

    Args:
        prompt (str): The input text to generate completion for.
        model (str, optional): Model to be used for the completion. Defaults to "gpt-4-1106-preview".
        temperature (float, optional): Controls randomness. Lower values make responses more deterministic. Defaults to 0.2.
        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 4096.
        top_p (float, optional): Controls diversity. Defaults to 0.9.
        n (int, optional): Number of completions to generate. Defaults to 1.

    Returns:
        str: The text of the first generated completion.

    Raises:
        SystemExit: If an API error, connection error, rate limit error or any
            other error occurs.
    """
    # NOTE(review): every failure below is converted to SystemExit, which is
    # not an Exception subclass, so the retry decorator presumably never
    # retries — confirm whether that is intended.

    # Wait for 10 seconds to comply with rate limits
    time.sleep(10)

    try:
        client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            # `temperature` was accepted by the wrapper but never sent.
            temperature=temperature,
            n=n,
            top_p=top_p
            # Additional parameters can be included here
        )
        return response.choices[0].message.content
    # Most specific handlers first: RateLimitError and APIConnectionError both
    # derive from APIError, so a leading APIError handler would shadow them.
    except openai.RateLimitError as e:
        logger.error(f"Rate limit exceeded on OpenAI API request: {e}")
        raise SystemExit from e
    except openai.APIConnectionError as e:
        logger.error(f"Failed to connect to OpenAI API: {e}")
        raise SystemExit from e
    except openai.APIError as e:
        logger.error(f"OpenAI API Error: {e}")
        raise SystemExit from e
    except Exception as err:
        logger.error(f"OpenAI error: {err}")
        # Chain from `err`: the name `e` is not bound in this handler.
        raise SystemExit from err

View File

@@ -0,0 +1,53 @@
import sys
import logging
import openai
# Configure standard logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
def openai_chatgpt_streaming_text(user_prompt):
    """
    Uses streaming functionality to get real-time output from OpenAI's GPT model.

    Each text fragment is echoed to stdout as it arrives, and the fragments are
    joined into the final return value.

    Args:
        user_prompt (str): The prompt to send to the model.

    Returns:
        str: The complete text generated by the model in response to the prompt.

    Raises:
        SystemExit: If an error occurs in connecting to the OpenAI API or during streaming.
    """
    try:
        client = openai.OpenAI()
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-16k",
            messages=[{"role": "user", "content": user_prompt}],
            max_tokens=8192,
            temperature=0.9,
            n=1,
            stream=True
        )

        text_fragments = []
        logger.info("Starting to receive streaming responses...")
        for chunk in response:
            # Some chunks carry no choices or no text (e.g. the role-only
            # first chunk and the final chunk, whose delta content is None).
            # Appending None to a str raised TypeError and aborted the stream.
            if not chunk.choices:
                continue
            event_text = chunk.choices[0].delta.content  # Extract the text
            if not event_text:
                continue
            text_fragments.append(event_text)
            sys.stdout.write(event_text)
            sys.stdout.flush()

        logger.info("Completed receiving streaming responses.")
        return "".join(text_fragments)
    except openai.OpenAIError as e:
        logger.error(f"OpenAI API Error: {e}")
        sys.exit("Exiting due to OpenAI API error.")
    except Exception as e:
        logger.error(f"Unexpected error during streaming: {e}")
        sys.exit("Exiting due to an unexpected error.")

View File

@@ -20,7 +20,12 @@ import tempfile
from html2image import Html2Image
import datetime
from PIL import Image
import moviepy.editor as mp
import requests
from moviepy.editor import AudioFileClip
from concurrent.futures import ThreadPoolExecutor
from ..gpt_online_researcher import do_online_research
from loguru import logger
logger.remove()
@@ -29,8 +34,6 @@ logger.add(sys.stdout,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
def analyze_and_extract_details_from_image(image_path):
"""
Analyzes an image using OpenAI's Vision API and extracts Alt Text, Description, Title, and Caption.
@@ -103,12 +106,14 @@ def analyze_and_extract_details_from_image(image_path):
return image_details
except requests.RequestException as e:
sys.exit(f"Error: Failed to communicate with OpenAI API. Error: {e}")
#sys.exit(f"Error: GPT-Vision: Failed to communicate with OpenAI API. Error: {e}")
logger.error(f"Error: GPT-Vision: Failed to communicate with OpenAI API. Error: {e}")
except Exception as e:
sys.exit(f"Error occurred: {e}")
#sys.exit(f"Error occurred- GPT-Vision: {e}")
logger.error(f"Error occurred- GPT-Vision: {e}")
def openai_chatgpt(prompt, model="gpt-3.5-turbo-16k", temperature=0.2, max_tokens=8192, top_p=0.9, n=1):
def openai_chatgpt(prompt, model="gpt-4-1106-preview", temperature=0.2, max_tokens=4096, top_p=0.9, n=1):
"""
Wrapper function for openai chat Completion
"""
@@ -119,6 +124,10 @@ def openai_chatgpt(prompt, model="gpt-3.5-turbo-16k", temperature=0.2, max_token
try:
client = OpenAI()
except Exception as err:
print("Error: OpenAI Client.")
exit(1)
try:
# using OpenAI's Completion module that helps execute any tasks involving text
response = client.chat.completions.create(
# model name used, there are many other models available under the umbrella of GPT-3
@@ -142,6 +151,8 @@ def openai_chatgpt(prompt, model="gpt-3.5-turbo-16k", temperature=0.2, max_token
except openai.RateLimitError as e:
#Handle rate limit error (we recommend using exponential backoff)
SystemError(f"OpenAI API request exceeded rate limit: {e}")
except Exception as err:
SystemError(f"OpenAI client Error: {err}")
return response.choices[0].message.content
@@ -231,39 +242,57 @@ def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd"
return img_path
def speech_to_text(video_url):
""" Common openai function for speech to text. """
client = OpenAI()
def speech_to_text(video_url, output_path='.'):
""" Transcribes speech to text from a YouTube video URL. """
try:
# Download YouTube video
logger.info(f"Download YouTube video: {video_url}")
# Create a YouTube object
print(f"Accessing YouTube URL: {video_url}")
yt = YouTube(video_url)
stream = yt.streams.filter(only_audio=True).first()
# Save the video in a temporary file
logger.info(f"Finished Downloading, Saving video for transcription.")
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
temp_file_name = temp_file.name
# Select the highest quality audio stream
print("Fetching audio stream. Select the highest quality audio stream")
audio_stream = yt.streams.filter(only_audio=True).first()
stream.download(output_path=os.path.dirname(temp_file_name), filename=os.path.basename(temp_file_name))
try:
# Transcribe the video using OpenAI's Whisper API
logger.info(f"Transcribe the video using OpenAI's Whisper API")
with open(temp_file_name, "rb") as audio_file:
if audio_stream is None:
print("No audio stream found for this video.")
return
else:
# Download the audio stream
print(f"Downloading audio for: {yt.title}")
audio_file = audio_stream.download(output_path)
print(f"Downloaded: {yt.title} to {output_path}")
try:
# Check if the audio file size is less than 24MB
max_file_size = 24 * 1024 * 1024 # 24MB in bytes
file_size = os.path.getsize(audio_file)
if file_size > max_file_size:
print("Error: File size exceeds 24MB limit.")
exit(1)
# File uploads are currently limited to 25 MB and the following input
# file types are supported: mp3, mp4, mpeg, mpga, m4a, wav, and webm.
try:
client = OpenAI()
except Exception as err:
SystemExit("Unable to get openai client object: {err}")
print("Transcribing using Openai whisper.")
transcript = client.audio.transcriptions.create(
model="whisper-1",
file=audio_file
model="whisper-1",
file=open(audio_file, "rb"),
response_format="text"
)
except Exception as err:
logger.error(f"Failed to transcribe using whisper model: {err}")
logger.info("Finished Transcribing. Creating a blog from the transcript.")
# Remove the temporary file after transcription
os.remove(temp_file_name)
return(transcript)
return transcript
except Exception as err:
print(f"Failed in whisper transcription: {err}")
exit(1)
except Exception as e:
logger.error(f"Error: speech-to-text, Failed to transcribe url: {video_url} with error: {e}")
print(f"YT video download, An error occurred: {e}")
exit(1)
os.remove(audio_file)
# The idea is to download images from other blogs and recreate from it.

View File

@@ -0,0 +1,35 @@
import datetime
import os
import requests
from PIL import Image
import logging
def save_generated_image(img_generation_response, image_dir):
    """
    Download the first generated image and store it under a timestamped name.

    Args:
        img_generation_response: Image API response; `data[0].url` is read.
        image_dir (str): Directory the image file is written to.

    Returns:
        str | None: Path of the saved file, or None if the download failed.
    """
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Timestamped file name keeps each saved image unique (to the second).
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    target_path = os.path.join(image_dir, f"generated_image_{timestamp}.png")

    source_url = img_generation_response.data[0].url
    logger.info(f"Fetch the image from url: {source_url}")

    try:
        download = requests.get(source_url, stream=True)
        download.raise_for_status()
        with open(target_path, "wb") as out_file:
            out_file.write(download.content)
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to get generated image content: {e}")
        return None

    logger.info(f"Saved image at path: {target_path}")

    # Preview the image only when a display is available.
    if not os.environ.get('DISPLAY', ''):
        return target_path
    preview = Image.open(target_path)
    preview.show()
    return target_path

View File

@@ -0,0 +1,88 @@
from pytube import YouTube
import os
import sys
from loguru import logger
from openai import OpenAI
from tqdm import tqdm
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
def progress_function(stream, chunk, bytes_remaining):
    """
    Download-progress callback: advance the module-level `progress_bar`.

    Args:
        stream: Stream being downloaded; its `filesize` is the total byte count.
        chunk: The data chunk just received (unused).
        bytes_remaining (int): Bytes still left to download.
    """
    # Completed share of the download as a fraction between 0 and 1.
    fraction_done = (stream.filesize - bytes_remaining) / stream.filesize
    # The bar expects an increment, so pass the delta from its current position.
    increment = fraction_done - progress_bar.n
    progress_bar.update(increment)
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def speech_to_text(video_url, output_path='.'):
    """
    Transcribes speech to text from a YouTube video URL using OpenAI's Whisper model.

    The audio is downloaded to `output_path`, transcribed, and the downloaded
    file is removed again whether or not transcription succeeded.

    Args:
        video_url (str): URL of the YouTube video to transcribe.
        output_path (str, optional): Directory where the audio file will be saved. Defaults to '.'.

    Returns:
        tuple | None: `(transcript, video_title)` on success, or None when the
        video has no audio-only stream.

    Raises:
        SystemExit: If a critical error occurs that prevents successful execution.
    """
    # Shared with progress_function, which is registered as the download callback.
    global progress_bar
    # Bound up front so the cleanup in `finally` is safe when we return or
    # fail before anything was downloaded.
    audio_file = None

    try:
        logger.info(f"Accessing YouTube URL: {video_url}")
        yt = YouTube(video_url, on_progress_callback=progress_function)

        logger.info("Fetching the highest quality audio stream")
        audio_stream = yt.streams.filter(only_audio=True).first()

        if audio_stream is None:
            logger.warning("No audio stream found for this video.")
            return None

        #logger.info(f"Downloading audio for: {yt.title}")
        progress_bar = tqdm(total=1.0, unit='iB', unit_scale=True, desc=yt.title)
        try:
            audio_file = audio_stream.download(output_path)
        finally:
            # Close the bar even if the download fails midway.
            progress_bar.close()
        logger.info(f"Audio downloaded: {yt.title} to {output_path}")

        # Checking file size
        max_file_size = 24 * 1024 * 1024  # 24MB
        file_size = os.path.getsize(audio_file)
        # Convert file size to MB for logging
        file_size_MB = file_size / (1024 * 1024)  # Convert bytes to MB
        logger.info(f"Downloaded Audio Size is: {file_size_MB:.2f} MB")
        if file_size > max_file_size:
            logger.error("File size exceeds 24MB limit.")
            sys.exit("File size limit exceeded.")

        try:
            logger.info("Initializing OpenAI client for transcription.")
            client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

            logger.info("Transcribing using OpenAI's Whisper model.")
            # Context manager so the upload handle is closed before the file
            # is deleted in `finally`.
            with open(audio_file, "rb") as audio_handle:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_handle,
                    response_format="text"
                )
            # The f-prefix was missing, so the placeholder was logged literally.
            logger.info(f"\nYouTube video transcription:\n\n{transcript}\n")
            return transcript, yt.title
        except Exception as e:
            logger.error(f"Failed in Whisper transcription: {e}")
            sys.exit("Transcription failure.")
    except Exception as e:
        logger.error(f"An error occurred during YouTube video processing: {e}")
        sys.exit("Video processing failure.")
    finally:
        if audio_file and os.path.exists(audio_file):
            os.remove(audio_file)
            logger.info("Temporary audio file removed.")

View File

@@ -0,0 +1,74 @@
from pytube import YouTube
import os
import sys
from loguru import logger
from openai import OpenAI
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def speech_to_text(video_url, output_path='.'):
    """
    Transcribes speech to text from a YouTube video URL using OpenAI's Whisper model.

    Args:
        video_url (str): URL of the YouTube video to transcribe.
        output_path (str, optional): Directory where the audio file will be saved. Defaults to '.'.

    Returns:
        tuple | None: ``(transcript, video_title)`` on success, or ``None`` when
        the video has no audio-only stream.

    Raises:
        SystemExit: If the audio exceeds the 24MB limit, the transcription call
            fails, or any other error occurs while processing the video.
    """
    # Bound before the try so the cleanup in `finally` never hits an unbound
    # name (e.g. when YouTube() raises or no audio stream exists).
    audio_file = None
    try:
        logger.info(f"Accessing YouTube URL: {video_url}")
        yt = YouTube(video_url)
        logger.info("Fetching the highest quality audio stream")
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            logger.warning("No audio stream found for this video.")
            return None

        logger.info(f"Downloading audio for: {yt.title}")
        audio_file = audio_stream.download(output_path)
        logger.info(f"Audio downloaded: {yt.title} to {output_path}")

        # Checking file size
        max_file_size = 24 * 1024 * 1024  # 24MB
        file_size = os.path.getsize(audio_file)
        # Report the real size of the download, not the limit.
        file_size_mb = file_size / (1024 * 1024)
        logger.info(f"Downloaded Audio Size is: {file_size_mb:.2f} MB")
        if file_size > max_file_size:
            logger.error("File size exceeds 24MB limit.")
            sys.exit("File size limit exceeded.")

        try:
            logger.info("Initializing OpenAI client for transcription.")
            client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
            logger.info("Transcribing using OpenAI's Whisper model.")
            # Context manager guarantees the audio handle is closed afterwards.
            with open(audio_file, "rb") as audio_handle:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_handle,
                    response_format="text"
                )
            return transcript, yt.title
        except Exception as e:
            logger.error(f"Failed in Whisper transcription: {e}")
            sys.exit("Transcription failure.")
    except Exception as e:
        logger.error(f"An error occurred during YouTube video processing: {e}")
        sys.exit("Video processing failure.")
    finally:
        # The downloaded audio is only a temporary artifact; always remove it.
        if audio_file and os.path.exists(audio_file):
            os.remove(audio_file)
            logger.info("Temporary audio file removed.")

124
lib/main_audio_to_blog.py Normal file
View File

@@ -0,0 +1,124 @@
import json
import os
import datetime #I wish
import sys
import openai
from tqdm import tqdm, trange
import time
import re
from textwrap import dedent
import nltk
nltk.download('punkt', quiet=True)
from nltk.corpus import stopwords
nltk.download('stopwords', quiet=True)
from .write_blogs_from_youtube_videos import youtube_to_blog
from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
from .gpt_online_researcher import do_online_research
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
# fixme: Remove the hardcoding, need add another option OR in config ?
image_dir = os.path.join(os.getcwd(), "blog_images")
# TBD: This can come from config file.
output_path = os.path.join(os.getcwd(), "blogs")
# WordPress connection settings. Credentials must never live in source control,
# so they are read from the environment (WORDPRESS_USERNAME / WORDPRESS_PASSWORD);
# they are None when unset. NOTE(review): the application password that was
# previously committed here should be revoked and regenerated.
wordpress_url = os.environ.get('WORDPRESS_URL', 'https://latestaitools.in/')
wordpress_username = os.environ.get('WORDPRESS_USERNAME')
wordpress_password = os.environ.get('WORDPRESS_PASSWORD')
def generate_youtube_blog(yt_url_list, output_format="markdown"):
    """Takes a list of youtube videos and generates blog for each one of them.

    For every URL the video is turned into blog text, online research is run,
    then title, description, tags, categories, intro, image and conclusion are
    generated, the result is proof-edited, converted to the requested format
    and saved to a file.

    Args:
        yt_url_list: Iterable of YouTube video URLs.
        output_format (str): Target format; matched case-insensitively against
            "html" and "markdown".

    Returns:
        None. The generated blogs are written through ``save_blog_to_file``.

    Raises:
        SystemExit: With status 1 when any generation step fails. A failure to
            save the file is only logged.
    """
    requested_format = output_format.lower()
    for a_yt_url in yt_url_list:
        try:
            logger.info(f"Starting to write blog on URL: {a_yt_url}")
            # NOTE(review): youtube_to_blog appears to return a
            # (blog_content, title) tuple; the helpers below receive that value
            # as-is - confirm this is what they expect.
            yt_blog = youtube_to_blog(a_yt_url)
        except Exception as e:
            logger.error(f"Error in youtube_to_blog: {e}")
            sys.exit(1)

        try:
            logger.info("Starting with online research for URL title.")
            research_report = do_online_research(yt_blog)
        except Exception as e:
            logger.error(f"Error in do_online_research: {e}")
            sys.exit(1)

        try:
            # Note: Check if the order of input matters for your function
            logger.info("Preparing a blog content from audio script and online research content...")
            # NOTE(review): the return value is discarded, as in the original;
            # confirm whether the merged content should replace yt_blog.
            blog_with_research(research_report, yt_blog)
        except Exception as e:
            logger.error(f"Error in blog_with_research: {e}")
            sys.exit(1)

        try:
            # Get the title and meta description of the blog.
            blog_meta_desc = generate_blog_description(yt_blog)
            title = generate_blog_title(blog_meta_desc)
            logger.info(f"Title is {title} and description is {blog_meta_desc}")
            blog_markdown_str = "# " + title.replace('"', '') + "\n\n"

            # Get blog tags and categories.
            blog_tags = get_blog_tags(blog_meta_desc)
            logger.info(f"Blog tags are: {blog_tags}")
            blog_categories = get_blog_categories(blog_meta_desc)
            logger.info(f"Blog categories are: {blog_categories}")

            # Generate an introduction for the blog
            blog_intro = get_blog_intro(title, yt_blog)
            logger.info(f"The Blog intro is:\n {blog_intro}")
            blog_markdown_str = blog_markdown_str + "\n\n" + f"{blog_intro}" + "\n\n"

            # Generate an image based on meta description
            logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
            main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")

            # Add the body of the blog content.
            blog_markdown_str = blog_markdown_str + "\n\n" + f'{yt_blog}' + "\n\n"

            # Get the Conclusion of the blog, by passing the generated blog.
            blog_conclusion = get_blog_conclusion(blog_markdown_str)
            # TBD: Add another image.
            blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n\n" + f"{blog_conclusion}" + "\n"

            # Proofread the blog, edit and remove duplicates and refine it further.
            # Presently, fixing the blog keywords to be tags and categories.
            blog_keywords = f"{blog_tags} + {blog_categories}"
            blog_markdown_str = blog_proof_editor(blog_markdown_str, blog_keywords)

            # Convert to the format the caller asked for. The original mapped
            # 'html' to the markdown converter and tested output_path instead
            # of output_format for the other branch.
            if 'html' in requested_format:
                blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
            elif 'markdown' in requested_format:
                blog_markdown_str = convert_tomarkdown_format(blog_markdown_str)

            # Try to save the blog content in a file, in whichever format. Just dump it.
            try:
                save_blog_to_file(blog_markdown_str, title, blog_meta_desc,
                                  blog_tags, blog_categories, main_img_path)
            except Exception as err:
                # Best effort: log the failure plus the content so it is not lost.
                logger.error(f"Failed to Save blog content: {err}\n{blog_markdown_str}")
        except Exception as e:
            logger.error(f"Error: Failed to generate_youtube_blog: {e}")
            # sys.exit, not the `site`-provided exit(), matching the rest of the file.
            sys.exit(1)

View File

@@ -0,0 +1,113 @@
def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
                           wordpress=False, research_online=False, output_format="HTML"):
    """
    This function will take a blog Topic to first generate sections for it
    and then generate content for each section.

    Args:
        num_blogs: Number of blog topics to request.
        blog_keywords: Keywords used for topic and sub-topic generation.
        niche: Passed through to ``generate_blog_topics``.
        num_subtopics: Number of outline entries requested per topic.
        wordpress (bool): When True, upload the image and the post to WordPress.
        research_online (bool): Currently unused (online research is TBD).
        output_format (str): "html" (case-insensitive) converts the content
            to HTML before saving.

    Returns:
        None. Each blog is written through ``save_blog_to_file``.

    Raises:
        SystemExit: When image analysis, media upload or post upload fails.
    """
    # Case-insensitive so that the default "HTML" actually selects HTML output.
    wants_html = 'html' in output_format.lower()

    # TBD: Check if the generated topics are equal to what user asked.
    blog_topic_arr = generate_blog_topics(blog_keywords, num_blogs, niche)
    logger.info(f"Generated Blog Topics:---- \n{blog_topic_arr}\n")

    # One topic per line of the generated text.
    for a_blog_topic in blog_topic_arr.split('\n'):
        a_blog_topic = a_blog_topic.replace('"', '')
        # Drop list numbering such as "1. " from the start of the line.
        a_blog_topic = re.sub(r'^[\d.\s]+', '', a_blog_topic).strip()
        if not a_blog_topic:
            # Blank lines in the model output are not topics.
            continue
        blog_markdown_str = "# " + a_blog_topic + "\n\n"

        # Get the introduction specific to blog title and sub topics.
        tpc_outlines = generate_topic_outline(a_blog_topic, num_subtopics)
        tpc_outlines = [line for line in tpc_outlines.split("\n") if line.strip()]
        blog_intro = get_blog_intro(a_blog_topic, tpc_outlines)
        logger.info(f"The intro is:\n{blog_intro}")
        blog_markdown_str = blog_markdown_str + "### Introduction" + "\n\n" + f"{blog_intro}" + "\n\n"

        # Now, for each blog we have sub topic. Generate content for each of the sub topic.
        for a_outline in tpc_outlines:
            a_outline = a_outline.replace('"', '')
            logger.info(f"Generating content for sub-topic: {a_outline}")
            sub_topic_content = generate_topic_content(blog_keywords, a_outline)
            blog_markdown_str = blog_markdown_str + "\n" + f"\n {sub_topic_content}" + "\n\n"

        # Get the Conclusion of the blog, by passing the generated blog.
        blog_conclusion = get_blog_conclusion(blog_markdown_str)
        blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n" + f"{blog_conclusion}" + "\n"
        logger.info(f"Final blog content: {blog_markdown_str}")

        # TBD: when research_online is set, merge an online research report
        # into the content (not implemented yet).

        blog_meta_desc = generate_blog_description(blog_markdown_str)
        logger.info(f"\nThe blog meta description is:{blog_meta_desc}\n")

        # Generate an image based on meta description
        logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
        main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")

        blog_tags = get_blog_tags(blog_markdown_str)
        logger.info(f"\nBlog tags for generated content: {blog_tags}\n")

        blog_categories = get_blog_categories(blog_markdown_str)
        logger.info(f"Generated blog categories: {blog_categories}\n")

        # Track whether the content is already HTML so it is converted only once.
        is_html = False
        if wants_html:
            blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
            is_html = True

        # Check if blog needs to be posted on wordpress.
        if wordpress:
            # Fixme: Fetch all tags and categories to check, if present ones are present and
            # use them else create new ones. Its better to use chatgpt than string comparison.
            # Similar tags and categories will be missed.
            logger.info("Uploading the blog to wordpress.\n")
            main_img_path = compress_image(main_img_path, quality=85)
            try:
                img_details = analyze_and_extract_details_from_image(main_img_path)
                alt_text = img_details.get('alt_text')
                img_description = img_details.get('description')
                img_title = img_details.get('title')
                caption = img_details.get('caption')
            except Exception as e:
                sys.exit(f"Error occurred in analyze_and_extract_details_from_image: {e}")

            try:
                media = upload_media(wordpress_url, wordpress_username, wordpress_password,
                                     main_img_path, alt_text, img_description, img_title, caption)
            except Exception as err:
                sys.exit(f"Error occurred in upload_media: {err}")

            # Then create the post with the uploaded media as the featured image
            media_id = media['id']
            if not is_html:
                blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
                is_html = True
            try:
                upload_blog_post(wordpress_url, wordpress_username, wordpress_password, a_blog_topic,
                                 blog_markdown_str, media_id, blog_meta_desc, blog_categories,
                                 blog_tags, status='publish')
            except Exception as err:
                sys.exit(f"Failed to upload blog to wordpress.Error: {err}")

        # TBD: Save the blog content as a .md file. Markdown or HTML ?
        save_blog_to_file(blog_markdown_str,
                          a_blog_topic,
                          blog_meta_desc, blog_tags,
                          blog_categories, main_img_path)
# Now, we need perform some *basic checks on the blog content, such as:
# is_content_ai_generated.py, plagiarism_checker_from_known_sources.py
# seo_analyzer.py . These are present in the lib folder.
# prompt: Rewrite, improve and paraphrase [text] and use headings and subheadings
# to break up the content and make it easier to read using the keyword [keyword].

76
lib/save_blog_to_file.py Normal file
View File

@@ -0,0 +1,76 @@
def save_blog_to_file(blog_content, blog_title,
                      blog_meta_desc, blog_tags, blog_categories, main_img_path, file_type="md"):
    """ Common function to save the generated blog to a file.

    The file is named ``YYYY-MM-DD-<slug>.<file_type>`` inside the module-level
    ``output_path``. Markdown files are prefixed with a Jekyll front matter
    block (https://jekyllrb.com/docs/front-matter/); HTML files contain only
    ``blog_content``.

    Args:
        blog_content (str): Body of the blog.
        blog_title (str): Title; used for the front matter and the file name.
        blog_meta_desc (str): Meta description. If it contains ':', only the
            text between the first and second colon is kept.
        blog_tags: Inserted inside ``tags: [...]``.
        blog_categories: Inserted inside ``categories: [...]``.
        main_img_path: Path of the main image; only its base name is written.
        file_type (str): "md" or "html".

    Raises:
        ValueError: If ``file_type`` is neither "md" nor "html".
        Exception: If the file cannot be written.
    """
    if file_type not in ("md", "html"):
        raise ValueError(f"Unsupported file_type: {file_type!r}; expected 'md' or 'html'.")

    logger.info(f"The blog will be saved at: {output_path}")
    logger.debug(f"Blog Title is: {blog_title}")

    # Build the file-name slug: strip unsafe characters, drop stop words while
    # the words are still space separated, and only then join with dashes.
    blog_title_md = re.sub(r'[^a-zA-Z0-9- ]', '', blog_title)
    blog_title_md = remove_stop_words(blog_title_md)
    blog_title_md = re.sub(r'[\s-]+', '-', blog_title_md).strip('-')

    # A colon inside an unquoted YAML value would break the front matter.
    blog_title = blog_title.replace(":", "")
    if ':' in blog_meta_desc:
        blog_meta_desc = blog_meta_desc.split(':')[1].strip()

    if not os.path.exists(output_path):
        logger.warning(f"Blog output directory {output_path} Does Not Exist; creating it.")
        os.makedirs(output_path, exist_ok=True)

    # date: YYYY-MM-DD HH:MM:SS +/-TTTT
    from zoneinfo import ZoneInfo
    dtobj = datetime.datetime.now(tz=ZoneInfo('Asia/Kolkata'))
    formatted_date = dtobj.strftime('%Y-%m-%d %H:%M:%S %z')

    sections = []
    if file_type == "md":
        logger.info("Writing/Saving the resultant blog content in Markdown format.")
        image_name = os.path.basename(main_img_path) if main_img_path else ""
        # NOTE(review): `path`/`alt` are nested under `image:` here; the
        # original indentation is not visible - confirm against the theme.
        sections.append("\n".join([
            "---",
            f"title: {blog_title}",
            f"date: {formatted_date}",
            f"categories: [{blog_categories}]",
            f"tags: [{blog_tags}]",
            f"description: {blog_meta_desc}",
            "img_path: '/assets/'",
            "image:",
            f"  path: {image_name}",
            f"  alt: {blog_title}",
            "---",
        ]) + "\n\n")
    else:
        logger.info("Writing/Saving the resultant blog content in HTML format.")
    sections.append(blog_content)

    # Use the same timezone-aware timestamp for the file name and the front matter.
    blog_output_path = os.path.join(
        output_path,
        f"{dtobj.strftime('%Y-%m-%d')}-{blog_title_md}.{file_type}"
    )

    # Save the generated blog content to a file.
    try:
        with open(blog_output_path, "w", encoding="utf-8") as f:
            f.writelines(sections)
    except Exception as e:
        raise Exception(f"Failed to write blog content: {e}") from e
    logger.info(f"\nSuccessfully saved and Posted blog at: {blog_output_path}\n")
# Helper function
def remove_stop_words(sentence):
    """Return ``sentence`` without English stop words.

    The text is split with ``nltk.word_tokenize``; tokens whose lower-cased
    form appears in NLTK's English stop-word list are dropped and the rest
    are joined back with single spaces.
    """
    tokens = nltk.word_tokenize(sentence)
    english_stop_words = set(stopwords.words('english'))
    kept_tokens = []
    for token in tokens:
        if token.lower() in english_stop_words:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)

View File

@@ -5,63 +5,62 @@ from PIL import Image
from stability_sdk import client
import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
# Our Host URL should not be prepended with "https" nor should it have a trailing slash.
# Set the host URL environment variable. Ensure it doesn't have 'https' or a trailing slash.
os.environ['STABILITY_HOST'] = 'grpc.stability.ai:443'
# Sign up for an account at the following link to get an API Key.
# Ensure you sign up for an account to obtain an API key:
# https://platform.stability.ai/
# Click on the following link once you have created an account to be taken to your API Key.
# Your API key can be found here after account creation:
# https://platform.stability.ai/account/keys
def generate_stable_diffusion_image(prompt, image_dir):
    """
    Generate images using Stable Diffusion API based on a given prompt.

    Args:
        prompt (str): The prompt to generate the image.
        image_dir (str): The directory where the image will be saved.

    Raises:
        Warning: If the adult content classifier is triggered.
        Exception: For any issues during image generation or saving.
    """
    try:
        # Initialize the StabilityInference client with the API key and other settings.
        stability_api = client.StabilityInference(
            key=os.environ['STABILITY_KEY'],  # Reference to the API key.
            verbose=True,  # Enable verbose mode for debug messages.
            engine="stable-diffusion-xl-1024-v1-0",  # Engine used for generation.
        )

        # Use the caller's prompt; the hard-coded debug prompt that used to
        # overwrite it made the parameter meaningless.
        answers = stability_api.generate(
            prompt=prompt,
            seed=4253978046,  # Deterministic seed for reproducible results.
            steps=50,  # Number of inference steps.
            cfg_scale=7.0,  # Strength of prompt matching.
            width=1024, height=1024,  # Image dimensions.
            samples=1,  # Number of images to generate.
            sampler=generation.SAMPLER_K_DPMPP_2M  # Denoising sampler selection.
        )

        # Process responses and save images.
        for resp in answers:
            for artifact in resp.artifacts:
                if artifact.finish_reason == generation.FILTER:
                    warnings.warn(
                        "Request activated safety filters. Modify the prompt and retry."
                    )
                if artifact.type == generation.ARTIFACT_IMAGE:
                    img = Image.open(io.BytesIO(artifact.binary))
                    img_name = os.path.join(image_dir, f"{artifact.seed}.png")
                    # Save first so a failing viewer cannot lose the image.
                    img.save(img_name)  # Save the image with the seed in the filename.
                    img.show()
    except Exception as e:
        raise Exception(f"Error during image generation or saving: {e}") from e


# Example usage. Guarded so that importing this module no longer triggers an
# API call (the previous module-level call also referenced an undefined
# `prompt` and a developer-specific directory).
if __name__ == "__main__":
    generate_stable_diffusion_image("A futuristic cityscape", os.getcwd())

44
lib/toc_example.py Normal file
View File

@@ -0,0 +1,44 @@
from bs4 import BeautifulSoup
import re
def create_table_of_contents(html_content):
    """
    Create a table of contents for a given HTML content.

    Every ``h1``-``h6`` element gets an ``id`` (an existing id is kept,
    otherwise ``toc_<n>`` is assigned) and a matching link is added to a
    ``<div id="table-of-contents">`` placed at the start of the document.

    Args:
        html_content (str): HTML content of the blog post.

    Returns:
        str: HTML content with a table of contents.
    """
    # Use BeautifulSoup to parse the HTML
    soup = BeautifulSoup(html_content, 'html.parser')

    # Find all header tags (h1, h2, h3, h4, h5, h6)
    headers = soup.find_all(re.compile('^h[1-6]$'))

    # Create a table of contents
    toc = BeautifulSoup('<div id="table-of-contents"><h2>Table of Contents</h2><ul></ul></div>', 'html.parser')
    toc_ul = toc.find('ul')

    # Loop through headers and add them to the table of contents
    for i, header in enumerate(headers, start=1):
        # Keep an id the author already set so existing in-page links to it
        # keep working; only generate one when the header has none.
        header_id = header.get('id') or f"toc_{i}"
        header['id'] = header_id

        toc_entry = soup.new_tag('li')
        toc_link = soup.new_tag('a', href=f"#{header_id}")
        toc_link.string = header.get_text()
        toc_entry.append(toc_link)
        toc_ul.append(toc_entry)

    # Insert the table of contents at the beginning of the content
    soup.insert(0, toc)
    return str(soup)
# Example usage: builds a small sample document, adds a table of contents to
# it and prints the result. There is no __main__ guard, so this also runs
# whenever the module is imported.
html_content = "<h1>Title</h1><p>Some text</p><h2>Subtitle 1</h2><p>Text under subtitle 1</p><h2>Subtitle 2</h2><p>Text under subtitle 2</p>"
html_with_toc = create_table_of_contents(html_content)
print(html_with_toc)

View File

@@ -14,23 +14,34 @@ logger.add(sys.stdout,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
from .gpt_providers.openai_gpt_provider import openai_chatgpt, openai_chatgpt_streaming_text, speech_to_text
from .gpt_providers.stt_audio_blog import speech_to_text
from .gpt_providers.openai_chat_completion import openai_chatgpt
def youtube_to_blog(video_url):
    """Transcribe a YouTube video and turn the transcript into blog content.

    Args:
        video_url (str): URL of the YouTube video.

    Returns:
        tuple: ``(audio_blog_content, audio_title)`` - the text produced by
        ``summarize_youtube_video`` and the title returned by ``speech_to_text``.

    Raises:
        SystemExit: With status 1 if transcription or summarization fails.
    """
    # The video id used to be parsed with video_url.split("=")[1] for a
    # screenshot feature that is now disabled; that parse raised IndexError
    # for URLs without "=" and its result was unused, so it is removed.
    try:
        # Starting the speech-to-text process
        logger.info("Starting with Speech to Text.")
        audio_text, audio_title = speech_to_text(video_url)
    except Exception as e:
        logger.error(f"Error in speech_to_text: {e}")
        sys.exit(1)  # Exit the program due to error in speech_to_text

    try:
        # Summarizing the content of the YouTube video
        audio_blog_content = summarize_youtube_video(audio_text)
    except Exception as e:
        logger.error(f"Error in summarize_youtube_video: {e}")
        sys.exit(1)  # Exit the program due to error in summarize_youtube_video

    return audio_blog_content, audio_title
def summarize_youtube_video(user_content):
@@ -42,6 +53,7 @@ def summarize_youtube_video(user_content):
Returns:
A string containing the summary of the video.
"""
logger.info("Start summarize_youtube_video..")
prompt = f"""
You are an expert copywriter specializing in content optimization for SEO.
Your task is to transform a given transcript into a well-structured and engaging blog article. Your objectives include:
@@ -65,6 +77,8 @@ def summarize_youtube_video(user_content):
Follow the above guidelines to create a well-optimized, unique, and informative article that will rank well in search engine results and engage readers effectively.
Craft a blog content from the following transcript:\n{user_content}
"""
#completion_text = openai_chatgpt_streaming_text(prompt)
completion_text = openai_chatgpt(prompt)
return completion_text
try:
response = openai_chatgpt(prompt)
return response
except Exception as err:
SystemError(f"Error in generating blog summary: {err}")