Blogen-V0.1 AI blog writer. Video, Image, Research and write blogs.

This commit is contained in:
AjaySi
2023-11-09 13:22:30 +05:30
parent eb87847124
commit d91688055b
118 changed files with 61599 additions and 3142 deletions

View File

@@ -10,14 +10,93 @@
import json
import os
import datetime #I wish
import sys
import openai
from tqdm import tqdm, trange
import time
import re
from textwrap import dedent
import nltk
nltk.download('punkt')
from nltk.corpus import stopwords
nltk.download('stopwords')
from .gpt_providers.openai_gpt_provider import openai_chatgpt
from .gpt_providers.openai_gpt_provider import openai_chatgpt, gen_new_from_given_img
from .generate_image_from_prompt import generate_image
from .write_blogs_from_youtube_videos import youtube_to_blog
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
# fixme: Remove the hardcoding, need add another option OR in config ?
image_dir = "pseo_website/assets/"
image_dir = os.path.join(os.getcwd(), image_dir)
# TBD: This can come from config file.
output_path = "pseo_website/_posts/"
output_path = os.path.join(os.getcwd(), output_path)
def generate_youtube_blog(yt_url_list):
"""Takes a list of youtube videos and generates blog for each one of them.
"""
# Use to store the blog in a string, to save in a *.md file.
blog_markdown_str = ""
for a_yt_url in yt_url_list:
try:
yt_img_path, yt_blog = youtube_to_blog(a_yt_url)
# Get the title and meta description of the blog.
title = generate_blog_title(yt_blog)
blog_meta_desc = generate_blog_description(yt_blog)
logger.info(f"Title is {title} and description is {blog_meta_desc}")
#blog_markdown_str = "# " + title.replace('"', '') + "\n\n"
# Generate an introduction for the blog
blog_intro = get_blog_intro(title, yt_blog)
logger.info(f"The Blog intro is:\n {blog_intro}")
blog_markdown_str = blog_markdown_str + "\n\n" + f"{blog_intro}" + "\n\n"
# Generate an image based on meta description
logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
# Get a variation of the yt url screenshot to use in the blog.
#varied_img_path = gen_new_from_given_img(yt_img_path, image_dir)
#logger.info(f"Image path: {main_img_path} and varied path: {varied_img_path}")
#blog_markdown_str = blog_markdown_str + f'![img-description]({os.path.basename(varied_img_path)})' + '_Image Caption_'
stbdiff_img_path = generate_image(yt_img_path, image_dir, "stable_diffusion")
logger.info(f"Image path: {main_img_path} from stable diffusion: {stbdiff_img_path}")
blog_markdown_str = blog_markdown_str + f'![img-description]({os.path.basename(stbdiff_img_path)})' + f'_{title}_'
# Add the body of the blog content.
blog_markdown_str = blog_markdown_str + "\n\n" + f'{yt_blog}' + "\n\n"
# Get the Conclusion of the blog, by passing the generated blog.
blog_conclusion = get_blog_conclusion(blog_markdown_str)
# TBD: Add another image.
blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n\n" + f"{blog_conclusion}" + "\n"
print(f"Conclusion: {blog_markdown_str}")
# Get blog tags and categories.
blog_tags = get_blog_tags(yt_blog)
logger.info(f"Blog tags are: {blog_tags}")
blog_categories = get_blog_categories(yt_blog)
logger.info(f"Blog categories are: {blog_categories}")
save_blog_to_file(blog_markdown_str, title, blog_meta_desc, blog_tags, blog_categories, main_img_path)
#html_blog = convert_markdown_to_html(blog_markdown_str)
#print(html_blog)
except Exception as e:
# raise assertionerror
logger.info(f"Error: Failed to generate_youtube_blog: {e}")
exit(1)
def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics):
@@ -25,22 +104,12 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics):
This function will take a blog Topic to first generate sections for it
and then generate content for each section.
"""
# I want you to act as a blogger and you want to write a blog post about [topic],
# with a friendly and approachable tone that engages readers.
# Your target audience is [define your target audience].
# Write in a personal style using singular first-person pronouns only.
# I want you to include these keywords: [keyword 1], [keyword 2], [keyword 3] throughout the article.
# Format your response using markdown.
# Use headings, subheadings, bullet points, and bold to organize the information.
# Answer the most commonly asked questions about the topic at the end of the article.
# Create a list of the most popular tools used by the [Field of Interest] professionals with the pros and cons of each tool.
# Use to store the blog in a string, to save in a *.md file.
blog_markdown_str = ""
# TBD: Check if the generated topics are equal to what user asked.
blog_topic_arr = generate_blog_topics(blog_keywords, num_blogs, niche)
print(f"Generated Blog Topics:---- {blog_topic_arr}\n")
logger.info(f"Generated Blog Topics:---- {blog_topic_arr}\n")
# For each of blog topic, generate content.
for a_blog_topic in blog_topic_arr:
@@ -48,13 +117,15 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics):
blog_markdown_str = "# " + a_blog_topic.replace('"', '') + "\n\n"
# Get the introduction specific to blog title and sub topics.
tpc_outlines = generate_topic_outline(a_blog_topic, num_subtopics)
blog_intro = get_blog_intro(a_blog_topic, tpc_outlines)
print(f"The intro is:\n {blog_intro}")
logger.info(f"The intro is:\n {blog_intro}")
blog_markdown_str = blog_markdown_str + "### Introduction" + "\n\n" + f"{blog_intro}" + "\n\n"
# Now, for each blog we have sub topic. Generate content for each of the sub topic.
for a_outline in tpc_outlines:
sub_topic_content = generate_topic_content(blog_keywords, a_outline)
print(f"Generating content for sub-topic: {a_outline}")
logger.info(f"Generating content for sub-topic: {a_outline}")
# a_outline is sub topic heading, hence part ToC also.
blog_markdown_str = blog_markdown_str + "\n\n" + f"### {a_outline}" + "\n\n"
blog_markdown_str = blog_markdown_str + "\n" + f"\n {sub_topic_content}" + "\n\n"
@@ -64,20 +135,21 @@ def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics):
blog_conclusion = get_blog_conclusion(blog_markdown_str)
blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n" + f"{blog_conclusion}" + "\n"
# print/check the final blog content.
print(f"Final blog content: {blog_markdown_str}")
# logger.info/check the final blog content.
logger.info(f"Final blog content: {blog_markdown_str}")
blog_meta_desc = generate_blog_description(blog_markdown_str)
print(f"\nGet the blog meta description:{blog_meta_desc}")
logger.info(f"\nGet the blog meta description:{blog_meta_desc}")
blog_tags = get_blog_tags(blog_markdown_str)
print(f"\nBlog tags for generated content: {blog_tags}")
logger.info(f"\nBlog tags for generated content: {blog_tags}")
blog_categories = get_blog_categories(blog_markdown_str)
print(f"Generated blog categories: {blog_categories}")
logger.info(f"Generated blog categories: {blog_categories}")
# TBD: Save the blog content as a .md file. Markdown or HTML ?
save_blog_to_file(blog_markdown_str, a_blog_topic, blog_meta_desc, blog_tags, blog_categories)
exit(1)
# Use chatgpt to convert the text into HTML or markdown.
# Now, we need perform some *basic checks on the blog content, such as:
@@ -104,24 +176,36 @@ def generate_blog_topics(blog_keywords, num_blogs, niche):
more_keywords = get_related_keywords(num_blogs, blog_keywords, niche)
prompt = prompt + """Use the following keywords wisely, without keyword stuffing: {more_keywords}"""
print(f"prompt used for blog titles: {prompt}\n")
logger.info(f"prompt used for blog topics: {prompt}\n")
# Calculate the max tokens based on the number of blogs
max_tokens = min(1000, num_blogs * 100)
try:
response = openai_chatgpt(
prompt,
model="text-davinci-003",
temperature=0.1,
max_tokens=max_tokens,
top_p=1,
n=1
)
topic_list = extract_key_text(response)
return(topic_list)
response = openai_chatgpt(prompt)
return response
except Exception as err:
SystemError(f"Error in generating blog topics: {err}")
def generate_blog_title(blog_meta_desc):
"""
Given a blog title generate an outline for it
"""
# TBD: Remove hardcoding, make dynamic
prompt = f"""As a SEO expert and content writer, I will provide you with blog. Your task is write title for it.
Follows SEO best practises to suggest the blog title.
Please keep the titles concise, not exceeding 60 words, and ensure to maintain their meaning.
Respond with only one title and no description, for this given blog content: {blog_meta_desc}
"""
# The suggested {num_subtopics} outline should include few long-tailed keywords and most popular questions.
# TBD: Include --niche
logger.debug(f"Prompt used for blog title :{prompt}")
try:
response = openai_chatgpt(prompt)
except Exception as err:
SystemError(f"Error in generating Blog Title: {err}")
return response
def generate_topic_outline(blog_title, num_subtopics):
"""
Given a blog title generate an outline for it
@@ -132,20 +216,13 @@ def generate_topic_outline(blog_title, num_subtopics):
"""
# The suggested {num_subtopics} outline should include few long-tailed keywords and most popular questions.
# TBD: Include --niche
print(f"prompt used for blog title Outline :{prompt}")
logger.info(f"\nPrompt used for blog title Outline :{prompt}\n\n")
# TBD: Add logic for which_provider and which_model
response = openai_chatgpt(
prompt,
model="text-davinci-003",
temperature=0.1,
max_tokens=100,
top_p=0.9,
n=1
)
text_values = []
for choice in response["choices"]:
text_values.extend(choice["text"].split("\n"))
return ([element for element in text_values if element])
try:
response = openai_chatgpt(prompt)
except Exception as err:
SystemError(f"Error in generating Blog Title: {err}")
return response
def generate_topic_content(blog_keywords, sub_topic):
@@ -162,11 +239,10 @@ def generate_topic_content(blog_keywords, sub_topic):
"Format your response using markdown. Use headings, subheadings, bullet points, and bold to organize the information."
)
try:
response = openai_chatgpt(prompt)
response = openai_chatgpt(
prompt,
model="text-davinci-003",
temperature=0.7,
model="gpt-3.5-turbo",
temperature=0.2,
max_tokens=1000,
top_p=0.9,
n=1
@@ -185,26 +261,18 @@ def get_blog_intro(blog_title, blog_topics):
"""
Generate blog introduction as per title and sub topics
"""
prompt = f"""As a professional writer, craft a captivating, inviting, and concise (no more than 550 characters).
The introduction should compel readers to delve deeper into the blog post,
titled: {blog_title} with the following sub-topics: {blog_topics}
"""
prompt = f"""As a skilled wordsmith, I'll equip you with a blog title and relevant topics, tasking you with crafting an engaging introduction. Your challenge: Create a brief, compelling entry that entices readers to explore the entire post. This introduction must be concise (under 250 characters) yet powerful, clearly stating the blog's purpose and what readers stand to gain.
Intrigue your audience from the start with vibrant language, employing strong verbs and vivid descriptions. Address a common challenge your readers face, demonstrating empathy and positioning yourself as their go-to expert. Pose thought-provoking questions that prompt reader engagement and contemplation.
Remember, your words matter. This introduction serves as the cornerstone of the blog post. It should not only captivate attention but also encourage deeper exploration. Additionally, strategically integrate relevant keywords to enhance visibility on search engine results pages (SERPs). Your mission: Craft an introduction that resonates, leaving readers eager to delve further into the titled piece: '{blog_title}', covering these intriguing sub-topics: {blog_topics}."""
try:
# TBD: Add logic for which_provider and which_model
response = openai_chatgpt(
prompt,
model="text-davinci-003",
temperature=0.1,
max_tokens=1000,
top_p=0.9,
n=1
)
text_values = []
for choice in response["choices"]:
text_values.extend(choice["text"].split("\n"))
return (' '.join([element for element in text_values if element]))
response = openai_chatgpt(prompt)
except Exception as err:
SystemError(f"Error in generating topic content: {err}")
SystemError(f"Error in generating Blog Introduction: {err}")
return response
def get_blog_conclusion(blog_content):
@@ -214,54 +282,31 @@ def get_blog_conclusion(blog_content):
prompt = f"""As an expert SEO and blog writer, please conclude the given blog providing vital take aways,
summarise key points (no more than 300 characters) in bullet points. The blog content: {blog_content}
"""
print(f"Generating blog conclusion iwth prompt: {prompt}")
logger.info(f"Generating blog conclusion iwth prompt: {prompt}")
try:
# TBD: Add logic for which_provider and which_model
response = openai_chatgpt(
prompt,
model="text-davinci-003",
temperature=0.1,
max_tokens=450,
top_p=0.7,
n=1
)
text_values = []
for choice in response["choices"]:
text_values.extend(choice["text"].split("\n"))
return (' '.join([element for element in text_values if element]))
response = openai_chatgpt(prompt)
except Exception as err:
SystemError(f"Error in generating blog conclusion: {err}")
else:
return response
def generate_blog_description(blog_content):
"""
Prompt designed to give SEO optimized blog descripton
"""
# Suggest keywords that I should include in my meta description for my blog post on [topic]
# I want to generate high CTR meta and keyword rich meta title and meta descriptions in text format.
# My keywords are [keyword 1], [keyword 2], [keyword 3]
# Suggest a meta description for the content above, make it user-friendly
# and with a call to action, include the keyword [keyword].
prompt = f"""As an expert SEO and blog writer, write meta description for given blog content.
The description should be between 150 and 160 characters long, uses strong, active verbs,
avoids using all caps or excessive punctuation, and is relevant to the blog content.
It should be engaging and encourages users to click on the link.
The blog content: {blog_content}"""
prompt = f"""As an expert SEO and blog writer, Compose a compelling meta description for the given blog content,
adhering to SEO best practices. Keep it between 150-160 characters, incorporating active verbs,
avoiding all caps and excessive punctuation. Ensure relevance, engage users, and encourage clicks.
Use keywords naturally and provide a glimpse of the content's value to entice readers.
Respond with only one of your best effort and do not include your explanations.
Blog Content: {blog_content}"""
try:
# TBD: Add logic for which_provider and which_model
response = openai_chatgpt(
prompt,
model="text-davinci-003",
temperature=0.1,
max_tokens=450,
top_p=0.7,
n=1
)
text_values = []
for choice in response["choices"]:
text_values.extend(choice["text"].split("\n"))
return (' '.join([element for element in text_values if element]))
response = openai_chatgpt(prompt)
return response
except Exception as err:
SystemError(f"Error in generating blog description: {err}")
@@ -271,92 +316,87 @@ def get_blog_tags(blog_article):
Function to suggest tags for the given blog content
"""
# Suggest at least 5 tags for the following blog post [Enter your blog post text here].
prompt = f"""As an expert SEO and blog writer, suggest 3 to 5 relevant, specific,
and popular tags that are unique and consistent to improve the visibility
and discoverability of following blog content: {blog_article}"
"""
prompt = f"""As an expert SEO and blog writer, suggest only 2 relevant and specific blog tags
for the given blog content. Only reply with comma separated values.
Blog content: {blog_article}."""
try:
# TBD: Add logic for which_provider and which_model
response = openai_chatgpt(
prompt,
model="text-davinci-003",
temperature=0.1,
max_tokens=450,
top_p=0.7,
n=1
)
text_values = []
for choice in response["choices"]:
text_values.extend(choice["text"].split("\n"))
return (' '.join([element for element in text_values if element]))
response = openai_chatgpt(prompt)
except Exception as err:
SystemError(f"Error in generating blog tags: {err}")
else:
return response
def get_blog_categories(blog_article):
"""
Function to generate blog categories for given blog content.
"""
prompt = f"""As an expert SEO and content writer, Suggest 2-3 blog categories by identifying
the main topic, most relevant categories, considering the target
audience and the blog's category taxonomy for the following blog content: {blog_article}"
prompt = f"""As an expert SEO and content writer, I will provide you with blog content.
Suggest only 2 blog categories which are most relevant to provided blog content,
by identifying the main topic. Also consider the target audience and the
blog's category taxonomy. Only reply with comma separated values. The blog content is: {blog_article}"
"""
try:
# TBD: Add logic for which_provider and which_model
response = openai_chatgpt(
prompt,
model="text-davinci-003",
temperature=0.1,
max_tokens=100,
top_p=0.7,
n=1
)
text_values = []
for choice in response["choices"]:
text_values.extend(choice["text"].split("\n"))
return (' '.join([element for element in text_values if element]))
response = openai_chatgpt(prompt)
except Exception as err:
SystemError(f"Error in generating blog categories: {err}")
else:
return response
def save_blog_to_file(blog_content, blog_title,
blog_meta_desc, blog_tags, blog_categories, file_type="md"
):
blog_meta_desc, blog_tags, blog_categories, main_img_path, file_type="md"):
""" Common function to save the generated blog to a file.
arg: file_type can be md or html
"""
# TBD: This can come from config file.
output_path = "pseo_website/_posts/"
output_path = os.path.join(os.getcwd(), output_path)
# Convert the spaces in blog_title with dash
logger.info(f"The blog will be saved at: {output_path}")
logger.debug(f"Blog Title is: {blog_title}")
blog_title_md = blog_title
regex = re.compile('[^a-zA-Z0-9- ]')
blog_title = regex.sub('', blog_title)
blog_title = re.sub('--+', '-', blog_title)
blog_title = blog_title.replace(' ', '-')
blog_title_md = regex.sub('', blog_title_md)
blog_title= blog_title.replace(":", "")
blog_title_md = re.sub('--+', '-', blog_title_md)
blog_title_md = blog_title_md.replace(' ', '-')
blog_title_md = remove_stop_words(blog_title_md)
if ':' in blog_meta_desc:
blog_meta_desc = blog_meta_desc.split(':')[1].strip()
if not os.path.exists(output_path):
# If the directory does not exist, create it
#os.makedirs(output_path)
print("Error: Blog output directory is set to {output_path}, which Does Not Exist.")
logger.error("Error: Blog output directory is set to {output_path}, which Does Not Exist.")
# Different output formats are plaintext, html and markdown.
if file_type in "md":
logger.info(f"Writing/Saving the resultant blog content in Markdown format.")
# fill the Front Matter as below at the top of the post: https://jekyllrb.com/docs/front-matter/
# date: YYYY-MM-DD HH:MM:SS +/-TTTT
formatted_date = f"{datetime.datetime.now():%Y-%m-%d %H:%M:%S %z}"
blog_frontmatter = """---
title: {blog_title}
date: {formatted_date}
categories: [{blog_categories}]
tags: [{blog_tags}]
description: {blog_meta_desc}
img_path: '/posts/20180809'
---\n\n"""
from zoneinfo import ZoneInfo
tz=ZoneInfo('Asia/Kolkata')
dtobj = datetime.datetime.now(tz=ZoneInfo('Asia/Kolkata'))
formatted_date = f"{dtobj.strftime('%Y-%m-%d %H:%M:%S %z')}"
blog_frontmatter = f"""\
---
title: {blog_title}
date: {formatted_date}
categories: [{blog_categories}]
tags: [{blog_tags}]
description: {blog_meta_desc}
img_path: '/assets/'
image:
path: {os.path.basename(main_img_path)}
alt: {blog_title}
---\n\n"""
# Create a new file named YYYY-MM-DD-TITLE.EXTENSION and put it in the _posts of the root directory.
# Please note that the EXTENSION must be one of md or markdown
blog_output_path = os.path.join(
output_path,
f"{datetime.date.today().strftime('%Y-%m-%d')}-{blog_title}.md"
f"{datetime.date.today().strftime('%Y-%m-%d')}-{blog_title_md}.md"
)
# Save the generated blog content to a file.
try:
@@ -365,44 +405,7 @@ def save_blog_to_file(blog_content, blog_title,
f.write(blog_content)
except Exception as e:
raise Exception(f"Failed to write blog content: {e}")
print(f"\nSuccessfully saved and Posted blog at: {blog_output_path,}\n")
def extract_key_text(json_data):
"""Extracts key text from a given JSON object.
Args:json_data: A JSON object.
Returns: A list of strings containing the key text.
Raises: ValueError: If the JSON object is not valid.
"""
try:
# Extract the "choices" key from the JSON object.
choices = json_data["choices"]
# Iterate over the "choices" list and extract the "text" key from each item.
key_text = []
for choice in choices:
text = choice["text"]
# Split the text into a list of sentences.
sentences = text.split("\n")
# Iterate over the list of sentences and extract the first sentence.
for sentence in sentences:
# The generated topics usually have 1) or ^\W*\D* . Remove them from prompt.
new_str = sentence.replace("'", '')
new_str = re.sub(r'^(\d*\.)', '', new_str)
key_text.append(new_str)
# Remove duplicate key text.
key_text = list(set(key_text))
# Remove empty values.
key_text = [i for i in key_text if i]
return key_text
except KeyError as e:
raise ValueError(f"Missing key in JSON object: {e.args[0]}")
except TypeError as e:
raise ValueError(f"Invalid JSON object: {e.args[0]}")
logger.info(f"\nSuccessfully saved and Posted blog at: {blog_output_path,}\n")
def get_related_keywords(num_blogs, keywords, niche):
@@ -422,37 +425,104 @@ def get_related_keywords(num_blogs, keywords, niche):
)
try:
# TBD: Add logic for which_provider and which_model
response = openai_chatgpt(
prompt,
model="text-davinci-003",
temperature=0.7,
max_tokens=100,
top_p=0.9,
n=10
)
# Extract the keywords from the response
keywords = []
for choice in response.choices:
# Split the response into words
words = choice.text.split(" ")
# Add the words to the list of keywords
for text in words:
# Remove digits
text = re.sub(r'\d', '', text)
# Remove special characters
text = re.sub(r'[^\w\s]', '', text)
# Remove newline characters
text = text.replace('\n', '')
keywords.append(text)
# Remove any duplicate keywords
keywords = set(keywords)
# Return the list of keywords
return (' '.join(keywords))
response = openai_chatgpt(prompt)
return response
except Exception as err:
SystemError(f"Error in getting related keywords.")
def convert_markdown_to_html(md_content):
""" Helper function to convert given text to HTML
"""
html_response = openai.ChatCompletion.create(
model="gpt-3.5-turbo-16k",
messages=[
{"role": "system", "content": """
Convert Markdown to HTML:
You are a skilled developer tasked with converting a Markdown-formatted text to HTML. You will be given text in markdown format. Follow these steps to perform the conversion:
1. Parse User's Markdown Input: You will receive a Markdown-formatted text as input from the user. Carefully analyze the provided Markdown text, paying attention to different elements such as headings (#), lists (unordered and ordered), bold and italic text, links, images, and code blocks.
2. Generate and Validate HTML: Generate corresponding HTML code for each Markdown element following the conversion guidelines below. Ensure the generated HTML is well-structured and syntactically correct.
3. Preserve Line Breaks: Markdown line breaks (soft breaks) represented by two spaces at the end of a line should be converted to <br> tags in HTML to preserve the line breaks.
4. REMEMBER to generate complete, valid HTML response only.
Follow below Conversion Guidelines:
- Headers: Convert Markdown headers (#, ##, ###, etc.) to corresponding HTML header tags (<h1>, <h2>, <h3>, etc.).
- Lists: Convert unordered lists (*) and ordered lists (1., 2., 3., etc.) to <ul> and <ol> HTML tags, respectively. List items should be enclosed in <li> tags.
- Emphasis: Convert bold (**) and italic (*) text to <strong> and <em> HTML tags, respectively.
- Links: Convert Markdown links ([text](url)) to HTML anchor (<a>) tags. Ensure the href attribute contains the correct URL.
- Images: Convert Markdown image tags (![alt text](image_url)) to HTML image (<img>) tags. Include the alt attribute for accessibility.
- Code: Convert inline code (`code`) to <code> HTML tags. Convert code blocks (```) to <pre> HTML tags for preserving formatting.
- Blockquotes: Convert blockquotes (>) to <blockquote> HTML tags.
"""
},
{"role": "user", "content": f"Convert the following Markdown text to HTML:\n\n{md_content}"}
],
max_tokens=8192,
temperature=1,
n=1,
stream=True
)
for chunk in response:
print(chunk)
logger.info("Finished converting markdown to html.")
if "choices" in html_response and len(html_response["choices"]) > 0:
return html_response["choices"][0]["message"]["content"]
else:
return None
# Helper function
def remove_stop_words(sentence):
# Tokenize the sentence into words
words = nltk.word_tokenize(sentence)
# Get the list of English stop words
stop_words = set(stopwords.words('english'))
# Remove stop words from the sentence
filtered_words = [word for word in words if word.lower() not in stop_words]
# Join the filtered words back into a sentence
filtered_sentence = ' '.join(filtered_words)
return filtered_sentence
def convert_markdown_to_html(md_content):
""" Helper function to convert given text to HTML
"""
html_response = openai.ChatCompletion.create(
model="gpt-3.5-turbo-16k",
messages=[
{"role": "system", "content": """
Convert Markdown to HTML:
You are a skilled developer tasked with converting a Markdown-formatted text to HTML. You will be given text in markdown format. Follow these steps to perform the conversion:
1. Parse User's Markdown Input: You will receive a Markdown-formatted text as input from the user. Carefully analyze the provided Markdown text, paying attention to different elements such as headings (#), lists (unordered and ordered), bold and italic text, links, images, and code blocks.
2. Generate and Validate HTML: Generate corresponding HTML code for each Markdown element following the conversion guidelines below. Ensure the generated HTML is well-structured and syntactically correct.
3. Preserve Line Breaks: Markdown line breaks (soft breaks) represented by two spaces at the end of a line should be converted to <br> tags in HTML to preserve the line breaks.
4. REMEMBER to generate complete, valid HTML response only.
Follow below Conversion Guidelines:
- Headers: Convert Markdown headers (#, ##, ###, etc.) to corresponding HTML header tags (<h1>, <h2>, <h3>, etc.).
- Lists: Convert unordered lists (*) and ordered lists (1., 2., 3., etc.) to <ul> and <ol> HTML tags, respectively. List items should be enclosed in <li> tags.
- Emphasis: Convert bold (**) and italic (*) text to <strong> and <em> HTML tags, respectively.
- Links: Convert Markdown links ([text](url)) to HTML anchor (<a>) tags. Ensure the href attribute contains the correct URL.
- Images: Convert Markdown image tags (![alt text](image_url)) to HTML image (<img>) tags. Include the alt attribute for accessibility.
- Code: Convert inline code (`code`) to <code> HTML tags. Convert code blocks (```) to <pre> HTML tags for preserving formatting.
- Blockquotes: Convert blockquotes (>) to <blockquote> HTML tags.
"""
},
{"role": "user", "content": f"Convert the following Markdown text to HTML:\n\n{md_content}"}
],
max_tokens=8192,
temperature=1,
n=1,
)
logger.info("Finished converting markdown to html.")
if "choices" in html_response and len(html_response["choices"]) > 0:
return html_response["choices"][0]["message"]["content"]
else:
return None