Blogen-V0.1 Added features. WIP

This commit is contained in:
AjaySi
2023-12-21 21:21:09 +05:30
parent eaf13c2d16
commit 8f89de7b69
21 changed files with 775 additions and 471 deletions

View File

@@ -1,76 +1,101 @@
import sys
import os
import re
import datetime
from textwrap import dedent
import logging
from zoneinfo import ZoneInfo

import nltk
from nltk.corpus import stopwords
from loguru import logger

# Replace loguru's default sink with a colourised stdout sink whose format
# shows the level and the file:line:function of every message.
logger.remove()
logger.add(
    sys.stdout,
    colorize=True,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}",
)


def _slugify_title(blog_title):
    """Return a hyphen-separated, filename-safe slug built from ``blog_title``."""
    # Keep only ASCII letters, digits, spaces and hyphens (this also drops
    # ':' and '&'), then turn hyphens into spaces so every word is separate.
    cleaned = re.sub(r"[^A-Za-z0-9 -]", "", blog_title).replace("-", " ")
    # Stop words have to be filtered while the words are still separated by
    # spaces: once they are joined with hyphens the tokenizer sees a single
    # token and nothing is removed.
    slug = re.sub(r"[\s-]+", "-", remove_stop_words(cleaned)).strip("-")
    if not slug:
        # The title consisted only of stop words; keep them rather than
        # produce a file name without a title part.
        slug = re.sub(r"[\s-]+", "-", cleaned).strip("-")
    return slug


def _front_matter_list(values):
    """Render tags/categories as the inside of a YAML flow sequence."""
    # A real list would otherwise be formatted as "['a', 'b']" and end up
    # nested inside the surrounding brackets.
    if isinstance(values, (list, tuple)):
        return ", ".join(str(value) for value in values)
    return str(values)


def save_blog_to_file(blog_content, blog_title, blog_meta_desc, blog_tags,
                      blog_categories, main_img_path, output_path, file_type="md"):
    """
    Saves the provided blog content to a file in the specified format.

    The post is written as ``YYYY-MM-DD-<title-slug>.md`` inside
    ``output_path`` and starts with a Jekyll front matter block
    (https://jekyllrb.com/docs/front-matter/).

    Args:
        blog_content (str): The main content of the blog.
        blog_title (str): Title of the blog.
        blog_meta_desc (str): Meta description of the blog. Any text up to
            and including the first ':' is treated as a label and dropped.
        blog_tags (list | str): Tags associated with the blog. A list/tuple
            is joined with commas; a string is used as-is.
        blog_categories (list | str): Categories associated with the blog,
            handled like ``blog_tags``.
        main_img_path (str): Path to the main image of the blog; only its
            base name is written to the front matter.
        output_path (str): Path to the directory where the blog will be saved.
        file_type (str, optional): The file format for saving the blog.
            Only 'md' (Markdown) is implemented; any other value logs a
            warning and writes nothing. Defaults to 'md'.

    Raises:
        FileNotFoundError: If the output_path does not exist.
        Exception: If the blog content cannot be written to the file.
    """
    # Sanitize and prepare the blog title for use in the file name.
    blog_title_md = _slugify_title(blog_title)
    logger.debug(f"Blog Title is: {blog_title_md}")

    # Strip a leading "label:" from the description. maxsplit=1 keeps the
    # rest of the text intact when it contains further colons.
    if ':' in blog_meta_desc:
        blog_meta_desc = blog_meta_desc.split(':', 1)[1].strip()

    # Check if output path exists
    if not os.path.exists(output_path):
        logger.error(f"Error: Blog output directory is set to {output_path}, which does not exist.")
        raise FileNotFoundError(f"Output directory does not exist: {output_path}")

    # Different output formats are plaintext, html and markdown.
    # Only Markdown is handled here.
    if file_type != "md":
        logger.warning(
            f"Unsupported file_type '{file_type}': only 'md' is currently implemented; nothing was written."
        )
        return

    logger.info("Writing/Saving the resultant blog content in Markdown format.")
    # date: YYYY-MM-DD HH:MM:SS +/-TTTT
    dtobj = datetime.datetime.now(ZoneInfo('Asia/Kolkata'))
    formatted_date = dtobj.strftime('%Y-%m-%d %H:%M:%S %z')

    # Colons and double quotes would break the unquoted YAML scalars below.
    blog_title = blog_title.replace(":", "-").replace('"', '')

    # Built line by line (not with dedent) so that interpolated values can
    # never change how much indentation is stripped.
    front_matter_lines = [
        "---",
        f"title: {blog_title}",
        f"date: {formatted_date}",
        f"categories: [{_front_matter_list(blog_categories)}]",
        f"tags: [{_front_matter_list(blog_tags)}]",
        f"description: {blog_meta_desc.replace(':', '-')}",
        "img_path: '/assets/'",
        "image:",
        f"  path: {os.path.basename(main_img_path)}",
        f"  alt: {blog_title}",
        "---",
    ]
    blog_frontmatter = "\n".join(front_matter_lines) + "\n\n"

    # Create a new file named YYYY-MM-DD-TITLE.EXTENSION and put it in the _posts of the root directory.
    # Please note that the EXTENSION must be one of md or markdown
    # The date prefix comes from the same timestamp as the front matter so
    # the two cannot disagree when the host runs in another time zone.
    blog_output_path = os.path.join(
        output_path,
        f"{dtobj.strftime('%Y-%m-%d')}-{blog_title_md}.md"
    )

    # Write to the file
    try:
        with open(blog_output_path, "w", encoding="utf-8") as f:
            f.write(blog_frontmatter)
            f.write(blog_content)
    except Exception as e:
        raise Exception(f"Failed to write blog content: {e}") from e

    logger.info(f"Successfully saved and posted blog at: {blog_output_path}")
# Helper function
def remove_stop_words(sentence):
    """
    Removes stop words from a given sentence.

    The sentence is split with NLTK's word tokenizer and every token whose
    lower-cased form is in NLTK's English stop-word list is dropped.
    NOTE: both calls rely on NLTK data being available locally (tokenizer
    models and the 'stopwords' corpus); NLTK raises LookupError otherwise.

    Args:
        sentence (str): The sentence from which to remove stop words.

    Returns:
        str: The remaining tokens joined by single spaces.
    """
    # Tokenize the sentence into words
    words = nltk.word_tokenize(sentence)
    # A set gives constant-time membership tests in the filter below.
    stop_words = set(stopwords.words('english'))
    # Compare case-insensitively but keep each kept token's original casing.
    filtered_words = [word for word in words if word.lower() not in stop_words]
    return ' '.join(filtered_words)