136 lines
5.1 KiB
Python
136 lines
5.1 KiB
Python
import sys
|
|
import os
|
|
import re
|
|
import datetime
|
|
import random
|
|
from dateutil.relativedelta import relativedelta
|
|
from textwrap import dedent
|
|
import logging
|
|
from zoneinfo import ZoneInfo
|
|
import nltk
|
|
from nltk.corpus import stopwords
|
|
from loguru import logger
|
|
logger.remove()
|
|
logger.add(sys.stdout,
|
|
colorize=True,
|
|
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
|
)
|
|
|
|
# fixme: Remove the hardcoding, need add another option OR in config ?
|
|
image_dir = "blog_images"
|
|
image_dir = os.path.join(os.getcwd(), image_dir)
|
|
# TBD: This can come from config file.
|
|
output_path = "blogs"
|
|
output_path = os.path.join(os.getcwd(), output_path)
|
|
|
|
|
|
def random_date_last_three_months():
|
|
current_date = datetime.datetime.now(ZoneInfo('Asia/Kolkata'))
|
|
three_months_ago = current_date - relativedelta(months=3)
|
|
|
|
# Generate a random date between three_months_ago and current_date
|
|
random_date = three_months_ago + datetime.timedelta(
|
|
seconds=random.randint(0, int((current_date - three_months_ago).total_seconds()))
|
|
)
|
|
|
|
return random_date.strftime('%Y-%m-%d %H:%M:%S %z')
|
|
|
|
|
|
def save_blog_to_file(blog_content, blog_title, blog_meta_desc, blog_tags, blog_categories, main_img_path=None, file_type="md"):
|
|
"""
|
|
Saves the provided blog content to a file in the specified format.
|
|
|
|
Args:
|
|
blog_content (str): The main content of the blog.
|
|
blog_title (str): Title of the blog.
|
|
blog_meta_desc (str): Meta description of the blog.
|
|
blog_tags (list): List of tags associated with the blog.
|
|
blog_categories (list): List of categories associated with the blog.
|
|
main_img_path (str): Path to the main image of the blog.
|
|
output_path (str): Path to the directory where the blog will be saved.
|
|
file_type (str, optional): The file format for saving the blog ('md' for Markdown or 'html' for HTML). Defaults to 'md'.
|
|
|
|
Raises:
|
|
FileNotFoundError: If the output_path does not exist.
|
|
Exception: If the blog content cannot be written to the file.
|
|
"""
|
|
blog_frontmatter = ''
|
|
# Sanitize and prepare the blog title
|
|
# Remove colon and ampersand
|
|
blog_title_md = blog_title.replace(":", "").replace("&", "")
|
|
# Replace spaces with hyphens
|
|
blog_title_md = blog_title_md.replace(" ", "-")
|
|
blog_title_md = re.sub('[^A-Za-z0-9-]', '', blog_title_md)
|
|
# Replace multiple consecutive dashes with a single dash
|
|
blog_title_md = re.sub('-+', '-', blog_title_md)
|
|
blog_title_md = remove_stop_words(blog_title_md)
|
|
logger.debug(f"Blog Title is: {blog_title_md}")
|
|
|
|
# Check if output path exists
|
|
if not os.path.exists(output_path):
|
|
logger.error(f"Error: Blog output directory is set to {output_path}, which does not exist.")
|
|
raise FileNotFoundError(f"Output directory does not exist: {output_path}")
|
|
|
|
# Handle Markdown file type
|
|
if file_type == "md":
|
|
logger.info("Writing/Saving the resultant blog content in Markdown format.")
|
|
# Hmmmm, bulk generation will benefit from randomizing publishing dates.
|
|
#dtobj = datetime.datetime.now(ZoneInfo('Asia/Kolkata'))
|
|
#formatted_date = dtobj.strftime('%Y-%m-%d %H:%M:%S %z')
|
|
formatted_date = random_date_last_three_months()
|
|
blog_title = blog_title.replace(":", "-").replace('"', '').replace('**', '')
|
|
if main_img_path:
|
|
blog_frontmatter = dedent(f"""\
|
|
---
|
|
title: {blog_title}
|
|
date: {formatted_date}
|
|
categories: [{blog_categories}]
|
|
tags: [{blog_tags}]
|
|
description: {blog_meta_desc.replace(":", "-").replace('**', '')}
|
|
img_path: '/assets/'
|
|
image:
|
|
path: {os.path.basename(main_img_path)}
|
|
alt: {blog_title}
|
|
---\n\n""")
|
|
else:
|
|
blog_frontmatter = dedent(f"""\
|
|
---
|
|
title: {blog_title}
|
|
date: {formatted_date}
|
|
categories: [{blog_categories}]
|
|
tags: [{blog_tags}]
|
|
description: {blog_meta_desc.replace(":", "-")}
|
|
---\n\n""")
|
|
|
|
blog_output_path = os.path.join(
|
|
output_path,
|
|
f"{datetime.date.today().strftime('%Y-%m-%d')}-{blog_title_md}.md"
|
|
)
|
|
|
|
# Write to the file
|
|
try:
|
|
with open(blog_output_path, "w") as f:
|
|
f.write(blog_frontmatter)
|
|
f.write(blog_content)
|
|
except Exception as e:
|
|
raise Exception(f"Failed to write blog content: {e}")
|
|
|
|
logger.info(f"Successfully saved and posted blog at: {blog_output_path}")
|
|
|
|
|
|
# Helper function
|
|
def remove_stop_words(sentence):
|
|
"""
|
|
Removes stop words from a given sentence.
|
|
|
|
Args:
|
|
sentence (str): The sentence from which to remove stop words.
|
|
|
|
Returns:
|
|
str: The sentence after removing stop words.
|
|
"""
|
|
words = nltk.word_tokenize(sentence)
|
|
stop_words = set(stopwords.words('english'))
|
|
filtered_words = [word for word in words if word.lower() not in stop_words]
|
|
return ' '.join(filtered_words)
|