Blogen-V.000.0.1 Added features,Cleanup. WIP

2023-12-09 18:07:18 +05:30
parent edc468f4aa
commit eaf13c2d16
164 changed files with 1859 additions and 71990 deletions
--- a/lib/save_blog_to_file.py
+++ b/lib/save_blog_to_file.py
@@ -0,0 +1,76 @@
+def save_blog_to_file(blog_content, blog_title, 
+        blog_meta_desc, blog_tags, blog_categories, main_img_path, file_type="md"):
+    """ Common function to save the generated blog to a file.
+    arg: file_type can be md or html
+    """
+    # Convert the spaces in blog_title with dash
+    logger.info(f"The blog will be saved at: {output_path}")
+    logger.debug(f"Blog Title is: {blog_title}")
+    blog_title_md = blog_title
+    regex = re.compile('[^a-zA-Z0-9- ]')
+    blog_title_md = regex.sub('', blog_title_md)
+    blog_title= blog_title.replace(":", "")
+    blog_title_md = re.sub('--+', '-', blog_title_md)
+    blog_title_md = blog_title_md.replace(' ', '-')
+    blog_title_md = remove_stop_words(blog_title_md)
+
+    if ':' in blog_meta_desc:
+        blog_meta_desc  = blog_meta_desc.split(':')[1].strip()
+
+    if not os.path.exists(output_path):
+        logger.error("Error: Blog output directory is set to {output_path}, which Does Not Exist.")
+
+    # Different output formats are plaintext, html and markdown.
+    if file_type in "md":
+        logger.info(f"Writing/Saving the resultant blog content in Markdown format.")
+        # fill the Front Matter as below at the top of the post: https://jekyllrb.com/docs/front-matter/
+        # date: YYYY-MM-DD HH:MM:SS +/-TTTT
+        from zoneinfo import ZoneInfo
+        tz=ZoneInfo('Asia/Kolkata')
+        dtobj = datetime.datetime.now(tz=ZoneInfo('Asia/Kolkata'))
+        formatted_date = f"{dtobj.strftime('%Y-%m-%d %H:%M:%S %z')}"
+
+        blog_frontmatter = f"""\
+                        ---
+                        title: {blog_title}
+                        date: {formatted_date}
+                        categories: [{blog_categories}]
+                        tags: [{blog_tags}]
+                        description: {blog_meta_desc}
+                        img_path: '/assets/'
+                        image:
+                            path: {os.path.basename(main_img_path)}
+                            alt: {blog_title}
+                        ---\n\n"""
+
+        # Create a new file named YYYY-MM-DD-TITLE.EXTENSION and put it in the _posts of the root directory. 
+        # Please note that the EXTENSION must be one of md or markdown
+        blog_output_path = os.path.join(
+                output_path,
+                f"{datetime.date.today().strftime('%Y-%m-%d')}-{blog_title_md}.md"
+                )
+        # Save the generated blog content to a file.
+        try:
+            with open(blog_output_path, "w") as f:
+                f.write(dedent(blog_frontmatter))
+                f.write(blog_content)
+        except Exception as e:
+            raise Exception(f"Failed to write blog content: {e}")
+        logger.info(f"\nSuccessfully saved and Posted blog at: {blog_output_path,}\n")
+
+
+# Helper function
+def remove_stop_words(sentence):
+    # Tokenize the sentence into words
+    words = nltk.word_tokenize(sentence)
+
+    # Get the list of English stop words
+    stop_words = set(stopwords.words('english'))
+
+    # Remove stop words from the sentence
+    filtered_words = [word for word in words if word.lower() not in stop_words]
+
+    # Join the filtered words back into a sentence
+    filtered_sentence = ' '.join(filtered_words)
+
+    return filtered_sentence