WIP- Under maintenance- Web research working.

This commit is contained in:
AjaySi
2024-02-05 15:15:07 +05:30
parent fd7053fb4b
commit 2a3315f211
96 changed files with 4320 additions and 565 deletions

View File

@@ -0,0 +1,43 @@
from .gpt_providers.gemini_pro_text import gemini_text_response
from .gpt_providers.openai_chat_completion import openai_chatgpt
def blog_proof_editor(blog_content, blog_keywords, gpt_provider="openai"):
    """
    Ask an LLM provider to proofread and rewrite a blog post.

    Args:
        blog_content (str): The blog text to be edited; embedded in the prompt.
        blog_keywords (str): Topic/keywords of the blog; embedded in the prompt.
        gpt_provider (str, optional): Provider selector. A value containing
            'openai' uses openai_chatgpt; otherwise a value containing
            'gemini' uses gemini_text_response. Defaults to 'openai'.

    Returns:
        Whatever the selected provider helper returns, or None when
        gpt_provider matches neither provider.

    Raises:
        SystemError: If the provider call fails. The original exception is
            chained as the cause.
    """
    prompt = f"""I am looking for detailed editing and enhancement of the given blog post,
with a particular focus on maintaining originality.
The topic of the content is [{blog_keywords}]. Please go through the blog and make direct edits to improve it,
ensuring the final output is both high-quality and original.
Note: There are duplicates headings and corresponding paragraphs, rewrite into one subheading.
Here are the specific areas to focus on:
1). Ensure Originality: Edit any sections that lack originality, replacing them with unique and creative content.
2). Eliminate Repetitive Language: Rewrite repetitive phrases with varied and engaging language.
3). Vocabulary and Grammar Enhancement: Directly correct any grammatical errors and upgrade the
vocabulary for better readability.
4). Improve Sentence Structure: Enhance sentence construction for better clarity and flow.
5). Tone and Brand Alignment: Adjust the tone, voice, personality of given content to make it unique.
6). Optimize Content Structure: Reorganize the content for a more impactful presentation,
including better paragraphing and transitions.
7). Remove Redundancies: Important, Cut out any redundant information or overly complex jargon.
8). Refine Overall Structure: Make structural changes to improve the overall impact of the content.
9). Remember, rewrite all content that repeated, while maintaining the formatting of the given blog text.
Please apply these changes directly to the following blog post and provide the edited version:\n
'{blog_content}'. """
    if 'openai' in gpt_provider:
        try:
            return openai_chatgpt(prompt)
        except Exception as err:
            # Previously the SystemError was constructed but never raised, so
            # failures were swallowed and the caller silently received None.
            raise SystemError(f"Openai Error Blog Proof Reading: {err}") from err
    elif 'gemini' in gpt_provider:
        try:
            return gemini_text_response(prompt)
        except Exception as err:
            raise SystemError(f"Gemini Error Blog Proof Reading: {err}") from err
    # Unrecognised provider: keep the historical behaviour of returning None.
    return None

View File

@@ -0,0 +1,75 @@
from .gpt_providers.openai_chat_completion import openai_chatgpt
from .gpt_providers.gemini_pro_text import gemini_text_response
def convert_tomarkdown_format(blog_content, gpt_provider="openai"):
    """
    Ask an LLM provider to reformat a blog post as Markdown for static sites.

    Args:
        blog_content (str): The blog text to format; embedded in the prompt.
        gpt_provider (str, optional): Provider selector. A value containing
            'openai' sends the detailed formatting guidelines to
            openai_chatgpt; otherwise a value containing 'gemini' sends a
            shorter prompt to gemini_text_response. Defaults to 'openai'.

    Returns:
        Whatever the selected provider helper returns, or None when
        gpt_provider matches neither provider.

    Raises:
        SystemError: If the provider call fails. The original exception is
            chained as the cause and included in the message.
    """
    prompt = f"""As an expert in markdown language format and font matter,
I will provide you with a blog post.
Your task is to only Improve the formatting and structure of a blog post to enhance readability, visual appeal, and overall user experience. Do not alter the content of the provided blog. Modify only for the formatting.
Dont provide explanations, just your final response.
Guidelines to do formatting:
1. **Headings for Structure:**
- Use # for the main title of the blog post.
- Use ## for subheadings that divide the post into clear sections.
- Use ###, ####, etc. for additional subheadings as needed.
- Keep the headings concise and descriptive.
2. **Emphasizing Text:**
- Use * or _ for italicizing important words or phrases.
- Use ** or __ for bolding key points.
- Use *** or ___ for bold italicizing very important text.
- Use sparingly to avoid overwhelming the reader.
3. **Lists:**
- Use - or * for unordered lists.
- Use 1., 2., etc. for ordered lists.
- Keep list items concise and to the point.
- Use consistent formatting for all lists.
4. **Blockquotes:**
- Use > to indent and highlight quotes or important information.
- Use additional > for nested blockquotes.
- Attribute quotes to their original source if applicable.
5. **Code Blocks:**
- Use backticks ` for inline code.
- Use triple backticks ``` for code blocks.
- Specify the language of the code block for syntax highlighting, e.g., ```python```.
- Use code blocks to display code snippets or technical information.
6. **Horizontal Lines:**
- Use three or more asterisks, dashes, or underscores to create a horizontal line, e.g., ***, ---, or ___
- Use horizontal lines to separate different sections of the blog post.
7. **Table Formatting:**
- Use pipes | and dashes - to create tables.
- Align text within columns using colons :.
- Use tables to present data or information in a structured format.
8. **Other Best Practices:**
- Use emojis sparingly and appropriately to add visual interest and enhance the reader's experience.
- Proofread carefully for any errors in grammar, spelling, or formatting.
- Keep the blog post organized and easy to navigate.
- Use a consistent formatting style throughout the post.
Blog Post: '{blog_content}'"""
    if 'openai' in gpt_provider:
        try:
            return openai_chatgpt(prompt)
        except Exception as err:
            # Previously the SystemError was built but never raised (and the
            # underlying error was dropped), so failures returned None.
            raise SystemError(f"Openai Error in converting to Markdown format: {err}") from err
    elif 'gemini' in gpt_provider:
        # Gemini gets a much shorter instruction than the OpenAI prompt above.
        prompt = f""" Convert the given blog post into well structured MARKDOWN content.
Do not alter the given blog post.
blog post: "{blog_content}" """
        try:
            return gemini_text_response(prompt)
        except Exception as err:
            raise SystemError(f"Gemini Error in converting to Markdown format: {err}") from err
    # Unrecognised provider: keep the historical behaviour of returning None.
    return None

View File

@@ -0,0 +1,37 @@
from .gpt_providers.openai_chat_completion import openai_chatgpt
def convert_markdown_to_html(md_content):
    """
    Ask the OpenAI chat helper to convert Markdown text to HTML.

    Args:
        md_content (str): Markdown-formatted text; embedded in the prompt.

    Returns:
        Whatever openai_chatgpt returns for the conversion prompt.

    Raises:
        SystemError: If the provider call fails. The original exception is
            chained as the cause and included in the message.
    """
    prompt = f"""
You are a skilled web developer tasked with converting a Markdown-formatted text to HTML.
You will be given text in markdown format. Follow these steps to perform the conversion:
1. Parse User's Markdown Input: You will receive a Markdown-formatted text as input from the user.
Carefully analyze the provided Markdown text, paying attention to different elements such as headings (#),
lists (unordered and ordered), bold and italic text, links, images, and code blocks.
2. Generate and Validate HTML: Generate corresponding HTML code for each Markdown element following
the conversion guidelines below. Ensure the generated HTML is well-structured and syntactically correct.
3. Preserve Line Breaks: Markdown line breaks (soft breaks) represented by two spaces at the end of a
line should be converted to <br> tags in HTML to preserve the line breaks.
4. REMEMBER to generate complete, valid HTML response only.
Follow below Conversion Guidelines:
- Headers: Convert Markdown headers (#, ##, ###, etc.) to corresponding HTML header tags (<h1>, <h2>, <h3>, etc.).
- Lists: Convert unordered lists (*) and ordered lists (1., 2., 3., etc.) to <ul> and <ol> HTML tags, respectively.
List items should be enclosed in <li> tags.
- Emphasis: Convert bold (**) and italic (*) text to <strong> and <em> HTML tags, respectively.
- Links: Convert Markdown links ([text](url)) to HTML anchor (<a>) tags. Ensure the href attribute contains the correct URL.
- Images: Convert Markdown image tags (![alt text](image_url)) to HTML image (<img>) tags.
Include the alt attribute for accessibility.
- Code: Convert inline code (`code`) to <code> HTML tags. Convert code blocks (```) to <pre> HTML tags
for preserving formatting.
- Blockquotes: Convert blockquotes (>) to <blockquote> HTML tags.
Convert the following Markdown text to HTML: {md_content}
"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Previously the SystemError was built but never raised (and the
        # underlying error was dropped), so failures returned None.
        raise SystemError(f"Error in convert to HTML: {err}") from err

View File

@@ -0,0 +1,135 @@
import sys
import os
import re
import datetime
import random
from dateutil.relativedelta import relativedelta
from textwrap import dedent
import logging
from zoneinfo import ZoneInfo
import nltk
from nltk.corpus import stopwords
from loguru import logger
# Reset loguru's handlers, then register stdout as the only sink with a
# compact, colourised "LEVEL|file:line:function| message" layout.
logger.remove()
logger.add(
    sys.stdout,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}",
    colorize=True,
)
# fixme: Remove the hardcoding, need add another option OR in config ?
# Directory for blog images, resolved against the current working directory.
image_dir = os.path.join(os.getcwd(), "blog_images")
# TBD: This can come from config file.
# Directory where generated blogs are written, resolved the same way.
output_path = os.path.join(os.getcwd(), "blogs")
def random_date_last_three_months():
    """
    Pick a random moment within the last three months and format it.

    The window ends at the current time in the 'Asia/Kolkata' zone and starts
    three calendar months earlier; the offset into the window is drawn with
    whole-second granularity.

    Returns:
        str: The chosen timestamp formatted as '%Y-%m-%d %H:%M:%S %z'.
    """
    now = datetime.datetime.now(ZoneInfo('Asia/Kolkata'))
    window_start = now - relativedelta(months=3)
    # Length of the window in whole seconds; randint is inclusive at both ends.
    window_seconds = int((now - window_start).total_seconds())
    offset = datetime.timedelta(seconds=random.randint(0, window_seconds))
    chosen = window_start + offset
    return chosen.strftime('%Y-%m-%d %H:%M:%S %z')
def save_blog_to_file(blog_content, blog_title, blog_meta_desc, blog_tags, blog_categories, main_img_path=None, file_type="md"):
    """
    Save a blog post, prefixed with YAML front matter, as a Markdown file.

    The file is written into the module-level ``output_path`` directory and is
    named ``<today>-<slug>.md``, where the slug is derived from the title with
    stop words removed. The front-matter ``date`` is a random timestamp from
    the last three months (see ``random_date_last_three_months``).

    Args:
        blog_content (str): The main content of the blog.
        blog_title (str): Title of the blog.
        blog_meta_desc (str): Meta description of the blog.
        blog_tags: Tags for the blog; interpolated as-is between ``[`` and ``]``.
        blog_categories: Categories for the blog; interpolated the same way.
        main_img_path (str, optional): Path to the main image. When given, its
            basename is recorded in the front matter together with alt text.
        file_type (str, optional): Output format. Only 'md' is implemented.
            Defaults to 'md'.

    Raises:
        FileNotFoundError: If the module-level output_path does not exist.
        ValueError: If file_type is anything other than 'md'.
        Exception: If the blog content cannot be written to the file.
    """
    # Build the filename slug. Stop words are removed while the title is still
    # space separated: filtering the already hyphen-joined title (as was done
    # before) hands the tokenizer what is effectively a single token, so
    # nothing was ever removed.
    cleaned_title = re.sub('[^A-Za-z0-9 -]', '', blog_title)
    # Fall back to the unfiltered title if every word was a stop word.
    cleaned_title = remove_stop_words(cleaned_title) or cleaned_title
    blog_title_md = cleaned_title.replace(" ", "-")
    # Replace multiple consecutive dashes with a single dash
    blog_title_md = re.sub('-+', '-', blog_title_md)
    logger.debug(f"Blog Title is: {blog_title_md}")

    # Check if output path exists
    if not os.path.exists(output_path):
        logger.error(f"Error: Blog output directory is set to {output_path}, which does not exist.")
        raise FileNotFoundError(f"Output directory does not exist: {output_path}")

    # Only Markdown is implemented; fail explicitly instead of reaching the
    # write step without a target path.
    if file_type != "md":
        raise ValueError(f"Unsupported file_type: {file_type!r}; only 'md' is supported.")

    logger.info("Writing/Saving the resultant blog content in Markdown format.")
    # Hmmmm, bulk generation will benefit from randomizing publishing dates.
    formatted_date = random_date_last_three_months()
    # Colons, double quotes and '**' are removed/replaced so the values stay
    # plain YAML scalars (a value starting with '*' would read as an alias).
    blog_title = blog_title.replace(":", "-").replace('"', '').replace('**', '')
    description = blog_meta_desc.replace(":", "-").replace('**', '')

    front_matter_lines = [
        "---",
        f"title: {blog_title}",
        f"date: {formatted_date}",
        f"categories: [{blog_categories}]",
        f"tags: [{blog_tags}]",
        f"description: {description}",
    ]
    if main_img_path:
        front_matter_lines += [
            "img_path: '/assets/'",
            "image:",
            # path/alt are nested so they parse as children of `image`.
            f"  path: {os.path.basename(main_img_path)}",
            f"  alt: {blog_title}",
        ]
    front_matter_lines.append("---")
    blog_frontmatter = "\n".join(front_matter_lines) + "\n\n"

    blog_output_path = os.path.join(
        output_path,
        f"{datetime.date.today().strftime('%Y-%m-%d')}-{blog_title_md}.md"
    )

    # Write to the file. UTF-8 is forced so non-ASCII content does not depend
    # on the platform's default encoding.
    try:
        with open(blog_output_path, "w", encoding="utf-8") as f:
            f.write(blog_frontmatter)
            f.write(blog_content)
    except Exception as e:
        raise Exception(f"Failed to write blog content: {e}") from e
    logger.info(f"Successfully saved and posted blog at: {blog_output_path}")
# Helper function
def remove_stop_words(sentence):
    """
    Strip English stop words out of a sentence.

    The sentence is tokenized with nltk.word_tokenize; tokens whose lowercase
    form appears in NLTK's English stop-word list are dropped and the rest
    are joined back together with single spaces.

    Args:
        sentence (str): The sentence from which to remove stop words.

    Returns:
        str: The remaining tokens joined by single spaces.
    """
    tokens = nltk.word_tokenize(sentence)
    english_stop_words = set(stopwords.words('english'))
    kept_tokens = []
    for token in tokens:
        if token.lower() in english_stop_words:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)