Initial version of blog Gen

This commit is contained in:
AjaySi
2023-10-07 10:51:22 +05:30
commit 12010c0478
27 changed files with 1295 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
*.pyc
__pycache__
pseo-experiemnts/

0
README.md Normal file
View File

2
TBD Normal file
View File

@@ -0,0 +1,2 @@
https://github.com/hardikvasa/google-images-download

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

1
lib/.gpte_consent Normal file
View File

@@ -0,0 +1 @@
true

6
lib/blog_proof_reader.py Normal file
View File

@@ -0,0 +1,6 @@
"""
I want you to act as a proofreader. I will provide you with texts and I would like you to review them for any spelling, grammar, or punctuation errors. Once you have finished reviewing the text, provide me with any necessary corrections or suggestions to improve the text.
I want you to act as an SEO editor and copywriter. I need you to proofread and analyze the following text and optimize it for the focus keyword. I also need you to correct any grammar mistakes you find in the article. Ask me to provide you with the article text and focus keyword.
"""

View File

@@ -0,0 +1,179 @@
#########################################################
#
# This module will generate images for the blogs using APIs
# from Dall-E and other free resources. Given a prompt, the
# images will be stored in local directory.
# Required: openai API key.
#
#########################################################
# imports
import openai # OpenAI Python library to make API calls
import requests # used to download images
import os # used to access filepaths
from PIL import Image # used to logger.info and edit images
# Set the API key.
# Taking it from the environment is safer than hardcoding it here, but not
# everyone has a shell to export it from. Better to take it from a config file
# and pass it as a parameter.
# Expected variable: OPENAI_API_KEY=<API-KEY>
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Directory (relative to the current working directory) to save DALL·E images to.
image_dir_name = "blog_images"
image_dir = os.path.join(os.curdir, image_dir_name)
# Create the directory if it doesn't yet exist. exist_ok=True avoids the race
# between checking for the directory and creating it.
os.makedirs(image_dir, exist_ok=True)
def generate_image(logger, num_images=1, img_size="1024x1024", response_format="url", prompt=None):
    """
    Create image(s) from a text prompt through the OpenAI image generation
    endpoint and hand the response to save_generated_image().

    Inputs:
    --> logger: Logger-like object; its info() and error() methods are used.
    --> num_images (int): The number of images to generate. Must be between 1 and 10. Defaults to 1.
    --> img_size (str): The size of the generated images. Must be one of "256x256", "512x512", or "1024x1024".
        Smaller images are faster. Defaults to "1024x1024".
    --> response_format (str): The format in which the generated images are returned.
        Must be one of "url" or "b64_json". Defaults to "url".
    --> prompt (str): A text description of the desired image(s). The maximum length is 1000 characters.
        Defaults to a built-in sample prompt when omitted.

    Returns None. The response is only saved to disk when response_format is
    "url", because save_generated_image() reads the image URL from the response.
    """
    logger.info(f"Generated blog images will be stored at: {image_dir=}")
    # TBD: Ask gpt for prompt for AI generated images as:
    # I want you to act as an artist advisor providing advice on various art styles such tips on utilizing
    # light & shadow effects effectively in painting, shading techniques while sculpting etc.
    # Develop prompts for an AI-generated art piece inspired by [concept], using [symbolism] and [metaphor].
    # Provide prompts for an AI-generated art piece inspired by [era] art, incorporating [medium] and [subject matter].
    # Develop a set of prompts that could be used to generate AI-generated art focused on the theme of “urban decay.”
    # I want you to act as a prompt generator for Science Fiction Art and
    # give me five prompts that transport me to a futuristic world.
    # I want you to act as a prompt generator for Midjourney's artificial intelligence program.
    # Your job is to provide detailed and creative descriptions that will inspire unique and interesting images from the AI.
    # Keep in mind that the AI is capable of understanding a wide range of language and can interpret abstract concepts,
    # so feel free to be as imaginative and descriptive as possible. For example,
    # you could describe a scene from a futuristic city, or a surreal landscape filled with strange creatures.
    # The more detailed and imaginative your description, the more interesting the resulting image will be.
    # Here is your first prompt: ""
    if prompt is None:
        prompt = "An illustration of AI teaching human to speak"
    # Call the OpenAI API to generate image(s) from the prompt, honouring the
    # caller's arguments instead of fixed values.
    logger.info(f"Calling openai.image.generate with prompt: {prompt}")
    try:
        img_generation_response = openai.Image.create(
            prompt=prompt,
            n=num_images,
            size=img_size,
            response_format=response_format,
        )
    except AttributeError as aerr:
        # Raised when the installed openai package does not expose Image.create.
        logger.error(f"Failed to generate Image, Try: pip install openai --upgrade in your terminal.Error: {aerr}")
        return
    # Log the raw response for debugging.
    logger.info(f"{img_generation_response}")
    if response_format != "url":
        logger.info(f"Not saving generated image: response_format={response_format} carries no URL to download.")
        return
    save_generated_image(logger, img_generation_response)
def save_generated_image(logger, img_generation_response):
    """
    Download the first image referenced by an image-generation response,
    write it into the blog image directory and open it in an image viewer.

    Inputs:
    --> logger: Logger-like object; its info() method is used.
    --> img_generation_response: Response whose ["data"][0]["url"] entry is the image URL.

    Raises SystemExit when the image cannot be downloaded.
    """
    # We need to change the image name to unique, overwrite and for SEO considerations.
    # Note: filetype should be *.png
    generated_image_name = "generated_image.png"
    generated_image_filepath = os.path.join(image_dir, generated_image_name)
    # Extract the image URL from the response.
    generated_image_url = img_generation_response["data"][0]["url"]
    logger.info(f"Extracted URL: {generated_image_url}")
    # Download the image exactly once. raise_for_status() keeps an HTTP error
    # page from being written to disk as if it were a PNG.
    try:
        download = requests.get(generated_image_url, timeout=60)
        download.raise_for_status()
    except requests.exceptions.RequestException as e:
        raise SystemExit(f"Failed to get generted image content: {e}") from e
    with open(generated_image_filepath, "wb") as image_file:
        image_file.write(download.content)
    # Optional, dbg: show the stored image.
    logger.info("Display the generated image.")
    with Image.open(generated_image_filepath) as img:
        img.show()
# Close image window.
#for proc in psutil.process_iter():
# if proc.name() == "Image Viewer":
# proc.kill()
# WIP
# The idea is to download images from other blogs and recreate from it.
# This helps us generate images very close to the topic and also not worry about prompt message.
def gen_new_from_given_img(logger, num_img=1, img_size="1024x1024", response_format="url", img_path=None):
    """
    Take an image and produce variation(s) of it with the OpenAI variation endpoint.

    Inputs:
    --> logger: Logger-like object; its info() and error() methods are used.
    --> num_img (int): The number of images to generate. Must be between 1 and 10. Defaults to 1.
    --> img_size (str): The size of the generated images. Must be one of "256x256", "512x512", or "1024x1024".
        Smaller images are faster. Defaults to "1024x1024".
    --> response_format (str): The format in which the generated images are returned.
        Must be one of "url" or "b64_json". Defaults to "url".
    --> img_path (str): The image to use as the basis for the variation(s). Must be a valid PNG file,
        less than 4MB, and square. Defaults to the original hard-coded sample path.

    The variations are written to the blog image directory as
    variation_image_<i>.png and then shown together with the source image.
    Returns None. Raises SystemExit(1) when preparing the image or calling the API fails.
    """
    if img_path is None:
        img_path = "/home/ajsingh/pseo_experiments_V0.0.1/blog_images/variation_example.png"
    # Intermediate file holding the source image flattened onto a white background.
    flattened_png_path = "foo.png"
    try:
        with Image.open(img_path) as source_img:
            png = source_img.convert('RGBA')
        background = Image.new('RGBA', png.size, (255, 255, 255))
        alpha_composite = Image.alpha_composite(background, png)
        alpha_composite.save(flattened_png_path, 'PNG')
        # Upload the file that was just written (the original opened a
        # non-existent 'foo.jpg') and close the handle afterwards.
        with open(flattened_png_path, "rb") as flattened_file:
            variation_response = openai.Image.create_variation(
                image=flattened_file,
                n=num_img,
                size=img_size,
                response_format=response_format,
            )
    except Exception as err:
        logger.error(f"An error occured in Image.create_variation::: {err}")
        # Actually stop here; nothing below can work without a response.
        raise SystemExit(1) from err
    logger.info(variation_response)
    if response_format != "url":
        logger.info(f"Not saving variations: response_format={response_format} carries no URL to download.")
        return
    # Download and store each variation.
    variation_image_filepaths = []
    for index, datum in enumerate(variation_response["data"]):
        download = requests.get(datum["url"], timeout=60)
        download.raise_for_status()
        filepath = os.path.join(image_dir, f"variation_image_{index}.png")
        with open(filepath, "wb") as image_file:
            image_file.write(download.content)
        variation_image_filepaths.append(filepath)
    # Show the original image.
    logger.info(img_path)
    with Image.open(img_path) as orig_img:
        orig_img.show()
    # Show the new variations.
    for variation_image_filepath in variation_image_filepaths:
        logger.info(variation_image_filepath)
        with Image.open(variation_image_filepath) as var_img:
            var_img.show()

200
lib/get_text_response.py Normal file
View File

@@ -0,0 +1,200 @@
########################################################################
#
# Common module for getting response from gpt for given prompt.
# This module includes following capabilities:
#
#
#
########################################################################
import openai
from tqdm import tqdm, trange
import time
import re
def get_prompt_reply(prompt, max_token, outputs=1):
    """
    Send a prompt to OpenAI's Completion endpoint and return the generated texts.

    Inputs:
    --> prompt (str): The user input passed to the model.
    --> max_token (int): Upper bound on the number of tokens in each generated output.
    --> outputs (int): Number of outputs generated in one call. Defaults to 1.

    Returns a list with the stripped text of every returned choice. When the
    request fails with one of the handled OpenAI errors, the error is printed
    and an empty list is returned.
    """
    # Handled error types and the message printed for each, in match order.
    handled_errors = (
        (openai.error.Timeout, "OpenAI API request timed out"),
        (openai.error.APIError, "OpenAI API returned an API Error"),
        (openai.error.APIConnectionError, "OpenAI API request failed to connect"),
        (openai.error.InvalidRequestError, "OpenAI API request was invalid"),
        (openai.error.AuthenticationError, "OpenAI API request was not authorized"),
        (openai.error.PermissionError, "OpenAI API request was not permitted"),
        (openai.error.RateLimitError, "OpenAI API request exceeded rate limit"),
    )
    try:
        # Using OpenAI's Completion module that helps execute any tasks
        # involving text. The model name used here is text-davinci-003; there
        # are many other models available under the umbrella of GPT-3.
        response = openai.Completion.create(
            model="text-davinci-003",
            prompt=prompt,
            max_tokens=max_token,
            n=outputs,
        )
    except tuple(error_type for error_type, _ in handled_errors) as e:
        message = next(text for error_type, text in handled_errors if isinstance(e, error_type))
        print(f"{message}: {e}")
        # No response exists on this path, so report "no outputs" instead of
        # falling through to code that reads it.
        return []
    output = [choice['text'].strip() for choice in response['choices']]
    if output:
        print(f"Prompt output: {output[0]}")
    return output
def generate_detailed_blog(blog_keywords):
    """
    Generate a blog for the given keywords: first generate section topics,
    then generate content for each topic and stitch the sections together.

    Inputs:
    --> blog_keywords (str): Keywords/topic handed to generate_blog_topics().

    Returns the assembled blog text (each section is the topic line, a blank
    line, the generated content and another blank line). Returns an empty
    string when no topics could be generated.
    """
    # TBD
    # I want you to act as a blogger and you want to write a blog post about [topic],
    # with a friendly and approachable tone that engages readers.
    # Your target audience is [define your target audience].
    # Write in a personal style using singular first-person pronouns only.
    # I want you to include these keywords: [keyword 1], [keyword 2], [keyword 3] throughout the article.
    # Format your response using markdown.
    # Use headings, subheadings, bullet points, and bold to organize the information.
    # Answer the most commonly asked questions about the topic at the end of the article.
    # Create a list of the most popular tools used by the [Field of Interest] professionals with the pros and cons of each tool.
    generated_topics = generate_blog_topics(blog_keywords)
    if not generated_topics:
        # Topic generation failed or produced nothing; there is nothing to split.
        print("No blog topics were generated.")
        return ""
    # Drop empty lines from the model output.
    blog_topic_arr = [topic for topic in generated_topics.split("\n") if topic.strip()]
    print(f"Generated Blog Topics: {type(blog_topic_arr)}---- {blog_topic_arr}")
    # Sections are collected here and joined once at the end.
    blog_sections = []
    for a_blog_topic in blog_topic_arr:
        # Error in generating topic content: Rate limit reached for default-global-with-image-limits
        # in free account on requests per min. Limit: 3 / min. Please try again in 20s.
        # Wait 30 seconds (with a progress bar) before each request.
        for _ in trange(30):
            time.sleep(1)
        # The generated topics usually start with a list marker such as "1)",
        # "2." or "-". Strip only that marker so the topic text itself reaches
        # the prompt.
        a_topic = re.sub(r"^\s*(?:\d+\s*[.):\-]|[-*])\s*", "", a_blog_topic)
        if not a_topic:
            continue
        tpc_cnt = generate_topic_content(a_topic)
        if not tpc_cnt:
            print(f"No content was generated for topic: {a_topic}")
            continue
        blog_sections.append(f"{a_blog_topic}\n\n{tpc_cnt}\n\n")
    blog_markdown_str = "".join(blog_sections)
    # print/check the final blog content.
    print(f"Final blog content: {blog_markdown_str}")
    # Save the blog content as a .md file. Markdown or HTML ?
    # Best to name the file
    return blog_markdown_str
def generate_blog_topics(blog_keywords):
    """
    For the given keywords, generate blog topics.
    Uses the davinci-instruct-beta-v3 engine.
    Ex: Generate SEO optimized blog topics on AI text to image with Python

    Inputs:
    --> blog_keywords (str): Keywords inserted into the topic-generation prompt.

    Returns the raw text of the first completion choice. Returns an empty
    string (after printing the error) when the request fails, so callers can
    keep treating the result as a string.
    """
    # Prompt engineering, huh ?
    # Create a blog post about “{blogPostTopic}” . Write it in a “{tone}” tone. Use transition words.
    # Use active voice. Write over 1000 words. The blog post should be in a beginners guide style.
    # Add title and subtitle for each section. It should have a minimum of 6 sections.
    # Include the following keywords: “{keywords}”. Create a good slug for this post and a
    # meta description with a maximum of 100 words. and add it to the end of the blog post
    prompt = f"As an experienced AI scientist and technical writer, generate SEO optimized blog topics about {blog_keywords}."
    #prompt = "Generate SEO optimized blog topics for" + " " + f"{blog_keywords}"
    try:
        response = openai.Completion.create(
            engine="davinci-instruct-beta-v3",
            prompt=prompt,
            temperature=0.7,
            max_tokens=100,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        return response.choices[0].text
    except Exception as err:
        # Best-effort: report the problem and hand back an empty result.
        print(f"Error in generating blog topics: {err}")
        return ""
def generate_topic_content(prompt):
    """
    Generate blog content for one topic.

    Inputs:
    --> prompt (str): The topic to write about; it is embedded in a fixed
        "technical writer" instruction before being sent to the model.

    Returns the raw text of the first completion choice. Returns an empty
    string (after printing the error) when the request fails.
    """
    # Generate a blog post outline for the following topic: {topic}.
    # The outline should contain various subheadings and include the starting sentence for each section.
    full_prompt = f"As an experienced AI researcher and technical writer, blog about {prompt}."
    try:
        response = openai.Completion.create(
            engine="davinci-instruct-beta-v3",
            prompt=full_prompt,
            temperature=0.7,
            max_tokens=500,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        return response.choices[0].text
    except Exception as err:
        # No response exists on this path; return an empty result instead of
        # reading an unassigned variable.
        print(f"Error in generating topic content: {err}")
        return ""
def generate_blog_description():
    """
    Placeholder for the prompt that yields an SEO optimized blog description.

    Not implemented yet: takes no arguments and returns None.
    """
    # Suggest keywords that I should include in my meta description for my blog post on [topic]
    # I want to generate high CTR meta and keyword rich meta title and meta descriptions in text format.
    # My keywords are [keyword 1], [keyword 2], [keyword 3]
    return None
def get_blog_tags(blog_article):
    """
    Placeholder for suggesting tags for the given blog content.

    Not implemented yet: the argument is ignored and None is returned.
    """
    # Suggest at least 5 tags for the following blog post [Enter your blog post text here].
    return None
def get_long_tailed_keywords(blog_article):
    """
    Placeholder for extracting long tailed keywords from a blog article.

    Not implemented yet: the argument is ignored and None is returned.
    """
    # want you to generate a list of long-tail keywords that are related to the following blog post [Enter blog post text here]
    return None

View File

@@ -0,0 +1,65 @@
##############################################################################################
#
# Checks for:
# Short, fragmented sentences that lack human-like coherence.
# Frequent use of overly complex words or technical jargon.
#
# These checks are based on common observations that AI-generated content may sometimes produce
# text with unusual patterns or characteristics. However, please keep in mind that these
# heuristics are not guaranteed to detect all AI-generated content, and false positives or
# negatives can still occur. More advanced techniques and models would be required for more accurate detection.
#
#############################################################################################
import spacy
# Load the English language model from spaCy once, at import time.
# is_ai_generated() uses this module-level object to tokenize its input.
nlp = spacy.load("en_core_web_sm")
def is_ai_generated(text):
    """
    Heuristically decide whether a text looks AI-generated.

    The checks run in this order and the first one that fires returns True:
    a known indicator phrase appears in the text (case-insensitive), the same
    token occurs three times in a row, any sentence has fewer than 5 tokens,
    or alphabetic tokens longer than 10 characters exceed one tenth of all
    tokens. Returns False when none of them fire.
    """
    # Tokenize the text using spaCy
    doc = nlp(text)

    # Phrases that announce machine authorship.
    indicator_phrases = (
        "generated by AI",
        "auto-generated",
        "machine-generated",
        "artificial intelligence",
        "neural network",
        "GPT-3",
        "AI model",
    )
    if any(phrase.lower() in text.lower() for phrase in indicator_phrases):
        return True

    # Repetitive pattern: three identical consecutive tokens.
    token_texts = [token.text for token in doc]
    if any(a == b == c for a, b, c in zip(token_texts, token_texts[1:], token_texts[2:])):
        return True

    # Short, fragmented sentences.
    if any(len(sentence) < 5 for sentence in doc.sents):
        return True

    # Frequent use of overly long words. Adjust the threshold as needed.
    long_word_total = len([token for token in doc if token.is_alpha and len(token.text) > 10])
    return long_word_total > len(doc) // 10
if __name__ == "__main__":
    # Small demo: classify a sample text and report the verdict.
    input_text = """
This is an article generated by a state-of-the-art AI model.
The content is machine-generated and may not represent human writing style.
"""
    verdict = (
        "The content appears to be AI-generated."
        if is_ai_generated(input_text)
        else "The content appears to be written by a human."
    )
    print(verdict)

View File

@@ -0,0 +1,20 @@
## main.py
from plagiarism_checker import PlagiarismChecker
def main(known_sources=None):
    """
    Ask the user for a string and report whether it matches a known source.

    Inputs:
    --> known_sources (list of str): Source texts to compare against.
        Defaults to an empty list.

    NOTE(review): this assumes the imported PlagiarismChecker takes the known
    sources in its constructor and returns a human-readable message from
    check_plagiarism(), not a percentage — confirm against the module.
    """
    # Create an instance of the PlagiarismChecker class
    checker = PlagiarismChecker(known_sources if known_sources is not None else [])
    # Get the input string from the user
    input_string = input("Enter the input string: ")
    # Check plagiarism in the input string and print the checker's message
    result = checker.check_plagiarism(input_string)
    print(result)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,71 @@
import re
class PlagiarismChecker:
    """
    Exact-match plagiarism check of HTML content against a list of known
    HTML sources, compared after tag removal and whitespace/case folding.
    """

    def __init__(self, known_sources):
        self.known_sources = known_sources

    @staticmethod
    def _normalize(html):
        """Replace tags with spaces, collapse whitespace, trim and lowercase."""
        without_tags = re.sub(r'<[^>]+>', ' ', html)
        return re.sub(r'\s+', ' ', without_tags).strip().lower()

    def check_plagiarism(self, html_content):
        """
        Compare html_content with every known source.

        Returns a message naming the first source whose normalized text is
        identical, or a "no plagiarism" message when none is. If anything
        raises while processing, the exception text is returned instead.
        """
        try:
            candidate = self._normalize(html_content)
            for source in self.known_sources:
                if self._normalize(source) == candidate:
                    return f"Plagiarism detected: Matches known source - {source}"
        except Exception as e:
            return str(e)
        # No exact matches were found.
        return "No plagiarism detected. Content is original."
# Example usage: check one sample page against two known sample sources.
if __name__ == "__main__":
    # Known sources to compare against.
    known_sources = [
        """
<html>
<head>
<title>Sample Page 1</title>
</head>
<body>
<h1>Hello, World!</h1>
<p>This is sample content from known source 1.</p>
</body>
</html>
""",
        """
<html>
<head>
<title>Sample Page 2</title>
</head>
<body>
<h1>Welcome to Known Source 2</h1>
<p>This is some content from another known source.</p>
</body>
</html>
"""
    ]
    # The page under test.
    html_content = """
<html>
<head>
<title>Sample Page</title>
</head>
<body>
<h1>Hello, World!</h1>
<p>This is sample content.</p>
</body>
</html>
"""
    print(PlagiarismChecker(known_sources).check_plagiarism(html_content))

33
lib/seo_module/README.md Normal file
View File

@@ -0,0 +1,33 @@
## Implementation approach
To implement the SEO module, we will use the following open-source tools and frameworks:
1. Natural Language Toolkit (NLTK): NLTK is a popular library for natural language processing in Python. We can leverage NLTK to perform various SEO checks on the given text, such as keyword density, readability analysis, and sentiment analysis.
2. Beautiful Soup: Beautiful Soup is a Python library for web scraping. We can use Beautiful Soup to extract relevant information from the given text, such as meta tags, headings, and image alt attributes.
3. PyEnchant: PyEnchant is a spell-checking library for Python. We can use PyEnchant to check the spelling of the given text and suggest corrections. Note that PyEnchant checks spelling only; grammar checking needs a separate tool.
4. TextBlob: TextBlob is a library for processing textual data. We can use TextBlob to perform part-of-speech tagging, noun phrase extraction, and other linguistic analyses on the given text.
5. Flask: Flask is a lightweight web framework for Python. We can use Flask for local testing and development, which allows us to quickly build and test our SEO module.
Overall, by leveraging these open-source tools and frameworks, we can develop a comprehensive and efficient SEO module that meets the requirements and provides valuable insights and suggestions for improving the SEO of the given text.
## Required Python third-party packages
- nltk==3.6.2
- beautifulsoup4==4.9.3
- pyenchant==3.2.1
- textblob==0.15.3
- flask==1.1.2
## Modules
The 'text_processor.py' file contains the TextProcessor class, which is responsible for extracting meta tags, headings, and image alt attributes from the given text.
The 'spell_checker.py' file contains the SpellChecker class, which is responsible for checking the spelling and grammar of the given text.
The 'seo_checker.py' file contains the SEOChecker class, which is responsible for coordinating the SEO checks by utilizing the TextProcessor and SpellChecker classes.

View File

@@ -0,0 +1,135 @@
###################################################
#
# The script covers many SEO factors, including keyword presence, title length,
# meta description, images, img alt text, headings, internal links, external links,
# spelling errors, grammar errors, and readability.
#
##################################################
import re
from bs4 import BeautifulSoup
from textstat import flesch_reading_ease
import spellchecker
class SEOAnalyzer:
    """
    Analyze an HTML document against a list of target keywords.

    Inputs:
    --> html_content (str): The HTML document to analyze.
    --> target_keywords (list of str): Keywords the page should rank for.
    """

    # Recommended <title> length, measured in characters.
    RECOMMENDED_TITLE_LENGTH = (50, 70)
    # href prefixes that point to another site.
    EXTERNAL_PREFIXES = ('http://', 'https://', '//')
    # href prefixes that are neither internal nor external page links.
    NON_PAGE_PREFIXES = ('mailto:', 'tel:', 'javascript:')

    def __init__(self, html_content, target_keywords):
        self.html_content = html_content
        self.target_keywords = target_keywords

    def analyze_html_content(self):
        """
        Run all checks and return the results as a dict.

        Keys: 'Keyword Density', 'Keyword Presence in Title',
        'Keyword Presence in Image Alt Text', 'Headings Text',
        'Internal Links', 'External Links', 'Readability Score',
        'Spelling Errors', 'Grammar Errors', 'SEO Score', 'Suggestions'.
        'Grammar Errors' is always None because no grammar checker is
        available here. If anything raises, {'error': <message>} is
        returned instead.
        """
        try:
            soup = BeautifulSoup(self.html_content, 'html.parser')
            # Extract and clean text from HTML
            text = re.sub(r'\s+', ' ', ' '.join(soup.stripped_strings))
            lowered_text = text.lower()
            word_count = len(text.split())

            # Keyword density: occurrences per 100 words (0.0 for an empty page).
            keyword_density = {
                keyword: (lowered_text.count(keyword.lower()) / word_count) * 100 if word_count else 0.0
                for keyword in self.target_keywords
            }

            # Check for the presence of keywords in the title
            title_tag = soup.find('title')
            title_text = title_tag.text.strip().lower() if title_tag else ''
            keyword_presence_in_title = {
                keyword: keyword.lower() in title_text for keyword in self.target_keywords
            }

            # Check for the presence of images and keywords in image alt text
            img_alt_text = [img.get('alt', '').lower() for img in soup.find_all('img')]
            keyword_presence_in_img_alt_text = {
                keyword: any(keyword.lower() in alt_text for alt_text in img_alt_text)
                for keyword in self.target_keywords
            }

            # Check for the presence of headings
            headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            headings_text = ' '.join(heading.text.lower() for heading in headings)

            # Classify links by href. The site's own domain is unknown here,
            # so every absolute http(s) link counts as external and every
            # other non-empty page link counts as internal.
            hrefs = [(link.get('href') or '').strip().lower() for link in soup.find_all('a')]
            external_links = sum(1 for href in hrefs if href.startswith(self.EXTERNAL_PREFIXES))
            internal_links = sum(
                1 for href in hrefs
                if href and not href.startswith(self.EXTERNAL_PREFIXES + self.NON_PAGE_PREFIXES)
            )

            # Calculate readability score
            readability_score = flesch_reading_ease(text)

            # Spelling: check words only, so attached punctuation ("page.")
            # is not reported as a misspelling.
            spell = spellchecker.SpellChecker()
            spelling_errors = len(spell.unknown(re.findall(r"[A-Za-z']+", text)))
            # The spell checker has no grammar check; report "not evaluated".
            grammar_errors = None

            # SEO score: one point per keyword found in the text
            # (case-insensitive) plus one for a well-sized title.
            keywords_found = sum(1 for keyword in self.target_keywords if keyword.lower() in lowered_text)
            seo_score = keywords_found
            shortest, longest = self.RECOMMENDED_TITLE_LENGTH
            if shortest <= len(title_text) <= longest:
                seo_score += 1

            # Generate suggestions only for the checks that actually failed.
            suggestions = []
            if keywords_found < len(self.target_keywords) or not self.target_keywords:
                suggestions.append("Add more relevant keywords to your HTML content.")
            if not any(keyword_presence_in_title.values()):
                suggestions.append("Make sure your title contains keywords.")
            if not any(keyword_presence_in_img_alt_text.values()):
                suggestions.append("Add keywords to image alt text.")
            if not headings:
                suggestions.append("Add headings to your HTML content.")
            if internal_links == 0:
                suggestions.append("Add internal links to your HTML content.")

            return {
                'Keyword Density': keyword_density,
                'Keyword Presence in Title': keyword_presence_in_title,
                'Keyword Presence in Image Alt Text': keyword_presence_in_img_alt_text,
                'Headings Text': headings_text,
                'Internal Links': internal_links,
                'External Links': external_links,
                'Readability Score': readability_score,
                'Spelling Errors': spelling_errors,
                'Grammar Errors': grammar_errors,
                'SEO Score': seo_score,
                'Suggestions': suggestions
            }
        except Exception as e:
            return {'error': str(e)}
# Example usage:
if __name__ == "__main__":
    html_content = """
<!DOCTYPE html>
<html>
<head>
<title>SEO Analyzer - Sample Page</title>
<meta name="description" content="This is a sample page for SEO analysis.">
</head>
<body>
<h1>Welcome to the SEO Analyzer</h1>
<p>This is a sample page with some sample content for SEO analysis. It mentions the target keywords SEO, keywords, and content.</p>
<img src="image1.jpg" alt="SEO image">
<img src="image2.jpg" alt="Keywords image">
</body>
</html>
"""
    keywords = ['SEO', 'keywords', 'content']  # Replace with your target keywords
    seo_analyzer = SEOAnalyzer(html_content, keywords)
    results = seo_analyzer.analyze_html_content()
    # The analyzer reports failures as {'error': ...}; stop with that message
    # instead of failing on a missing result key below.
    if 'error' in results:
        raise SystemExit(f"SEO analysis failed: {results['error']}")
    print("SEO Analysis Results:")
    # Each printed label is also the key of the corresponding result entry.
    for label in (
        'Keyword Density',
        'Keyword Presence in Title',
        'Keyword Presence in Image Alt Text',
        'Headings Text',
        'Internal Links',
        'External Links',
        'Readability Score',
        'Spelling Errors',
        'Grammar Errors',
        'SEO Score',
    ):
        print(f"{label}: {results[label]}")
    print("Suggestions:")
    for suggestion in results['Suggestions']:
        print(suggestion)

View File

@@ -0,0 +1,115 @@
from typing import List, Dict, Union
from nltk import tokenize, stem, pos_tag
from textblob import TextBlob
import enchant
class TextPreprocessor:
    """Prepares raw text for analysis by tokenizing and stemming it."""

    def preprocess_text(self, text: str) -> str:
        """Return the text with every token replaced by its Porter stem, joined by single spaces."""
        words = tokenize.word_tokenize(text)
        porter = stem.PorterStemmer()
        return ' '.join(porter.stem(word) for word in words)
class SEOAnalyzer:
    """
    Text-based SEO metrics: keyword density, a simple readability score and a
    part-of-speech based score. Every metric returns 0.0 for text without any
    tokens instead of dividing by zero.
    """

    def calculate_seo_percentage(self, text: str, keywords: List[str]) -> float:
        """
        Return the arithmetic mean of the three metrics below.

        NOTE(review): despite the name, the metrics are ratios and the result
        is not multiplied by 100 — confirm what callers expect.
        """
        keyword_density = self.calculate_keyword_density(text, keywords)
        readability_score = self.calculate_readability_score(text)
        semantic_score = self.perform_semantic_analysis(text)
        return (keyword_density + readability_score + semantic_score) / 3

    def calculate_keyword_density(self, text: str, keywords: List[str]) -> float:
        """Return total case-insensitive keyword occurrences divided by the token count."""
        word_count = len(tokenize.word_tokenize(text))
        if word_count == 0:
            return 0.0
        lowered_text = text.lower()
        occurrences = sum(lowered_text.count(keyword.lower()) for keyword in keywords)
        return occurrences / word_count

    def calculate_readability_score(self, text: str) -> float:
        """Return the reciprocal of the average number of tokens per sentence."""
        sentences = tokenize.sent_tokenize(text)
        word_count = sum(len(tokenize.word_tokenize(sentence)) for sentence in sentences)
        if not sentences or word_count == 0:
            return 0.0
        average_words_per_sentence = word_count / len(sentences)
        return 1 / average_words_per_sentence

    def perform_semantic_analysis(self, text: str) -> float:
        """Return the share of tokens whose part-of-speech tag starts with 'N' or 'V'."""
        tagged_text = pos_tag(tokenize.word_tokenize(text))
        if not tagged_text:
            return 0.0
        noun_count = sum(1 for _, pos in tagged_text if pos.startswith('N'))
        verb_count = sum(1 for _, pos in tagged_text if pos.startswith('V'))
        return (noun_count + verb_count) / len(tagged_text)
class SpellChecker:
    """Finds tokens that the en_US enchant dictionary does not accept."""

    def check_spelling(self, text: str) -> List[str]:
        """Return, in order of appearance, every token of the text that fails the dictionary check."""
        dictionary = enchant.Dict("en_US")
        return [word for word in tokenize.word_tokenize(text) if not dictionary.check(word)]
class SEOAnalysisModule:
    """Runs preprocessing, the SEO metrics and the spell check for one text."""

    def __init__(self):
        self.text_preprocessor = TextPreprocessor()
        self.seo_analyzer = SEOAnalyzer()
        self.spell_checker = SpellChecker()

    def analyze_text(self, text: str, keywords: List[str]) -> Dict[str, Union[float, List[str]]]:
        """
        Analyze the text for the given keywords.

        Returns a dict with 'seo_percentage', 'keyword_density',
        'readability_score', 'semantic_score' (floats) and 'spelling_errors'
        (list of tokens that failed the spell check).
        """
        # Preprocess the text
        preprocessed_text = self.text_preprocessor.preprocess_text(text)
        # Run the keywords through the same preprocessing so they are
        # compared with the preprocessed text in the same form.
        preprocessed_keywords = [self.text_preprocessor.preprocess_text(keyword) for keyword in keywords]
        # Calculate the SEO percentage
        seo_percentage = self.seo_analyzer.calculate_seo_percentage(preprocessed_text, preprocessed_keywords)
        # Calculate the keyword density
        keyword_density = self.seo_analyzer.calculate_keyword_density(preprocessed_text, preprocessed_keywords)
        # Calculate the readability score
        readability_score = self.seo_analyzer.calculate_readability_score(preprocessed_text)
        # Perform semantic analysis
        semantic_score = self.seo_analyzer.perform_semantic_analysis(preprocessed_text)
        # Check spelling on the text as written: word stems are not dictionary
        # words and would be reported as misspellings.
        spelling_errors = self.spell_checker.check_spelling(text)
        return {
            'seo_percentage': seo_percentage,
            'keyword_density': keyword_density,
            'readability_score': readability_score,
            'semantic_score': semantic_score,
            'spelling_errors': spelling_errors
        }

View File

@@ -0,0 +1,71 @@
## Required Python third-party packages
- requests==2.26.0
- pytest==6.2.5
- json (part of the Python standard library; no installation or version pin required)
## Logic Analysis
- ['main.py', 'Main']
- ['wordpress_api.py', 'WordpressAPI']
- ['test_wordpress_api.py', 'TestWordpressAPI']
## Task list
'main.py' contains the main entry point of the program.
'wordpress_api.py' contains the implementation of the WordpressAPI class, which handles the integration with the Wordpress API.
'test_wordpress_api.py' contains unit tests for the WordpressAPI class.
## Implementation approach
To implement the wordpress API integration module, we will use the requests library, which is a popular open-source library for making HTTP requests in Python. This library provides a simple and intuitive way to send HTTP requests and handle responses. We will also use the json library to handle JSON data. Additionally, we will write unit tests using the pytest framework to ensure the functionality and quality of the module. The module will be designed to be easily integrated into existing Python codebases by providing clear usage instructions and documentation.
## Python package name
wordpress_api_integration
## File list
- main.py
- wordpress_api.py
- test_wordpress_api.py
## Data structures and interface definitions
classDiagram
class WordpressAPI{
+str base_url
+str username
+str password
+str token
+str authenticate()
+str upload_content(str content)
}
WordpressAPI "1" -- "1" Authentication: has
WordpressAPI "1" -- "1" ContentUpload: has
class Authentication{
+str authenticate()
}
class ContentUpload{
+str upload_content(str content)
}
## Program call flow
sequenceDiagram
participant M as Main
participant WP as WordpressAPI
participant A as Authentication
participant CU as ContentUpload
M->>WP: Create WordpressAPI instance
WP->>A: Create Authentication instance
A->>WP: Authenticate
WP->>CU: Create ContentUpload instance
CU->>WP: Upload content

View File

@@ -0,0 +1,21 @@
## main.py
from wordpress_api import WordpressAPI
def main():
    """Run the demo flow: build a client, log in, then push one test post."""
    client = WordpressAPI(
        base_url="https://example.com",
        username="admin",
        password="password",
    )
    # Log in first, then send the sample content.
    client.authenticate()
    client.upload_content("This is a test content")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,30 @@
## test_wordpress_api.py
import pytest
from wordpress_api import WordpressAPI
class _FakeResponse:
    """Minimal stand-in for a ``requests`` response: status code plus JSON body."""

    def __init__(self, status_code, payload=None):
        self.status_code = status_code
        self._payload = payload if payload is not None else {}

    def json(self):
        return self._payload


class TestWordpressAPI:
    """Unit tests for WordpressAPI that never touch the network.

    ``requests.post`` is replaced by a small in-process fake server, so the
    tests are deterministic and do not depend on a reachable WordPress site.
    """

    BASE_URL = "https://example.com"
    PASSWORD = "password"

    @pytest.fixture(autouse=True)
    def posted(self, monkeypatch):
        """Stub ``requests.post`` and return the list of recorded (url, json) calls.

        The fake accepts only URLs under BASE_URL; ``/authenticate`` succeeds
        only for PASSWORD, and any other path under BASE_URL returns 200.
        """
        calls = []

        def fake_post(url, json=None, headers=None, **kwargs):
            calls.append((url, json))
            if not url.startswith(f"{self.BASE_URL}/"):
                return _FakeResponse(404)
            if url.endswith("/authenticate"):
                if (json or {}).get("password") == self.PASSWORD:
                    return _FakeResponse(200, {"token": "test-token"})
                return _FakeResponse(401)
            return _FakeResponse(200)

        monkeypatch.setattr("requests.post", fake_post)
        return calls

    @pytest.fixture
    def wp_api(self):
        """Client configured with credentials the fake server accepts."""
        return WordpressAPI(base_url=self.BASE_URL, username="admin", password=self.PASSWORD)

    def test_authenticate_success(self, wp_api):
        wp_api.authenticate()
        assert wp_api.authentication.token is not None

    def test_authenticate_failure(self, wp_api):
        wp_api.authentication.password = "wrong_password"
        with pytest.raises(Exception):
            wp_api.authenticate()

    def test_upload_content_success(self, wp_api, posted):
        content = "This is a test content"
        wp_api.upload_content(content)
        # The content must have been posted to the upload endpoint.
        url, payload = posted[-1]
        assert url == f"{self.BASE_URL}/upload"
        assert payload["content"] == content

    def test_upload_content_failure(self, wp_api):
        content = "This is a test content"
        wp_api.content_upload.base_url = "https://wrong_url.com"
        with pytest.raises(Exception):
            wp_api.upload_content(content)

View File

@@ -0,0 +1,75 @@
## wordpress_api.py
import requests
import json
class Authentication:
    """Holds the API credentials and the token obtained by logging in with them."""

    def __init__(self, base_url, username, password):
        self.base_url, self.username, self.password = base_url, username, password
        # No token until authenticate() succeeds.
        self.token = None

    def authenticate(self):
        """
        Authenticates the user with the Wordpress API.

        Posts the credentials as JSON to ``<base_url>/authenticate`` and stores
        the ``token`` field of the reply on ``self.token``.

        Raises:
            Exception: if the endpoint answers with any status other than 200.
        """
        reply = requests.post(
            f"{self.base_url}/authenticate",
            json={"username": self.username, "password": self.password},
            headers={"Content-Type": "application/json"},
        )
        if reply.status_code != 200:
            raise Exception("Authentication failed")
        self.token = reply.json()["token"]
class ContentUpload:
    """Sends content to the upload endpoint together with a token."""

    def __init__(self, base_url, token):
        self.base_url, self.token = base_url, token

    def upload_content(self, content):
        """
        Uploads the given content to the Wordpress API.

        Posts ``content`` and the stored token as JSON to ``<base_url>/upload``.

        Raises:
            Exception: if the endpoint answers with any status other than 200.
        """
        endpoint = f"{self.base_url}/upload"
        body = {"content": content, "token": self.token}
        reply = requests.post(
            endpoint,
            json=body,
            headers={"Content-Type": "application/json"},
        )
        if reply.status_code == 200:
            return
        raise Exception("Content upload failed")
class WordpressAPI:
    """Facade that pairs an Authentication helper with a ContentUpload helper.

    Attributes are kept public (``authentication``, ``content_upload``) so
    callers and tests can inspect or adjust the helpers directly.
    """

    def __init__(self, base_url, username, password):
        self.base_url = base_url
        self.username = username
        self.password = password
        self.authentication = Authentication(base_url, username, password)
        # The token is still None at this point; authenticate() hands the real
        # one over to the uploader once it exists.
        self.content_upload = ContentUpload(base_url, self.authentication.token)

    def authenticate(self):
        """
        Authenticates the user with the Wordpress API.

        After a successful login the fresh token is copied to the uploader;
        without this step every upload would carry the initial ``None`` token.

        Raises:
            Exception: propagated from the authentication helper on failure.
        """
        self.authentication.authenticate()
        self.content_upload.token = self.authentication.token

    def upload_content(self, content):
        """
        Uploads the given content to the Wordpress API.

        Raises:
            Exception: propagated from the upload helper on failure.
        """
        self.content_upload.upload_content(content)

View File

@@ -0,0 +1,54 @@
## main.py
import os
import requests
import json
class WordPressAPIIntegration:
    """Validate a local image file and upload it to the WordPress API.

    The public entry point is ``upload_file``; it reports success or failure
    as a bool.
    """

    # Largest file accepted for upload: 10MB.
    MAX_FILE_SIZE = 10 * 1024 * 1024
    # Accepted extensions; compared case-insensitively.
    VALID_FILE_TYPES = ('.jpg', '.jpeg', '.png', '.gif')
    AUTH_URL = "https://wordpress-api.com/authenticate"
    UPLOAD_URL = "https://wordpress-api.com/upload"

    def __init__(self, credentials: dict):
        # Serialised as the JSON body of the authentication request.
        self.credentials = credentials

    def upload_file(self, file_path: str) -> bool:
        """Check, authenticate and upload; return True only if every step succeeds.

        The steps run in that order and the first failing step stops the rest.
        """
        if not self._check_file(file_path):
            return False
        if not self._authenticate():
            return False
        return bool(self._upload_file_to_api(file_path))

    def _check_file(self, file_path: str) -> bool:
        """Return True if the file exists, is within the size limit and is an image type."""
        try:
            file_size = os.path.getsize(file_path)
        except OSError:
            # Missing or unreadable path: report failure instead of raising.
            return False
        if file_size > self.MAX_FILE_SIZE:
            return False
        # Lower-case so that e.g. "photo.JPG" is accepted as well.
        file_extension = os.path.splitext(file_path)[1].lower()
        return file_extension in self.VALID_FILE_TYPES

    def _authenticate(self) -> bool:
        """Post the credentials as JSON; return True on an HTTP 200 reply."""
        # NOTE(review): nothing from the reply (token, cookie) is kept for the
        # upload request - confirm whether the API needs it.
        headers = {'Content-Type': 'application/json'}
        data = json.dumps(self.credentials)
        response = requests.post(self.AUTH_URL, headers=headers, data=data)
        return response.status_code == 200

    def _upload_file_to_api(self, file_path: str) -> bool:
        """Send the file as a multipart upload; return True on an HTTP 200 reply."""
        # The context manager closes the handle even if the request raises.
        with open(file_path, 'rb') as file_handle:
            response = requests.post(self.UPLOAD_URL, files={'file': file_handle})
        return response.status_code == 200

View File

@@ -0,0 +1,86 @@
## test_wordpress_api_integration.py
import os
import pytest
from wordpress_api_integration import WordPressAPIIntegration
def _stub_pipeline(monkeypatch, *, check=True, auth=True, upload=True):
    """Replace the three private steps of WordPressAPIIntegration with fakes.

    Each fake takes ``self`` because it is installed on the class and is
    therefore called as a bound method. Returns the list that records the
    order in which the steps were invoked.
    """
    calls = []

    def fake_check_file(self, file_path):
        calls.append("check")
        return check

    def fake_authenticate(self):
        calls.append("authenticate")
        return auth

    def fake_upload_file_to_api(self, file_path):
        calls.append("upload")
        return upload

    monkeypatch.setattr(WordPressAPIIntegration, "_check_file", fake_check_file)
    monkeypatch.setattr(WordPressAPIIntegration, "_authenticate", fake_authenticate)
    monkeypatch.setattr(WordPressAPIIntegration, "_upload_file_to_api", fake_upload_file_to_api)
    return calls


class TestWordPressAPIIntegration:
    """Tests for the step ordering and short-circuiting of ``upload_file``."""

    @pytest.fixture
    def credentials(self):
        return {
            "username": "test_user",
            "password": "test_password"
        }

    @pytest.fixture
    def valid_file_path(self):
        return "path/to/valid/file.jpg"

    @pytest.fixture
    def invalid_file_path(self):
        return "path/to/invalid/file.txt"

    def test_upload_file_valid_file(self, credentials, valid_file_path, monkeypatch):
        calls = _stub_pipeline(monkeypatch)
        api_integration = WordPressAPIIntegration(credentials)
        result = api_integration.upload_file(valid_file_path)
        assert result is True
        assert calls == ["check", "authenticate", "upload"]

    def test_upload_file_invalid_file(self, credentials, invalid_file_path, monkeypatch):
        calls = _stub_pipeline(monkeypatch, check=False)
        api_integration = WordPressAPIIntegration(credentials)
        result = api_integration.upload_file(invalid_file_path)
        assert result is False
        # A rejected file must stop the pipeline before any request is made.
        assert calls == ["check"]

    def test_upload_file_authentication_failed(self, credentials, valid_file_path, monkeypatch):
        calls = _stub_pipeline(monkeypatch, auth=False)
        api_integration = WordPressAPIIntegration(credentials)
        result = api_integration.upload_file(valid_file_path)
        assert result is False
        assert calls == ["check", "authenticate"]

    def test_upload_file_upload_failed(self, credentials, valid_file_path, monkeypatch):
        calls = _stub_pipeline(monkeypatch, upload=False)
        api_integration = WordPressAPIIntegration(credentials)
        result = api_integration.upload_file(valid_file_path)
        assert result is False
        assert calls == ["check", "authenticate", "upload"]

View File

@@ -0,0 +1,54 @@
## wordpress_api_integration.py
import os
import requests
import json
class WordPressAPIIntegration:
    """Validate a local image file and upload it to the WordPress API.

    The public entry point is ``upload_file``; it reports success or failure
    as a bool.
    """

    # Largest file accepted for upload: 10MB.
    MAX_FILE_SIZE = 10 * 1024 * 1024
    # Accepted extensions; compared case-insensitively.
    VALID_FILE_TYPES = ('.jpg', '.jpeg', '.png', '.gif')
    AUTH_URL = "https://wordpress-api.com/authenticate"
    UPLOAD_URL = "https://wordpress-api.com/upload"

    def __init__(self, credentials: dict):
        # Serialised as the JSON body of the authentication request.
        self.credentials = credentials

    def upload_file(self, file_path: str) -> bool:
        """Check, authenticate and upload; return True only if every step succeeds.

        The steps run in that order and the first failing step stops the rest.
        """
        if not self._check_file(file_path):
            return False
        if not self._authenticate():
            return False
        return bool(self._upload_file_to_api(file_path))

    def _check_file(self, file_path: str) -> bool:
        """Return True if the file exists, is within the size limit and is an image type."""
        try:
            file_size = os.path.getsize(file_path)
        except OSError:
            # Missing or unreadable path: report failure instead of raising.
            return False
        if file_size > self.MAX_FILE_SIZE:
            return False
        # Lower-case so that e.g. "photo.JPG" is accepted as well.
        file_extension = os.path.splitext(file_path)[1].lower()
        return file_extension in self.VALID_FILE_TYPES

    def _authenticate(self) -> bool:
        """Post the credentials as JSON; return True on an HTTP 200 reply."""
        # NOTE(review): nothing from the reply (token, cookie) is kept for the
        # upload request - confirm whether the API needs it.
        headers = {'Content-Type': 'application/json'}
        data = json.dumps(self.credentials)
        response = requests.post(self.AUTH_URL, headers=headers, data=data)
        return response.status_code == 200

    def _upload_file_to_api(self, file_path: str) -> bool:
        """Send the file as a multipart upload; return True on an HTTP 200 reply."""
        # The context manager closes the handle even if the request raises.
        with open(file_path, 'rb') as file_handle:
            response = requests.post(self.UPLOAD_URL, files={'file': file_handle})
        return response.status_code == 200

27
main_config.ini Normal file
View File

@@ -0,0 +1,27 @@
###################################################
#
# This is the main config file which drives the code.
# Keeping settings here limits the need for code modifications and
# hence improves usability.
#
##################################################
# Set the Openai API key
openai_api_key=""
# bard_api=""
# ms_bing_api=""
# Mention which model to use, default is GPT-3.5
model_name=""
# Write the prompt for generating TEXT reply from GenAI engine
txt_prompt=""
# An effective text prompt may have several components, including:
# Main subject: the who of the prompt, e.g. Small puppy with a fluffy white tail wearing a red collar.
# Action: the what/how of the prompt, e.g. Joyfully carrying a long wooden stick.
# Surroundings: the when/where of the prompt, e.g. On a busy street corner at dusk. A small basketball court in the background.
# Visual aesthetics: how you want the images to look, e.g. Shot from above, soft yellow light, blurred background.
# Write the prompt for generating IMG reply from GenAI engines
img_text=""

4
prompt Normal file
View File

@@ -0,0 +1,4 @@
Move all hard-coded values out of the modules and put them in a config file.
Suggest functions that can be improved for readability and polymorphism, and remove redundancy.
Make the code conform to PEP standards.
Include try and except. Handle exceptions wherever they can occur. Include detailed exceptions and error messages.

37
pseo_main.py Normal file
View File

@@ -0,0 +1,37 @@
#!/usr/bin/python3
#########################################################
#
# This is the main module for calling pseo related functions.
# This is the end user interface and is user driven.
# TBD: argparse and reading the config file. For usability,
# no editing of code should be required.
#
#########################################################
import sys
import json
import traceback
from loguru import logger
logger.add(sys.stdout, colorize=True, format="<green>{time}</green> <level>{message}</level>")
from lib.generate_image_from_prompt import generate_image, gen_new_from_given_img
from lib.get_text_response import get_prompt_reply, generate_detailed_blog
# Script body: run the blog generator and report any failure through the logger.
try:
    logger.info("Starting homebrew pseo blog generator.")
    # Long-form prompt for get_prompt_reply(); only used by the disabled call below.
    prompt = "Create a detailed and technical blog of best AI tools for text-to-video conversion in 2023, along with features, pricing, pros, cons, and website links and if free or paid version. Summarize this blog in conclusion at the end. Write in markdown."
    # Alternative entry points, currently disabled:
    #txt_reply = get_prompt_reply(prompt, 2000)
    #generate_image(logger)
    #gen_new_from_given_img(logger)

    # Generate detailed blog by only providing keywords from blog title.
    # Example: AI text to video tools
    generate_detailed_blog("text to video AI tools")
except Exception as err:
    # Top-level boundary: logger.exception records the same ERROR message
    # together with the traceback, so the failing call can be located.
    logger.exception(f"Error occured in main::{err}")

6
requirements.txt Normal file
View File

@@ -0,0 +1,6 @@
loguru
openai
Pillow
requests
tqdm
urllib3