Blogen-V0.1 Added features. WIP
This commit is contained in:
65
lib/seo_module/is_content_ai_generated.py
Normal file
65
lib/seo_module/is_content_ai_generated.py
Normal file
@@ -0,0 +1,65 @@
|
||||
##############################################################################################
#
# Checks for:
# Explicit AI-related phrases in the text (e.g. "generated by AI", "machine-generated").
# The same token repeated three times in a row.
# Short, fragmented sentences that lack human-like coherence.
# Frequent use of overly complex words or technical jargon.
#
# These checks are based on common observations that AI-generated content may sometimes produce
# text with unusual patterns or characteristics. However, please keep in mind that these
# heuristics are not guaranteed to detect all AI-generated content, and false positives or
# negatives can still occur. More advanced techniques and models would be required for more
# accurate detection.
#
#############################################################################################
|
||||
|
||||
import spacy
|
||||
|
||||
# Load the English language model from spaCy
|
||||
# Loaded once at import time; is_ai_generated() reuses this module-level
# pipeline on every call instead of reloading the model.
nlp = spacy.load("en_core_web_sm")
|
||||
|
||||
def is_ai_generated(text):
    """Heuristically guess whether ``text`` was produced by an AI system.

    The checks run from cheapest to most expensive, and the function returns
    ``True`` as soon as one of them fires:

    1. The text contains one of a fixed list of AI-related phrases
       (case-insensitive substring match).
    2. The same token text appears three times in a row.
    3. Any sentence is shorter than five tokens.
    4. The number of alphabetic tokens longer than ten characters exceeds
       ``len(doc) // 10``.

    These are rough heuristics; false positives and false negatives are
    expected.

    Args:
        text: The text to inspect.

    Returns:
        ``True`` if any heuristic fires, otherwise ``False``.
    """
    ai_indicators = (
        "generated by AI",
        "auto-generated",
        "machine-generated",
        "artificial intelligence",
        "neural network",
        "GPT-3",
        "AI model",
    )

    # Lower-case the input once instead of once per indicator.
    lowered_text = text.lower()
    if any(indicator.lower() in lowered_text for indicator in ai_indicators):
        return True

    # Run the spaCy pipeline only when the cheap keyword scan did not already
    # decide the outcome; parsing is by far the most expensive step here.
    doc = nlp(text)

    # Repetition check: three identical consecutive token texts.
    token_texts = [token.text for token in doc]
    triples = zip(token_texts, token_texts[1:], token_texts[2:])
    if any(first == second == third for first, second, third in triples):
        return True

    # Fragmentation check: any sentence with fewer than five tokens.
    if any(len(sentence) < 5 for sentence in doc.sents):
        return True

    # Jargon check: long alphabetic tokens compared against a tenth of the
    # document length (integer division, so short texts have a threshold of 0).
    complex_word_count = sum(
        1 for token in doc if token.is_alpha and len(token.text) > 10
    )
    return complex_word_count > len(doc) // 10
|
||||
|
||||
if __name__ == "__main__":
    # Demo run: the sample text contains phrases from the indicator list
    # ("AI model", "machine-generated").
    input_text = """
    This is an article generated by a state-of-the-art AI model.
    The content is machine-generated and may not represent human writing style.
    """

    verdict = (
        "The content appears to be AI-generated."
        if is_ai_generated(input_text)
        else "The content appears to be written by a human."
    )
    print(verdict)
|
||||
|
||||
80
lib/seo_module/plagiarism_checker_from_known_sources.py
Normal file
80
lib/seo_module/plagiarism_checker_from_known_sources.py
Normal file
@@ -0,0 +1,80 @@
|
||||
import re
|
||||
|
||||
#######################################################################
|
||||
#
|
||||
# I want you to act as a plagiarism checker. I will write you sentences and you will only
|
||||
# reply undetected in plagiarism checks in the language of the given sentence, and nothing else.
|
||||
# Do not write explanations on replies. My first sentence is {}
|
||||
#
|
||||
######################################################################
|
||||
|
||||
|
||||
class PlagiarismChecker:
    """Detect content whose text exactly matches one of a set of known sources.

    Candidate and source markup are both reduced to plain text (tags removed,
    whitespace collapsed, lower-cased) before being compared for equality.
    Only whole-document matches are detected; partial overlaps are not.
    """

    # Compiled once for the class and shared by every normalization call.
    _TAG_RE = re.compile(r'<[^>]+>')
    _WHITESPACE_RE = re.compile(r'\s+')

    def __init__(self, known_sources):
        """Store the known sources.

        Args:
            known_sources: Iterable of HTML/text strings to compare against.
        """
        self.known_sources = known_sources

    @classmethod
    def _normalize(cls, markup):
        """Return ``markup`` with tags stripped, whitespace collapsed, lower-cased."""
        # Tags become a space so that text on either side does not get glued
        # together; the surplus whitespace is collapsed right afterwards.
        without_tags = cls._TAG_RE.sub(' ', markup)
        return cls._WHITESPACE_RE.sub(' ', without_tags).strip().lower()

    def check_plagiarism(self, html_content):
        """Compare ``html_content`` against every known source.

        Args:
            html_content: The HTML (or plain text) string to check.

        Returns:
            A message naming the first matching source (the raw source string
            is embedded in the message), or a "no plagiarism" message when
            nothing matches. If an exception occurs while processing, its
            string form is returned instead of being raised.
        """
        try:
            text = self._normalize(html_content)

            for source in self.known_sources:
                if text == self._normalize(source):
                    return f"Plagiarism detected: Matches known source - {source}"

            return "No plagiarism detected. Content is original."

        except Exception as e:
            # Kept broad on purpose: existing callers receive the error text
            # as the return value rather than an exception.
            return str(e)
|
||||
|
||||
# Example usage:
|
||||
if __name__ == "__main__":
    # Two reference documents the checker should recognise.
    known_sources = [
        """
        <html>
        <head>
            <title>Sample Page 1</title>
        </head>
        <body>
            <h1>Hello, World!</h1>
            <p>This is sample content from known source 1.</p>
        </body>
        </html>
        """,
        """
        <html>
        <head>
            <title>Sample Page 2</title>
        </head>
        <body>
            <h1>Welcome to Known Source 2</h1>
            <p>This is some content from another known source.</p>
        </body>
        </html>
        """,
    ]

    # Candidate page: similar to the first source, but its text is not identical.
    html_content = """
    <html>
    <head>
        <title>Sample Page</title>
    </head>
    <body>
        <h1>Hello, World!</h1>
        <p>This is sample content.</p>
    </body>
    </html>
    """

    checker = PlagiarismChecker(known_sources)
    print(checker.check_plagiarism(html_content))
|
||||
|
||||
Reference in New Issue
Block a user