Files
ALwrity/lib/is_content_ai_generated.py
2023-10-07 10:51:22 +05:30

66 lines
2.2 KiB
Python

##############################################################################################
#
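# Checks for:
# Explicit AI-related phrases in the text (e.g. "generated by AI", "GPT-3").
# The same token repeated three times in a row.
# Short, fragmented sentences (fewer than 5 tokens) that lack human-like coherence.
# Frequent use of overly complex words (alphabetic tokens longer than 10 characters) or technical jargon.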
#
# These checks are based on common observations that AI-generated content may sometimes produce
# text with unusual patterns or characteristics. However, please keep in mind that these
# heuristics are not guaranteed to detect all AI-generated content, and false positives or
# negatives can still occur. More advanced techniques and models would be required for more accurate detection.
#
#############################################################################################
import spacy
# Load the spaCy "en_core_web_sm" English pipeline once, at import time, so
# that is_ai_generated() reuses the same `nlp` object on every call.
# NOTE(review): presumably the model package must be installed separately
# for this load to succeed -- confirm against the project's setup docs.
nlp = spacy.load("en_core_web_sm")
def is_ai_generated(text: str) -> bool:
    """Heuristically guess whether *text* looks AI-generated.

    The checks run in order and the first one that fires returns ``True``:

    1. The text contains one of a fixed list of AI-related phrases
       (case-insensitive substring match).
    2. The same token text appears three times in a row.
    3. Any sentence found by the spaCy pipeline has fewer than 5 tokens.
    4. The number of alphabetic tokens longer than 10 characters exceeds
       one tenth of the total token count (integer division).

    These are rough heuristics; false positives and false negatives are
    expected.

    Args:
        text: The text to inspect.

    Returns:
        ``True`` if any heuristic fires, otherwise ``False``.
    """
    ai_indicators = (
        "generated by AI",
        "auto-generated",
        "machine-generated",
        "artificial intelligence",
        "neural network",
        "GPT-3",
        "AI model",
    )

    # Lower-case the input once instead of once per indicator.
    lowered_text = text.lower()

    # The plain substring scan runs first: a hit decides the result on its
    # own, so the comparatively expensive spaCy parse is skipped entirely.
    if any(indicator.lower() in lowered_text for indicator in ai_indicators):
        return True

    # Tokenize the text using the module-level spaCy pipeline.
    doc = nlp(text)

    # Check for repetitive patterns: three identical consecutive tokens.
    # zip() over shifted copies yields every window of three neighbours and
    # yields nothing when there are fewer than three tokens.
    token_texts = [token.text for token in doc]
    for first, second, third in zip(token_texts, token_texts[1:], token_texts[2:]):
        if first == second == third:
            return True

    # Check for short, fragmented sentences that lack human-like coherence.
    for sentence in doc.sents:
        if len(sentence) < 5:
            return True

    # Check for frequent use of overly complex words or technical jargon.
    complex_word_count = sum(
        1 for token in doc if token.is_alpha and len(token.text) > 10
    )
    # Adjust the threshold as needed. With fewer than 10 tokens the
    # threshold is 0, so a single long word is enough to return True.
    return complex_word_count > len(doc) // 10
if __name__ == "__main__":
    # Small demo, executed only when the file is run as a script: classify a
    # sample paragraph and print the verdict.
    input_text = """
This is an article generated by a state-of-the-art AI model.
The content is machine-generated and may not represent human writing style.
"""
    looks_generated = is_ai_generated(input_text)
    print(
        "The content appears to be AI-generated."
        if looks_generated
        else "The content appears to be written by a human."
    )