66 lines
2.2 KiB
Python
66 lines
2.2 KiB
Python
##############################################################################################
#
# Checks for:
#   Explicit AI-related phrases in the text (e.g. "generated by AI", "machine-generated").
#   The same token repeated three times in a row.
#   Short, fragmented sentences that lack human-like coherence.
#   Frequent use of overly complex words or technical jargon.
#
# These checks are based on common observations that AI-generated content may sometimes produce
# text with unusual patterns or characteristics. However, please keep in mind that these
# heuristics are not guaranteed to detect all AI-generated content, and false positives or
# negatives can still occur. More advanced techniques and models would be required for more
# accurate detection.
#
##############################################################################################
|
|
|
|
import spacy
|
|
|
|
# Load the English language model from spaCy
|
|
# Runs once at import time; the resulting pipeline is the module-level `nlp`
# object that is_ai_generated() calls on its input text.
nlp = spacy.load("en_core_web_sm")
|
|
|
|
def is_ai_generated(text: str) -> bool:
    """Heuristically guess whether *text* looks AI-generated.

    Returns True as soon as any one of the following checks fires, and
    False when none does:

    1. The text contains, case-insensitively, one of a fixed list of
       AI-related phrases.
    2. The same token text occurs three times in a row.
    3. Any sentence in the parsed document has fewer than 5 tokens.
    4. The number of alphabetic tokens longer than 10 characters exceeds
       one tenth of the token count (integer division).

    These are rough heuristics only; false positives and false negatives
    are expected.

    Args:
        text: The string to inspect.

    Returns:
        True if any heuristic matches, otherwise False.
    """
    ai_indicators = (
        "generated by AI",
        "auto-generated",
        "machine-generated",
        "artificial intelligence",
        "neural network",
        "GPT-3",
        "AI model",
    )

    # Lower-case the input once rather than once per phrase.
    lowered_text = text.lower()
    if any(indicator.lower() in lowered_text for indicator in ai_indicators):
        return True

    # The phrase scan is cheap, so the language pipeline only runs when the
    # scan above did not already decide the result.
    doc = nlp(text)

    # Repetition: slide a window of three consecutive token texts.
    words = [token.text for token in doc]
    if any(a == b == c for a, b, c in zip(words, words[1:], words[2:])):
        return True

    # Fragmentation: any sentence shorter than 5 tokens.
    if any(len(sentence) < 5 for sentence in doc.sents):
        return True

    # Jargon: long alphabetic tokens compared against a tenth of all tokens.
    complex_word_count = sum(
        1 for token in doc if token.is_alpha and len(token.text) > 10
    )
    return complex_word_count > len(doc) // 10  # Adjust the threshold as needed
|
|
|
|
if __name__ == "__main__":
    # Small demo: classify a two-line sample and report the verdict.
    input_text = (
        "\n"
        "This is an article generated by a state-of-the-art AI model.\n"
        "The content is machine-generated and may not represent human writing style.\n"
    )

    verdict = (
        "The content appears to be AI-generated."
        if is_ai_generated(input_text)
        else "The content appears to be written by a human."
    )
    print(verdict)
|
|
|