##############################################################################
#
# Heuristic detector for AI-generated text.
#
# Checks for:
#   - Explicit phrases that label or discuss AI generation
#     (e.g. "machine-generated", "AI model").
#   - The same token repeated three times in a row.
#   - Short, fragmented sentences that lack human-like coherence.
#   - Frequent use of overly complex words or technical jargon.
#
# These checks are based on common observations that AI-generated content may
# sometimes produce text with unusual patterns or characteristics. However,
# please keep in mind that these heuristics are not guaranteed to detect all
# AI-generated content, and false positives or negatives can still occur.
# More advanced techniques and models would be required for more accurate
# detection.
#
##############################################################################

import spacy

# Load the English language model from spaCy
nlp = spacy.load("en_core_web_sm")

# Phrases whose mere presence marks a text as AI-generated.  They are
# lower-cased once here so each call only needs a plain substring test.
_AI_INDICATORS = tuple(
    phrase.lower()
    for phrase in (
        "generated by AI",
        "auto-generated",
        "machine-generated",
        "artificial intelligence",
        "neural network",
        "GPT-3",
        "AI model",
    )
)


def is_ai_generated(
    text: str,
    *,
    min_sentence_tokens: int = 5,
    long_word_length: int = 10,
    long_word_divisor: int = 10,
) -> bool:
    """Return True if *text* trips any of the AI-content heuristics.

    The heuristics are tried in order and the first hit wins:

    1. *text* contains one of the indicator phrases (case-insensitive
       substring match).
    2. Three consecutive tokens have identical text (case-sensitive).
    3. Any sentence has fewer than ``min_sentence_tokens`` tokens.  The count
       is the length of the spaCy sentence span, so punctuation tokens count.
    4. The number of alphabetic tokens longer than ``long_word_length``
       characters exceeds ``len(doc) // long_word_divisor``.

    Args:
        text: The text to classify.
        min_sentence_tokens: Sentences with fewer tokens than this are
            treated as fragments.
        long_word_length: Alphabetic tokens with more characters than this
            count as "complex" words.
        long_word_divisor: The text is flagged when the complex-word count is
            greater than the token count integer-divided by this value
            (the default of 10 means "more than one token in ten").

    Returns:
        True if any heuristic fires, otherwise False.
    """
    # Cheap substring scan first: a match means the spaCy pipeline never has
    # to run for this text.
    lowered = text.lower()
    if any(phrase in lowered for phrase in _AI_INDICATORS):
        return True

    # Tokenize the text using spaCy
    doc = nlp(text)

    # Repetitive pattern: the same token text three times in a row.
    token_texts = [token.text for token in doc]
    if any(
        first == second == third
        for first, second, third in zip(
            token_texts, token_texts[1:], token_texts[2:]
        )
    ):
        return True

    # Short, fragmented sentences that lack human-like coherence.
    if any(len(sentence) < min_sentence_tokens for sentence in doc.sents):
        return True

    # Frequent use of overly complex words or technical jargon.
    complex_word_count = sum(
        1
        for token in doc
        if token.is_alpha and len(token.text) > long_word_length
    )
    return complex_word_count > len(doc) // long_word_divisor


def main() -> None:
    """Classify a built-in sample text and print the verdict."""
    input_text = """
    This is an article generated by a state-of-the-art AI model. The content is machine-generated and may not represent human writing style. 
    """

    if is_ai_generated(input_text):
        print("The content appears to be AI-generated.")
    else:
        print("The content appears to be written by a human.")


if __name__ == "__main__":
    main()