##############################################################################################
#
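# Checks for:
# Explicit AI-related keywords or phrases (e.g. "generated by AI", "GPT-3").
# The same token repeated three times in a row.
# Short, fragmented sentences that lack human-like coherence.
# Frequent use of overly complex words or technical jargon.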
#
# These checks are based on common observations that AI-generated content may sometimes produce 
# text with unusual patterns or characteristics. However, please keep in mind that these 
# heuristics are not guaranteed to detect all AI-generated content, and false positives or 
# negatives can still occur. More advanced techniques and models would be required for more accurate detection.
#
#############################################################################################

import spacy

# Load the English language model from spaCy.
# This happens once, at module import time; is_ai_generated() reuses this
# shared pipeline object for every call instead of loading it per call.
nlp = spacy.load("en_core_web_sm")

def is_ai_generated(text):
    """Heuristically guess whether *text* looks AI-generated.

    The checks run in order and the first one that fires decides the result:

    1. The text contains one of a fixed list of AI-related phrases
       (case-insensitive substring match).
    2. The same token text appears three times in a row.
    3. Any sentence found by spaCy has fewer than 5 tokens.
    4. Alphabetic tokens longer than 10 characters number more than
       one tenth (integer division) of all tokens.

    These are rough heuristics; both false positives and false negatives
    are expected.

    Args:
        text: The string to inspect.

    Returns:
        True if any heuristic fires, otherwise False.
    """
    # Check for indicators of AI-generated content
    ai_indicators = [
        "generated by AI",
        "auto-generated",
        "machine-generated",
        "artificial intelligence",
        "neural network",
        "GPT-3",
        "AI model",
    ]

    # Lower-case the input once rather than once per indicator. This check is
    # a plain substring scan, so it runs before the spaCy pipeline, which is
    # only needed for the token-level checks below.
    lowered_text = text.lower()
    if any(indicator.lower() in lowered_text for indicator in ai_indicators):
        return True

    # Tokenize the text using spaCy
    doc = nlp(text)

    # Check for repetitive patterns: three identical consecutive tokens.
    # Zipping the list against itself shifted by one and two positions yields
    # every window of three neighbours (and nothing for fewer than 3 tokens).
    token_texts = [token.text for token in doc]
    windows = zip(token_texts, token_texts[1:], token_texts[2:])
    if any(first == second == third for first, second, third in windows):
        return True

    # Check for short, fragmented sentences that lack human-like coherence.
    # len(sentence) is the number of tokens in the sentence span.
    if any(len(sentence) < 5 for sentence in doc.sents):
        return True

    # Check for frequent use of overly complex words or technical jargon
    complex_word_count = sum(
        1 for token in doc if token.is_alpha and len(token.text) > 10
    )
    # Adjust the threshold as needed
    return complex_word_count > len(doc) // 10

if __name__ == "__main__":
    input_text = """
    This is an article generated by a state-of-the-art AI model.
    The content is machine-generated and may not represent human writing style.
    """

    # Run the heuristic on the sample text and report its verdict.
    verdict = (
        "The content appears to be AI-generated."
        if is_ai_generated(input_text)
        else "The content appears to be written by a human."
    )
    print(verdict)