import re
from typing import Collection

#######################################################################
#
# I want you to act as a plagiarism checker. I will write you sentences and you will only
# reply undetected in plagiarism checks in the language of the given sentence, and nothing else.
# Do not write explanations on replies. My first sentence is {}
#
######################################################################

# Compiled once at import time; both patterns are applied to the checked
# content and to every known source on each call.
_TAG_RE = re.compile(r'<[^>]+>')
_WHITESPACE_RE = re.compile(r'\s+')


def _normalize_text(raw: str) -> str:
    """Return *raw* with tags removed, whitespace collapsed, and lowercased.

    Each ``<...>`` tag is replaced by a single space (so words separated
    only by markup do not fuse together), runs of whitespace are collapsed
    to one space, and the result is stripped and lowercased.

    Raises:
        TypeError: If *raw* is not a string or bytes-like object.
    """
    without_tags = _TAG_RE.sub(' ', raw)
    return _WHITESPACE_RE.sub(' ', without_tags).strip().lower()


class PlagiarismChecker:
    """Detect content whose normalized text exactly equals a known source."""

    def __init__(self, known_sources: Collection[str]) -> None:
        """Store the sources to compare against.

        Args:
            known_sources: Re-iterable collection of source strings (plain
                text or HTML). It is kept by reference and iterated on
                every ``check_plagiarism`` call.
        """
        self.known_sources = known_sources

    def check_plagiarism(self, html_content: str) -> str:
        """Compare *html_content* against every known source.

        Both sides are normalized (tags stripped, whitespace collapsed,
        lowercased) and compared for exact equality; the first matching
        source, in iteration order, is reported.

        Args:
            html_content: The text or HTML to check.

        Returns:
            ``"Plagiarism detected: Matches known source - <source>"`` with
            the original (un-normalized) source on a match, otherwise
            ``"No plagiarism detected. Content is original."``. If the
            content or a source is not a string, the ``TypeError`` message
            is returned instead of being raised, so existing callers that
            expect a string result keep working.
        """
        try:
            text = _normalize_text(html_content)

            for source in self.known_sources:
                if text == _normalize_text(source):
                    return f"Plagiarism detected: Matches known source - {source}"
        except TypeError as e:
            # Only the type error from non-string input is reported as a
            # string; any other exception is a bug and should propagate.
            return str(e)

        return "No plagiarism detected. Content is original."


def main() -> None:
    """Run the example: check one snippet against two known sources."""
    # List of known sources
    known_sources = [
        "\nThis is sample content from known source 1.\n",
        "This is some content from another known source.\n",
    ]

    # HTML content to check for plagiarism
    html_content = "This is sample content.\n"

    plagiarism_checker = PlagiarismChecker(known_sources)
    result = plagiarism_checker.check_plagiarism(html_content)
    print(result)


# Example usage:
if __name__ == "__main__":
    main()