first commit

2026-04-01 10:44:22 +01:00
commit 43fcb9a131
1789 changed files with 395041 additions and 0 deletions
--- a/packages/plugins/ai-moderation/src/admin.tsx
+++ b/packages/plugins/ai-moderation/src/admin.tsx
@@ -0,0 +1,509 @@
+/**
+ * AI Moderation Plugin — Admin Components
+ *
+ * Exports widgets and pages for the admin UI.
+ */
+
+import { Switch } from "@cloudflare/kumo";
+import {
+	ShieldCheck,
+	CheckCircle,
+	WarningCircle,
+	FloppyDisk,
+	CircleNotch,
+	Trash,
+	PencilSimple,
+	Plus,
+	TestTube,
+	X,
+} from "@phosphor-icons/react";
+import type { PluginAdminExports } from "emdash";
+import { apiFetch, isRecord, parseApiResponse } from "emdash/plugin-utils";
+import * as React from "react";
+
+import type { Category } from "./categories.js";
+
+/** Base URL of this plugin's admin API; route names such as `status` or `settings` are appended to it. */
+const API_BASE = "/_emdash/api/plugins/ai-moderation";
+
+// =============================================================================
+// Dashboard Widget
+// =============================================================================
+
+/** Shape the dashboard widget expects from the plugin's `status` route. */
+interface PluginStatus {
+	/** Enabled flag reported by the status route. */
+	enabled: boolean;
+	/** Number of categories whose action is not "ignore", as counted by the status route. */
+	categoryCount: number;
+	/** Whether comments that pass the AI check are approved automatically. */
+	autoApproveClean: boolean;
+}
+
+/**
+ * Dashboard widget that summarises the plugin's moderation status.
+ *
+ * Fetches `${API_BASE}/status` once on mount. The widget is best-effort: a failed
+ * or non-OK request is swallowed and `status` stays `null`. In that case the
+ * widget says the status is unavailable instead of claiming that moderation is
+ * active with zero categories.
+ */
+function StatusWidget() {
+	const [status, setStatus] = React.useState<PluginStatus | null>(null);
+	const [isLoading, setIsLoading] = React.useState(true);
+
+	React.useEffect(() => {
+		// Set by the cleanup so a late response cannot update an unmounted widget.
+		let cancelled = false;
+
+		async function fetchStatus() {
+			try {
+				const response = await apiFetch(`${API_BASE}/status`);
+				if (!response.ok) return;
+				const data = await parseApiResponse<PluginStatus>(response);
+				if (!cancelled) setStatus(data);
+			} catch {
+				// Widget is non-critical
+			} finally {
+				if (!cancelled) setIsLoading(false);
+			}
+		}
+		void fetchStatus();
+
+		return () => {
+			cancelled = true;
+		};
+	}, []);
+
+	if (isLoading) {
+		return (
+			<div className="flex items-center justify-center py-8">
+				<CircleNotch className="h-5 w-5 animate-spin text-muted-foreground" />
+			</div>
+		);
+	}
+
+	// Only show the green "active" state when the server actually confirmed it.
+	const isActive = status?.enabled === true;
+	let heading = "AI Moderation Active";
+	if (status === null) {
+		heading = "AI Moderation status unavailable";
+	} else if (!isActive) {
+		heading = "AI Moderation Inactive";
+	}
+
+	return (
+		<div className="space-y-4">
+			<div className="flex items-center gap-3">
+				<div
+					className={`p-2 rounded-full ${
+						isActive ? "bg-green-100 dark:bg-green-900/30" : "bg-muted"
+					}`}
+				>
+					<ShieldCheck
+						className={`h-5 w-5 ${
+							isActive ? "text-green-600 dark:text-green-400" : "text-muted-foreground"
+						}`}
+					/>
+				</div>
+				<div>
+					<div className="font-medium">{heading}</div>
+					{status && (
+						<div className="text-xs text-muted-foreground">
+							{status.categoryCount ?? 0} active categories
+						</div>
+					)}
+				</div>
+			</div>
+
+			{/* Settings summary is only meaningful when the status was actually loaded */}
+			{status && (
+				<div className="pt-2 border-t space-y-1">
+					<div className="flex justify-between text-sm">
+						<span className="text-muted-foreground">Auto-approve clean</span>
+						<span>{status.autoApproveClean ? "Yes" : "No"}</span>
+					</div>
+				</div>
+			)}
+
+			<div className="pt-2">
+				<a
+					href="/_emdash/admin/plugins/ai-moderation/settings"
+					className="text-xs text-primary hover:underline"
+				>
+					Configure moderation
+				</a>
+			</div>
+		</div>
+	);
+}
+
+// =============================================================================
+// Category Edit Dialog
+// =============================================================================
+
+/** Props accepted by the category add/edit dialog. */
+interface CategoryDialogProps {
+	/** Category to edit, or `null` to start from a blank form for a new category. */
+	category: Category | null;
+	/** Called with the edited category when the user confirms a complete form. */
+	onSave: (category: Category) => void;
+	/** Called when the user dismisses the dialog via the close or cancel button. */
+	onClose: () => void;
+}
+
+/** Field length limits; mirror the checks applied by the plugin's `settings/save` route. */
+const CATEGORY_FIELD_LIMITS = { id: 10, name: 100, description: 500 } as const;
+
+/**
+ * Modal dialog for adding a new category or editing an existing one.
+ *
+ * The form is seeded from `category` once, when the dialog mounts. Saving is only
+ * possible when ID, name and description contain non-whitespace text. Name and
+ * description are trimmed before being passed to `onSave`; the ID is trimmed for
+ * new categories and passed back unchanged when editing, where the ID field is
+ * read-only.
+ *
+ * @param props.category - Category to edit, or `null` to create a new one.
+ * @param props.onSave - Receives the cleaned-up category when the user confirms.
+ * @param props.onClose - Invoked by the close and cancel buttons.
+ */
+function CategoryDialog({ category, onSave, onClose }: CategoryDialogProps) {
+	const [form, setForm] = React.useState<Category>(
+		category ?? {
+			id: "",
+			name: "",
+			description: "",
+			action: "hold",
+			builtin: false,
+		},
+	);
+
+	const isEditing = !!category;
+
+	// Merge a partial change into the most recent form state.
+	const updateForm = (patch: Partial<Category>) => setForm((prev) => ({ ...prev, ...patch }));
+
+	// The values that would be saved. Whitespace-only input must not count as filled in.
+	const cleaned: Category = {
+		...form,
+		id: isEditing ? form.id : form.id.trim(),
+		name: form.name.trim(),
+		description: form.description.trim(),
+	};
+	const canSave = cleaned.id !== "" && cleaned.name !== "" && cleaned.description !== "";
+
+	return (
+		<div className="fixed inset-0 z-50 flex items-center justify-center bg-black/50">
+			<div
+				role="dialog"
+				aria-modal="true"
+				aria-labelledby="ai-moderation-category-title"
+				className="bg-background border rounded-lg p-6 w-full max-w-md space-y-4"
+			>
+				<div className="flex items-center justify-between">
+					<h3 id="ai-moderation-category-title" className="text-lg font-semibold">
+						{isEditing ? "Edit Category" : "Add Category"}
+					</h3>
+					<button
+						type="button"
+						onClick={onClose}
+						aria-label="Close"
+						className="p-1 hover:bg-muted rounded"
+					>
+						<X className="h-4 w-4" />
+					</button>
+				</div>
+
+				<div className="space-y-3">
+					<div className="space-y-1">
+						<label htmlFor="ai-moderation-category-id" className="text-sm font-medium">
+							ID
+						</label>
+						<input
+							id="ai-moderation-category-id"
+							type="text"
+							value={form.id}
+							onChange={(e: React.ChangeEvent<HTMLInputElement>) =>
+								updateForm({ id: e.target.value })
+							}
+							disabled={isEditing}
+							maxLength={CATEGORY_FIELD_LIMITS.id}
+							placeholder="e.g. S10"
+							className="w-full px-3 py-2 border rounded-md bg-background text-sm disabled:opacity-50"
+						/>
+					</div>
+
+					<div className="space-y-1">
+						<label htmlFor="ai-moderation-category-name" className="text-sm font-medium">
+							Name
+						</label>
+						<input
+							id="ai-moderation-category-name"
+							type="text"
+							value={form.name}
+							onChange={(e: React.ChangeEvent<HTMLInputElement>) =>
+								updateForm({ name: e.target.value })
+							}
+							maxLength={CATEGORY_FIELD_LIMITS.name}
+							placeholder="e.g. Self-Promotion"
+							className="w-full px-3 py-2 border rounded-md bg-background text-sm"
+						/>
+					</div>
+
+					<div className="space-y-1">
+						<label htmlFor="ai-moderation-category-description" className="text-sm font-medium">
+							Description
+						</label>
+						<textarea
+							id="ai-moderation-category-description"
+							value={form.description}
+							onChange={(e: React.ChangeEvent<HTMLTextAreaElement>) =>
+								updateForm({ description: e.target.value })
+							}
+							rows={3}
+							maxLength={CATEGORY_FIELD_LIMITS.description}
+							placeholder="Description for AI classification..."
+							className="w-full px-3 py-2 border rounded-md bg-background text-sm resize-none"
+						/>
+					</div>
+
+					<div className="space-y-1">
+						<label htmlFor="ai-moderation-category-action" className="text-sm font-medium">
+							Action
+						</label>
+						<select
+							id="ai-moderation-category-action"
+							value={form.action}
+							onChange={(e: React.ChangeEvent<HTMLSelectElement>) => {
+								// Narrow the raw DOM value to the action union before storing it.
+								const val = e.target.value;
+								if (val === "block" || val === "hold" || val === "ignore") {
+									updateForm({ action: val });
+								}
+							}}
+							className="w-full px-3 py-2 border rounded-md bg-background text-sm"
+						>
+							<option value="block">Block (mark as spam)</option>
+							<option value="hold">Hold (pending review)</option>
+							<option value="ignore">Ignore (no action)</option>
+						</select>
+					</div>
+				</div>
+
+				<div className="flex justify-end gap-2 pt-2">
+					<button
+						type="button"
+						onClick={onClose}
+						className="px-4 py-2 border rounded-md hover:bg-muted text-sm"
+					>
+						Cancel
+					</button>
+					<button
+						type="button"
+						onClick={() => {
+							if (canSave) {
+								onSave(cleaned);
+							}
+						}}
+						disabled={!canSave}
+						className="px-4 py-2 bg-primary text-primary-foreground rounded-md hover:bg-primary/90 disabled:opacity-50 text-sm"
+					>
+						{isEditing ? "Save" : "Add"}
+					</button>
+				</div>
+			</div>
+		</div>
+	);
+}
+
+// =============================================================================
+// Settings Page
+// =============================================================================
+
+/**
+ * Renders the outcome of a test-panel run.
+ *
+ * `result` is either the parsed `settings/test` response or the local
+ * `{ success: false, error }` placeholder stored when the request itself fails.
+ */
+function TestResultView({ result }: { result: Record<string, unknown> }) {
+	// A failed run carries no verdict; show the reason instead of "not available".
+	if (result.success === false) {
+		return (
+			<div className="text-sm text-red-600">
+				{typeof result.error === "string" ? result.error : "Failed to run test"}
+			</div>
+		);
+	}
+
+	const guard = result.guard;
+	if (guard && isRecord(guard)) {
+		return (
+			<div className="flex items-center gap-2">
+				{guard.safe ? (
+					<CheckCircle className="h-5 w-5 text-green-600" />
+				) : (
+					<WarningCircle className="h-5 w-5 text-red-600" />
+				)}
+				<span className="font-medium">{guard.safe ? "Safe" : "Unsafe"}</span>
+				{!guard.safe && Array.isArray(guard.categories) && (
+					<span className="text-sm text-muted-foreground">
+						— Categories: {(guard.categories as string[]).join(", ")}
+					</span>
+				)}
+			</div>
+		);
+	}
+
+	if (result.guardError) {
+		return (
+			<div className="text-sm text-red-600">
+				AI Error: {typeof result.guardError === "string" ? result.guardError : "Unknown error"}
+			</div>
+		);
+	}
+
+	return (
+		<div className="text-sm text-muted-foreground">
+			AI analysis not available (no active categories)
+		</div>
+	);
+}
+
+/**
+ * Settings page for the AI moderation plugin.
+ *
+ * Loads categories and behavior from `${API_BASE}/settings` on mount and keeps
+ * edits in local state until "Save Settings" posts them to `settings/save`. Also
+ * hosts the category add/edit dialog and a test panel that posts sample text to
+ * `settings/test`.
+ */
+function SettingsPage() {
+	const [categories, setCategories] = React.useState<Category[]>([]);
+	const [autoApproveClean, setAutoApproveClean] = React.useState(true);
+	const [isLoading, setIsLoading] = React.useState(true);
+	const [isSaving, setIsSaving] = React.useState(false);
+	const [saveMessage, setSaveMessage] = React.useState<string | null>(null);
+	// `null` = dialog closed, "new" = adding, otherwise the category being edited.
+	const [editingCategory, setEditingCategory] = React.useState<Category | null | "new">(null);
+
+	// Test panel state
+	const [testText, setTestText] = React.useState("");
+	const [testResult, setTestResult] = React.useState<Record<string, unknown> | null>(null);
+	const [isTesting, setIsTesting] = React.useState(false);
+
+	// Handle of the pending "clear the save message" timer, if any.
+	const saveMessageTimer = React.useRef<ReturnType<typeof setTimeout> | null>(null);
+
+	const clearSaveMessageTimer = () => {
+		if (saveMessageTimer.current !== null) {
+			clearTimeout(saveMessageTimer.current);
+			saveMessageTimer.current = null;
+		}
+	};
+
+	// Show `message` next to the save button and clear it again after three seconds.
+	// Restarting the timer keeps an older timer from wiping a newer message early.
+	const flashSaveMessage = (message: string) => {
+		clearSaveMessageTimer();
+		setSaveMessage(message);
+		// eslint-disable-next-line e18e/prefer-timer-args -- conflicts with no-implied-eval
+		saveMessageTimer.current = setTimeout(() => {
+			saveMessageTimer.current = null;
+			setSaveMessage(null);
+		}, 3000);
+	};
+
+	// Cancel a pending message timer when the page unmounts.
+	React.useEffect(() => clearSaveMessageTimer, []);
+
+	// Load settings on mount
+	React.useEffect(() => {
+		async function loadSettings() {
+			try {
+				const response = await apiFetch(`${API_BASE}/settings`);
+				if (response.ok) {
+					const data = await parseApiResponse<{
+						categories?: Category[];
+						behavior?: { autoApproveClean?: boolean };
+					}>(response);
+					if (data.categories) setCategories(data.categories);
+					if (data.behavior?.autoApproveClean !== undefined) {
+						setAutoApproveClean(data.behavior.autoApproveClean);
+					}
+				}
+			} catch {
+				// Use defaults
+			} finally {
+				setIsLoading(false);
+			}
+		}
+		void loadSettings();
+	}, []);
+
+	/** Post the current categories and behavior, then flash the outcome. */
+	const handleSave = async () => {
+		setIsSaving(true);
+		clearSaveMessageTimer();
+		setSaveMessage(null);
+
+		let message = "Failed to save settings";
+		try {
+			const response = await apiFetch(`${API_BASE}/settings/save`, {
+				method: "POST",
+				headers: { "Content-Type": "application/json" },
+				body: JSON.stringify({
+					categories,
+					behavior: { autoApproveClean },
+				}),
+			});
+			if (response.ok) {
+				// The save route reports invalid input as { success: false, error } in the
+				// body, so check the body too in case that arrives with an OK status.
+				const result = await parseApiResponse<unknown>(response);
+				if (isRecord(result) && result.success === false) {
+					if (typeof result.error === "string") {
+						message = `Failed to save settings: ${result.error}`;
+					}
+				} else {
+					message = "Settings saved";
+				}
+			}
+		} catch {
+			// Keep the generic failure message.
+		} finally {
+			setIsSaving(false);
+		}
+		flashSaveMessage(message);
+	};
+
+	/** Send the test-panel text to `settings/test` and store the response for display. */
+	const handleTest = async () => {
+		if (!testText.trim()) return;
+		setIsTesting(true);
+		setTestResult(null);
+		try {
+			const response = await apiFetch(`${API_BASE}/settings/test`, {
+				method: "POST",
+				headers: { "Content-Type": "application/json" },
+				body: JSON.stringify({ text: testText }),
+			});
+			const data = await parseApiResponse<Record<string, unknown>>(response);
+			setTestResult(data);
+		} catch {
+			setTestResult({ success: false, error: "Failed to run test" });
+		} finally {
+			setIsTesting(false);
+		}
+	};
+
+	/**
+	 * Apply the dialog result to the local list and close the dialog.
+	 *
+	 * NOTE(review): the lookup is by ID only, so a category added with an ID that
+	 * is already in the list replaces the existing entry.
+	 */
+	const handleCategorySave = (cat: Category) => {
+		setCategories((prev) => {
+			const idx = prev.findIndex((c) => c.id === cat.id);
+			if (idx >= 0) {
+				const updated = [...prev];
+				updated[idx] = cat;
+				return updated;
+			}
+			return [...prev, cat];
+		});
+		setEditingCategory(null);
+	};
+
+	/** Remove a category from the local list; nothing is persisted until the next save. */
+	const handleCategoryDelete = (id: string) => {
+		setCategories((prev) => prev.filter((c) => c.id !== id));
+	};
+
+	if (isLoading) {
+		return (
+			<div className="flex items-center justify-center py-16">
+				<CircleNotch className="h-6 w-6 animate-spin text-muted-foreground" />
+			</div>
+		);
+	}
+
+	return (
+		<div className="space-y-6">
+			{/* Header */}
+			<div className="flex items-center justify-between">
+				<div>
+					<h1 className="text-3xl font-bold">AI Moderation</h1>
+					<p className="text-muted-foreground mt-1">Configure AI-powered comment moderation</p>
+				</div>
+				<div className="flex items-center gap-3">
+					{saveMessage && <span className="text-sm text-muted-foreground">{saveMessage}</span>}
+					<button
+						onClick={handleSave}
+						disabled={isSaving}
+						className="inline-flex items-center gap-2 px-4 py-2 bg-primary text-primary-foreground rounded-md hover:bg-primary/90 disabled:opacity-50"
+					>
+						{isSaving ? (
+							<CircleNotch className="h-4 w-4 animate-spin" />
+						) : (
+							<FloppyDisk className="h-4 w-4" />
+						)}
+						{isSaving ? "Saving..." : "Save Settings"}
+					</button>
+				</div>
+			</div>
+
+			<div className="grid gap-6 lg:grid-cols-2">
+				{/* Categories */}
+				<div className="border rounded-lg p-6 space-y-4 lg:col-span-2">
+					<div className="flex items-center justify-between">
+						<div className="flex items-center gap-2">
+							<ShieldCheck className="h-5 w-5 text-muted-foreground" />
+							<h2 className="text-lg font-semibold">Safety Categories</h2>
+						</div>
+						<button
+							onClick={() => setEditingCategory("new")}
+							className="inline-flex items-center gap-1 px-3 py-1.5 border rounded-md hover:bg-muted text-sm"
+						>
+							<Plus className="h-3.5 w-3.5" />
+							Add Category
+						</button>
+					</div>
+
+					<div className="divide-y">
+						{categories.map((cat) => (
+							<div key={cat.id} className="flex items-center justify-between py-3">
+								<div className="flex-1 min-w-0">
+									<div className="flex items-center gap-2">
+										<span className="text-xs font-mono bg-muted px-1.5 py-0.5 rounded">
+											{cat.id}
+										</span>
+										<span className="font-medium">{cat.name}</span>
+										<span
+											className={`text-xs px-2 py-0.5 rounded-full ${
+												cat.action === "block"
+													? "bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400"
+													: cat.action === "hold"
+														? "bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400"
+														: "bg-gray-100 text-gray-600 dark:bg-gray-800 dark:text-gray-400"
+											}`}
+										>
+											{cat.action}
+										</span>
+									</div>
+									<p className="text-sm text-muted-foreground mt-0.5 truncate">{cat.description}</p>
+								</div>
+								<div className="flex items-center gap-1 ml-4">
+									<button
+										onClick={() => setEditingCategory(cat)}
+										className="p-1.5 hover:bg-muted rounded"
+										title="Edit"
+									>
+										<PencilSimple className="h-4 w-4" />
+									</button>
+									{/* Built-in categories can be edited but not removed */}
+									{!cat.builtin && (
+										<button
+											onClick={() => handleCategoryDelete(cat.id)}
+											className="p-1.5 hover:bg-muted rounded text-red-600"
+											title="Delete"
+										>
+											<Trash className="h-4 w-4" />
+										</button>
+									)}
+								</div>
+							</div>
+						))}
+					</div>
+				</div>
+
+				{/* Behavior */}
+				<div className="border rounded-lg p-6 space-y-4">
+					<h2 className="text-lg font-semibold">Behavior</h2>
+
+					<Switch
+						checked={autoApproveClean}
+						onCheckedChange={setAutoApproveClean}
+						label="Auto-approve clean comments"
+						labelTooltip="Automatically approve comments that pass AI checks. When off, falls back to collection moderation settings."
+						controlFirst={false}
+					/>
+				</div>
+
+				{/* Test Panel */}
+				<div className="border rounded-lg p-6 space-y-4 lg:col-span-2">
+					<div className="flex items-center gap-2">
+						<TestTube className="h-5 w-5 text-muted-foreground" />
+						<h2 className="text-lg font-semibold">Test Panel</h2>
+					</div>
+
+					<div className="space-y-3">
+						<textarea
+							value={testText}
+							onChange={(e: React.ChangeEvent<HTMLTextAreaElement>) => setTestText(e.target.value)}
+							rows={3}
+							placeholder="Paste a comment to test AI analysis..."
+							className="w-full px-3 py-2 border rounded-md bg-background text-sm resize-none"
+						/>
+						<button
+							onClick={handleTest}
+							disabled={isTesting || !testText.trim()}
+							className="inline-flex items-center gap-2 px-4 py-2 border rounded-md hover:bg-muted disabled:opacity-50 text-sm"
+						>
+							{isTesting ? (
+								<CircleNotch className="h-4 w-4 animate-spin" />
+							) : (
+								<TestTube className="h-4 w-4" />
+							)}
+							{isTesting ? "Analyzing..." : "Analyze"}
+						</button>
+
+						{testResult && (
+							<div className="p-4 bg-muted/50 rounded-md space-y-2">
+								<TestResultView result={testResult} />
+							</div>
+						)}
+					</div>
+				</div>
+			</div>
+
+			{/* Category Dialog */}
+			{editingCategory !== null && (
+				<CategoryDialog
+					category={editingCategory === "new" ? null : editingCategory}
+					onSave={handleCategorySave}
+					onClose={() => setEditingCategory(null)}
+				/>
+			)}
+		</div>
+	);
+}
+
+// =============================================================================
+// Exports
+// =============================================================================
+
+/** Dashboard widgets exposed to the admin UI, keyed by widget id. */
+export const widgets: PluginAdminExports["widgets"] = {
+	status: StatusWidget,
+};
+
+/** Admin pages exposed to the admin UI, keyed by page path. */
+export const pages: PluginAdminExports["pages"] = {
+	"/settings": SettingsPage,
+};
--- a/packages/plugins/ai-moderation/src/categories.ts
+++ b/packages/plugins/ai-moderation/src/categories.ts
@@ -0,0 +1,95 @@
+/**
+ * AI Moderation Categories
+ *
+ * Defines the content taxonomy used by Llama Guard for comment classification.
+ * Categories map to actions (block, hold, ignore) that feed into the moderation decision.
+ */
+
+/** One entry of the moderation taxonomy together with the action it triggers. */
+export interface Category {
+	/** Short identifier (e.g. "C1"); `buildTaxonomy` emits it as the prefix of the entry */
+	id: string;
+	/** Human-readable name, emitted after the identifier in the taxonomy */
+	name: string;
+	/** Description of what this category covers, emitted on the line after the name */
+	description: string;
+	/** Action to take when this category is triggered; "ignore" also drops it from the taxonomy */
+	action: "block" | "hold" | "ignore";
+	/** Whether this is a built-in category (cannot be deleted) */
+	builtin: boolean;
+}
+
+/**
+ * Default categories tuned for comment moderation.
+ *
+ * Covers the most common problems a comment moderator faces: spam, toxicity,
+ * trolling, harassment, hate speech, and misinformation. Child safety is
+ * retained as a hard block since it applies universally.
+ *
+ * All entries are marked `builtin`. Spam, harassment, hate speech, and child
+ * safety default to "block"; toxicity, trolling, and misinformation to "hold".
+ */
+export const DEFAULT_CATEGORIES: Category[] = [
+	{
+		id: "C1",
+		name: "Spam",
+		description:
+			"Unsolicited commercial messages, repetitive posts, or comments that exist solely to promote a product, service, or link with no genuine contribution to the discussion",
+		action: "block",
+		builtin: true,
+	},
+	{
+		id: "C2",
+		name: "Toxic Comment",
+		description:
+			"Rude, disrespectful, or hostile language intended to upset or demean others, including insults, profanity directed at people, and gratuitously offensive content",
+		action: "hold",
+		builtin: true,
+	},
+	{
+		id: "C3",
+		name: "Trolling",
+		description:
+			"Comments designed to provoke arguments or emotional reactions rather than contribute to discussion — including deliberate bad-faith arguments, inflammatory bait, and intentional disruption",
+		action: "hold",
+		builtin: true,
+	},
+	{
+		id: "C4",
+		name: "Harassment",
+		description:
+			"Targeted abuse, threats, or intimidation directed at a specific person or group, including doxxing, personal attacks, and coordinated pile-ons",
+		action: "block",
+		builtin: true,
+	},
+	{
+		id: "C5",
+		name: "Hate Speech",
+		description:
+			"Content attacking individuals or groups based on protected characteristics such as race, religion, gender, sexual orientation, or disability",
+		action: "block",
+		builtin: true,
+	},
+	{
+		id: "C6",
+		name: "Misinformation",
+		description:
+			"Demonstrably false claims, conspiracy theories, or fabricated facts presented as true — especially on health, safety, or electoral topics",
+		action: "hold",
+		builtin: true,
+	},
+	{
+		id: "C7",
+		name: "Child Safety",
+		description: "Any content that sexualizes minors or could facilitate harm to children",
+		action: "block",
+		builtin: true,
+	},
+];
+
+/**
+ * Render categories as the taxonomy text embedded in the Llama Guard prompt.
+ *
+ * Categories whose action is "ignore" are left out. Every remaining category
+ * contributes an `id: name` line followed by its description on the next line.
+ *
+ * @param categories - Categories to render, in output order.
+ * @returns The taxonomy text, or "" when no category is active.
+ */
+export function buildTaxonomy(categories: Category[]): string {
+	const entries: string[] = [];
+	for (const category of categories) {
+		if (category.action === "ignore") continue;
+		entries.push(`${category.id}: ${category.name}\n${category.description}`);
+	}
+	// Joining an empty list yields "", which callers treat as "nothing to classify".
+	return entries.join("\n");
+}
--- a/packages/plugins/ai-moderation/src/decision.ts
+++ b/packages/plugins/ai-moderation/src/decision.ts
@@ -0,0 +1,100 @@
+/**
+ * Moderation Decision Logic
+ */
+
+import type { CollectionCommentSettings, ModerationDecision } from "emdash";
+
+import type { Category } from "./categories.js";
+import type { GuardResult } from "./guard.js";
+/**
+ * Compute the moderation decision for a comment.
+ *
+ * Decision flow (in priority order):
+ * 1. Authenticated CMS user → approved
+ * 2. AI flagged "block" category → spam
+ * 3. AI flagged "hold" or unknown category, or said "unsafe" without naming
+ *    any category → pending
+ * 4. AI error (fail-safe) → pending
+ * 5. AI clean + autoApproveClean → approved
+ * 6. Collection moderation fallback
+ *
+ * @param guard - Classifier result, or `undefined` when the classifier did not run.
+ * @param guardError - Error message from a failed classifier run, if any.
+ * @param categories - Configured categories; maps reported category IDs to actions.
+ * @param settings - Plugin behavior; `autoApproveClean` enables step 5.
+ * @param collectionSettings - Collection-level settings used by the fallback.
+ * @param priorApprovedCount - Prior approved-comment count passed in by the caller;
+ *   a value above zero approves the comment under "first_time" moderation.
+ * @param isAuthenticatedUser - Whether the comment was written by a CMS user.
+ * @returns The status to assign plus a human-readable reason.
+ */
+export function computeDecision(
+	guard: GuardResult | undefined,
+	guardError: string | undefined,
+	categories: Category[],
+	settings: { autoApproveClean: boolean },
+	collectionSettings: CollectionCommentSettings,
+	priorApprovedCount: number,
+	isAuthenticatedUser: boolean,
+): ModerationDecision {
+	// 1. Auto-approve authenticated CMS users
+	if (isAuthenticatedUser) {
+		return { status: "approved", reason: "Authenticated CMS user" };
+	}
+
+	// Build category action lookup
+	const categoryActions = new Map(categories.map((c) => [c.id, c.action]));
+
+	// 2 & 3. Check AI guard results
+	// Track whether AI ran and found only ignorable categories (treat as clean)
+	let aiRanClean = guard?.safe === true;
+
+	if (guard && !guard.safe) {
+		// An "unsafe" verdict that names no category cannot be matched against the
+		// configured actions. Hold it for review rather than letting it fall through
+		// to the "all ignored" path below, which would auto-approve it.
+		if (guard.categories.length === 0) {
+			return {
+				status: "pending",
+				reason: "AI flagged for review: no category reported",
+			};
+		}
+
+		let shouldBlock = false;
+		let shouldHold = false;
+		const flaggedCategories: string[] = [];
+
+		for (const catId of guard.categories) {
+			const action = categoryActions.get(catId);
+			if (action === "block") {
+				shouldBlock = true;
+				flaggedCategories.push(catId);
+			} else if (action === "hold" || action === undefined) {
+				// Unknown categories default to "hold" (fail-safe)
+				shouldHold = true;
+				flaggedCategories.push(catId);
+			}
+			// "ignore" categories are skipped
+		}
+
+		// A single "block" category outranks any number of "hold" categories.
+		if (shouldBlock) {
+			return {
+				status: "spam",
+				reason: `AI flagged: ${flaggedCategories.join(", ")}`,
+			};
+		}
+
+		if (shouldHold) {
+			return {
+				status: "pending",
+				reason: `AI flagged for review: ${flaggedCategories.join(", ")}`,
+			};
+		}
+
+		// AI flagged categories but all were "ignore" — treat as clean
+		aiRanClean = true;
+	}
+
+	// 4. AI error (fail-safe: hold for review)
+	if (guardError) {
+		return {
+			status: "pending",
+			reason: `AI error: ${guardError}`,
+		};
+	}
+
+	// 5. Auto-approve clean comments when configured
+	if (settings.autoApproveClean && aiRanClean) {
+		return { status: "approved", reason: "AI verified clean" };
+	}
+
+	// 6. Fall back to collection moderation settings
+	if (collectionSettings.commentsModeration === "none") {
+		return { status: "approved", reason: "Moderation disabled" };
+	}
+
+	if (collectionSettings.commentsModeration === "first_time" && priorApprovedCount > 0) {
+		return { status: "approved", reason: "Returning commenter" };
+	}
+
+	return { status: "pending", reason: "Held for review" };
+}
--- a/packages/plugins/ai-moderation/src/descriptor.ts
+++ b/packages/plugins/ai-moderation/src/descriptor.ts
@@ -0,0 +1,33 @@
+/**
+ * AI Moderation Plugin Descriptor
+ */
+
+import type { PluginDescriptor } from "emdash";
+
+import type { Category } from "./categories.js";
+
+/** Options accepted by the AI moderation plugin. */
+export interface AIModerationOptions {
+	/** Override default categories; the plugin uses these only when no categories are stored in KV */
+	categories?: Category[];
+	/** Auto-approve comments that pass AI checks (default: true); a stored behavior setting takes precedence */
+	autoApproveClean?: boolean;
+	/** Workers AI binding name (default: "AI") */
+	aiBinding?: string;
+}
+
+/**
+ * Build the descriptor that registers the AI moderation plugin.
+ *
+ * The given options are attached to the descriptor unchanged; every other field
+ * (id, version, entrypoints, admin page and widget) is fixed.
+ *
+ * @param options - Plugin options; defaults to an empty object.
+ * @returns The plugin descriptor carrying `options`.
+ */
+export function aiModerationPlugin(
+	options: AIModerationOptions = {},
+): PluginDescriptor<AIModerationOptions> {
+	const descriptor: PluginDescriptor<AIModerationOptions> = {
+		id: "ai-moderation",
+		version: "0.1.0",
+		entrypoint: "@emdashcms/plugin-ai-moderation/plugin",
+		options,
+		adminEntry: "@emdashcms/plugin-ai-moderation/admin",
+		adminPages: [{ path: "/settings", label: "AI Moderation", icon: "shield" }],
+		adminWidgets: [{ id: "status", title: "AI Moderation", size: "third" }],
+	};
+	return descriptor;
+}
--- a/packages/plugins/ai-moderation/src/guard.ts
+++ b/packages/plugins/ai-moderation/src/guard.ts
@@ -0,0 +1,100 @@
+/**
+ * Llama Guard Integration
+ *
+ * Builds prompts for and parses responses from Llama Guard 3 8B via Workers AI.
+ */
+
+/** Parsed classifier verdict for one piece of text. */
+export interface GuardResult {
+	/** `true` when the classifier reported the text as safe */
+	safe: boolean;
+	/** Category IDs reported as violated; empty when the text is safe */
+	categories: string[];
+}
+
+/** Separator between category IDs in a text-mode response: any run of commas and/or whitespace */
+const CATEGORY_SPLIT_PATTERN = /[,\s]+/;
+
+/** Structural markers used by the Llama Guard prompt format */
+const STRUCTURAL_MARKERS = /(<\/?(?:BEGIN|END)\s+(?:UNSAFE CONTENT CATEGORIES|CONVERSATION)>)/gi;
+
+/**
+ * Sanitize user text to prevent prompt injection via structural markers.
+ *
+ * Removal is repeated until the text stops changing: deleting one marker can
+ * splice the surrounding characters into a new one (for example
+ * `<END <END CONVERSATION>CONVERSATION>`), which a single pass would leave behind.
+ *
+ * @param text - Untrusted comment text.
+ * @returns The text with every structural marker removed.
+ */
+function sanitizeUserText(text: string): string {
+	let current = text;
+	let previous: string;
+	// Every pass that changes anything shortens the string, so this terminates.
+	do {
+		previous = current;
+		current = current.replace(STRUCTURAL_MARKERS, "");
+	} while (current !== previous);
+	return current;
+}
+
+/**
+ * Assemble the classification prompt sent to Llama Guard for one comment.
+ *
+ * The comment text is passed through `sanitizeUserText` before it is embedded;
+ * the taxonomy is inserted as given.
+ *
+ * @param text - Comment text to classify.
+ * @param taxonomy - Category taxonomy text placed in the categories section.
+ * @returns The complete prompt.
+ */
+export function buildGuardPrompt(text: string, taxonomy: string): string {
+	// Strip structural markers so the comment cannot close its own section.
+	const userMessage = sanitizeUserText(text);
+	const prompt = `Task: Check if there is unsafe content in the 'User' message according to our safety policy with the below categories.
+
+<BEGIN UNSAFE CONTENT CATEGORIES>
+${taxonomy}
+<END UNSAFE CONTENT CATEGORIES>
+
+<BEGIN CONVERSATION>
+
+User: ${userMessage}
+
+<END CONVERSATION>
+
+Provide your safety assessment for the User message. First line must be 'safe' or 'unsafe'. If unsafe, list the violated categories on the second line separated by commas.`;
+	return prompt;
+}
+
+/**
+ * Parse a Llama Guard response into a structured result.
+ * Handles both text mode ("safe"/"unsafe\nS1,S6") and structured JSON mode
+ * ({ safe, categories }) as returned by Workers AI.
+ *
+ * In text mode an "unsafe" verdict is honoured even when no category line
+ * follows it; the result then has an empty `categories` list.
+ *
+ * @param output - Raw model output; only its `response` field is read.
+ * @returns The parsed verdict. Missing or unrecognised responses are reported as safe.
+ */
+export function parseGuardResponse(output: Ai_Cf_Meta_Llama_Guard_3_8B_Output): GuardResult {
+	const resp = output.response;
+
+	// Structured JSON mode — Workers AI returns { safe, categories } directly
+	if (typeof resp === "object" && resp !== null) {
+		return {
+			safe: resp.safe ?? true,
+			categories: resp.categories ?? [],
+		};
+	}
+
+	// Text mode — "safe" or "unsafe\nS1,S6"
+	if (typeof resp === "string") {
+		const [verdictLine = "", categoryLine = ""] = resp.trim().split("\n");
+
+		if (verdictLine.trim().toLowerCase() === "unsafe") {
+			// Do not require a second line: a bare "unsafe" is still an unsafe
+			// verdict and must not fall through to the "safe" default below.
+			const categories = categoryLine
+				.split(CATEGORY_SPLIT_PATTERN)
+				.filter((c) => c.length > 0);
+			return { safe: false, categories };
+		}
+	}
+
+	// Default: safe (including undefined or unexpected responses)
+	return { safe: true, categories: [] };
+}
+
+/**
+ * Classify a piece of text with Llama Guard through a Workers AI binding.
+ *
+ * @param text - Text to classify.
+ * @param taxonomy - Category taxonomy text to embed in the prompt.
+ * @param aiBinding - Name of the Workers AI binding to look up in `env` (default "AI").
+ * @returns The parsed classifier verdict.
+ * @throws Error when `env` has no binding under `aiBinding`.
+ */
+export async function runGuard(
+	text: string,
+	taxonomy: string,
+	aiBinding = "AI",
+): Promise<GuardResult> {
+	// Imported lazily, at call time rather than at module load.
+	const workers = await import("cloudflare:workers");
+	const bindings = workers.env as Record<string, Ai>;
+	const model = bindings[aiBinding];
+	if (!model) {
+		throw new Error(`Workers AI binding "${aiBinding}" not found in env`);
+	}
+
+	const response = await model.run("@cf/meta/llama-guard-3-8b", {
+		messages: [{ role: "user", content: buildGuardPrompt(text, taxonomy) }],
+		max_tokens: 100,
+		temperature: 0.1,
+	});
+
+	return parseGuardResponse(response);
+}
--- a/packages/plugins/ai-moderation/src/index.ts
+++ b/packages/plugins/ai-moderation/src/index.ts
@@ -0,0 +1,235 @@
+/**
+ * AI Moderation Plugin
+ *
+ * Uses Cloudflare Workers AI (Llama Guard 3 8B) to moderate comments.
+ * Registers as the exclusive comment:moderate provider, replacing the
+ * built-in default moderator.
+ */
+
+import type { ResolvedPlugin } from "emdash";
+import { definePlugin } from "emdash";
+
+import { DEFAULT_CATEGORIES, buildTaxonomy } from "./categories.js";
+import type { Category } from "./categories.js";
+import { computeDecision } from "./decision.js";
+import type { AIModerationOptions } from "./descriptor.js";
+import { runGuard } from "./guard.js";
+import type { GuardResult } from "./guard.js";
+
+/** KV key for stored categories (written by the `settings/save` route) */
+const KV_CATEGORIES = "config:categories";
+/** KV key for behavior settings (written by the `settings/save` route) */
+const KV_BEHAVIOR = "config:behavior";
+
+/** Type guard: true for any non-null object that is not an array */
+function isRecord(value: unknown): value is Record<string, unknown> {
+	if (value === null || Array.isArray(value)) return false;
+	return typeof value === "object";
+}
+
+/** Narrow an unknown value to one of the category actions */
+function isCategoryAction(value: unknown): value is Category["action"] {
+	return value === "block" || value === "hold" || value === "ignore";
+}
+
+/**
+ * Validate the category list submitted to `settings/save`.
+ *
+ * Entries come straight from the request body, so each one is narrowed before
+ * its fields are read. On success the result holds clean copies that carry only
+ * the known `Category` fields, with `builtin` coerced to a boolean.
+ *
+ * @returns `{ categories }` on success, or `{ error }` with the message to report.
+ */
+function validateCategories(raw: unknown[]): { categories: Category[] } | { error: string } {
+	const categories: Category[] = [];
+	const seenIds = new Set<string>();
+
+	for (const entry of raw) {
+		// A non-object entry (null, string, ...) is reported as invalid, not dereferenced.
+		const cat = isRecord(entry) ? entry : {};
+		const { id, name, description, action } = cat;
+		if (
+			typeof id !== "string" ||
+			typeof name !== "string" ||
+			typeof description !== "string" ||
+			!id ||
+			!name ||
+			!description ||
+			id.length > 10 ||
+			name.length > 100 ||
+			description.length > 500 ||
+			!isCategoryAction(action)
+		) {
+			return { error: `Invalid category: ${typeof id === "string" ? id : "missing id"}` };
+		}
+		if (seenIds.has(id)) {
+			return { error: `Duplicate category ID: ${id}` };
+		}
+		seenIds.add(id);
+		categories.push({ id, name, description, action, builtin: cat.builtin === true });
+	}
+
+	return { categories };
+}
+
+/**
+ * Create the AI moderation plugin.
+ *
+ * Registers two comment hooks and four admin routes:
+ * - `comment:beforeCreate` runs the AI guard and writes its result (or a generic
+ *   error marker) to the comment metadata.
+ * - `comment:moderate` (exclusive) turns those signals into a decision.
+ * - `settings`, `settings/save`, `settings/test` and `status` back the admin UI.
+ *
+ * @param options - Plugin options; stored KV settings take precedence over them.
+ * @returns The resolved plugin.
+ */
+export function createPlugin(options: AIModerationOptions = {}): ResolvedPlugin {
+	const defaultAutoApprove = options.autoApproveClean ?? true;
+	const aiBinding = options.aiBinding ?? "AI";
+
+	/** Load categories from KV or fall back to options/defaults */
+	async function loadCategories(kv: {
+		get: <T>(key: string) => Promise<T | null>;
+	}): Promise<Category[]> {
+		const stored = await kv.get<Category[]>(KV_CATEGORIES);
+		return stored ?? options.categories ?? DEFAULT_CATEGORIES;
+	}
+
+	/** Load behavior settings from KV or fall back to defaults */
+	async function loadBehavior(kv: {
+		get: <T>(key: string) => Promise<T | null>;
+	}): Promise<{ autoApproveClean: boolean }> {
+		const stored = await kv.get<{ autoApproveClean: boolean }>(KV_BEHAVIOR);
+		return stored ?? { autoApproveClean: defaultAutoApprove };
+	}
+
+	return definePlugin({
+		id: "ai-moderation",
+		version: "0.1.0",
+		capabilities: [],
+		allowedHosts: [],
+
+		admin: {
+			entry: "@emdashcms/plugin-ai-moderation/admin",
+			pages: [{ path: "/settings", label: "AI Moderation", icon: "shield" }],
+			widgets: [{ id: "status", title: "AI Moderation", size: "third" }],
+		},
+
+		hooks: {
+			// Enrichment hook — runs AI guard, writes signals to metadata
+			"comment:beforeCreate": {
+				priority: 10,
+				errorPolicy: "continue",
+				handler: async (event, ctx) => {
+					const categories = await loadCategories(ctx.kv);
+
+					// Run AI guard (try/catch — failure is non-fatal)
+					let guard: GuardResult | undefined;
+					let guardError: string | undefined;
+
+					// An empty taxonomy means no active categories; the guard is skipped.
+					const taxonomy = buildTaxonomy(categories);
+					if (taxonomy) {
+						try {
+							guard = await runGuard(event.comment.body, taxonomy, aiBinding);
+						} catch (error) {
+							// Only a generic marker goes into metadata; details go to the log.
+							guardError = "AI classification failed";
+							ctx.log.error("AI guard failed", {
+								error: error instanceof Error ? error.message : String(error),
+							});
+						}
+					}
+
+					// Write signals to metadata for the moderator
+					event.metadata.aiGuard = guard;
+					event.metadata.aiGuardError = guardError;
+
+					return event;
+				},
+			},
+
+			// Exclusive moderator — reads metadata signals, computes decision
+			"comment:moderate": {
+				exclusive: true,
+				handler: async (event, ctx) => {
+					// The two KV reads are independent, so run them in parallel.
+					const [categories, behavior] = await Promise.all([
+						loadCategories(ctx.kv),
+						loadBehavior(ctx.kv),
+					]);
+
+					// Read signals from metadata (written by beforeCreate hook)
+					const guard = event.metadata.aiGuard as GuardResult | undefined;
+					const guardError = event.metadata.aiGuardError as string | undefined;
+
+					const isAuthenticated = !!event.comment.authorUserId;
+
+					return computeDecision(
+						guard,
+						guardError,
+						categories,
+						behavior,
+						event.collectionSettings,
+						event.priorApprovedCount,
+						isAuthenticated,
+					);
+				},
+			},
+		},
+
+		routes: {
+			// Get current settings
+			settings: {
+				handler: async (ctx) => {
+					const [categories, behavior] = await Promise.all([
+						loadCategories(ctx.kv),
+						loadBehavior(ctx.kv),
+					]);
+
+					return { categories, behavior };
+				},
+			},
+
+			// Save settings
+			"settings/save": {
+				handler: async (ctx) => {
+					const input = isRecord(ctx.input) ? ctx.input : {};
+
+					// Categories are validated as a whole before anything is written.
+					if (Array.isArray(input.categories)) {
+						const result = validateCategories(input.categories);
+						if ("error" in result) {
+							return { success: false, error: result.error };
+						}
+						await ctx.kv.set(KV_CATEGORIES, result.categories);
+					}
+
+					if (isRecord(input.behavior)) {
+						const behavior = {
+							autoApproveClean:
+								typeof input.behavior.autoApproveClean === "boolean"
+									? input.behavior.autoApproveClean
+									: defaultAutoApprove,
+						};
+						await ctx.kv.set(KV_BEHAVIOR, behavior);
+					}
+
+					return { success: true };
+				},
+			},
+
+			// Test AI analysis on sample text
+			"settings/test": {
+				handler: async (ctx) => {
+					const input = isRecord(ctx.input) ? ctx.input : {};
+					const text = typeof input.text === "string" ? input.text : "";
+
+					if (!text.trim()) {
+						return { success: false, error: "No text provided" };
+					}
+
+					const categories = await loadCategories(ctx.kv);
+
+					// Run AI guard
+					let guard: GuardResult | undefined;
+					let guardError: string | undefined;
+					const taxonomy = buildTaxonomy(categories);
+
+					if (taxonomy) {
+						try {
+							guard = await runGuard(text, taxonomy, aiBinding);
+						} catch (error) {
+							// Unlike the comment hook, the test route returns the actual message.
+							guardError = error instanceof Error ? error.message : String(error);
+						}
+					}
+
+					return {
+						success: true,
+						guard: guard ?? null,
+						guardError: guardError ?? null,
+						taxonomy,
+					};
+				},
+			},
+
+			// Plugin status for dashboard widget
+			status: {
+				handler: async (ctx) => {
+					const [categories, behavior] = await Promise.all([
+						loadCategories(ctx.kv),
+						loadBehavior(ctx.kv),
+					]);
+
+					return {
+						enabled: true,
+						categoryCount: categories.filter((c) => c.action !== "ignore").length,
+						autoApproveClean: behavior.autoApproveClean,
+					};
+				},
+			},
+		},
+	});
+}
+
+export default createPlugin;